PyPI - keep-skill - Versions diffs - 0.8.1__py3-none-any.whl → 0.10.0__py3-none-any.whl - Mend

keep-skill 0.8.1 (py3-none-any.whl) → 0.10.0 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

keep/__init__.py +1 -1
keep/api.py +45 -40
keep/cli.py +142 -28
keep/config.py +1 -1
keep/data/system/library.md +144 -0
keep/providers/base.py +39 -0
keep/providers/llm.py +29 -41
keep/providers/mlx.py +18 -21
keep/store.py +31 -0
{keep_skill-0.8.1.dist-info → keep_skill-0.10.0.dist-info}/METADATA +8 -5
{keep_skill-0.8.1.dist-info → keep_skill-0.10.0.dist-info}/RECORD +14 -13
{keep_skill-0.8.1.dist-info → keep_skill-0.10.0.dist-info}/WHEEL +0 -0
{keep_skill-0.8.1.dist-info → keep_skill-0.10.0.dist-info}/entry_points.txt +0 -0
{keep_skill-0.8.1.dist-info → keep_skill-0.10.0.dist-info}/licenses/LICENSE +0 -0

keep/__init__.py CHANGED Viewed

@@ -40,7 +40,7 @@ if not os.environ.get("KEEP_VERBOSE"):
 from .api import Keeper, NOWDOC_ID
 from .types import Item, filter_non_system_tags, SYSTEM_TAG_PREFIX, INTERNAL_TAGS
-__version__ = "0.7.0"
+__version__ = "0.10.0"
 __all__ = [
     "Keeper",
     "Item",

keep/api.py CHANGED Viewed

@@ -191,6 +191,7 @@ SYSTEM_DOC_IDS = {
     "now.md": "_system:now",
     "conversations.md": "_system:conversations",
     "domains.md": "_system:domains",
+    "library.md": "_system:library",
 }
@@ -404,24 +405,25 @@ class Keeper:
         except Exception as e:
             logger.debug("Error scanning old system docs: %s", e)
-        # Second pass: create any missing system docs from bundled content
+        # Second pass: create or update system docs from bundled content
         for path in SYSTEM_DOC_DIR.glob("*.md"):
             new_id = SYSTEM_DOC_IDS.get(path.name)
             if new_id is None:
                 logger.debug("Skipping unknown system doc: %s", path.name)
                 continue
-            # Skip if already exists
-            if self.exists(new_id):
-                stats["skipped"] += 1
-                continue
             try:
                 content, tags = _load_frontmatter(path)
                 tags["category"] = "system"
+                existed = self.exists(new_id)
+                # remember() handles both create and update (with re-summarization)
                 self.remember(content, id=new_id, tags=tags)
-                stats["created"] += 1
-                logger.info("Created system doc: %s", new_id)
+                if existed:
+                    stats["migrated"] += 1
+                    logger.info("Updated system doc: %s", new_id)
+                else:
+                    stats["created"] += 1
+                    logger.info("Created system doc: %s", new_id)
             except FileNotFoundError:
                 # System file missing - skip silently
                 pass
@@ -534,13 +536,17 @@ class Keeper:
         summary: Optional[str] = None,
         source_tags: Optional[dict[str, str]] = None,  # Deprecated alias
         collection: Optional[str] = None,
-        lazy: bool = False
     ) -> Item:
         """
         Insert or update a document in the store.
         Fetches the document, generates embeddings and summary, then stores it.
+        **Summary behavior:**
+        - If summary is provided, use it (skips auto-summarization)
+        - For large content, summarization is async (truncated placeholder
+          stored immediately, real summary generated in background)
         **Update behavior:**
         - Summary: Replaced with user-provided or newly generated summary
         - Tags: Merged - existing tags are preserved, new tags override
@@ -553,9 +559,6 @@ class Keeper:
             summary: User-provided summary (skips auto-summarization if given)
             source_tags: Deprecated alias for 'tags'
             collection: Target collection (uses default if None)
-            lazy: If True, use truncated placeholder summary and queue for
-                  background processing. Use `process_pending()` to generate
-                  real summaries later. Ignored if summary is provided.
         Returns:
             The stored Item with merged tags and new summary
@@ -615,17 +618,14 @@ class Keeper:
                 )
                 summary = summary[:max_len]
             final_summary = summary
-        elif lazy:
-            # Truncated placeholder for lazy mode
+        else:
+            # Large content: async summarization (truncated placeholder now, real summary later)
             if len(doc.content) > max_len:
                 final_summary = doc.content[:max_len] + "..."
+                # Queue for background processing
+                self._pending_queue.enqueue(id, coll, doc.content)
             else:
                 final_summary = doc.content
-            # Queue for background processing
-            self._pending_queue.enqueue(id, coll, doc.content)
-        else:
-            # Auto-generate summary
-            final_summary = self._get_summarization_provider().summarize(doc.content)
         # Build tags: existing → config → env → user (later wins on collision)
         merged_tags = {**existing_tags}
@@ -686,8 +686,8 @@ class Keeper:
                     tags=old_doc.tags,
                 )
-        # Spawn background processor if lazy (only if summary wasn't user-provided and content changed)
-        if lazy and summary is None and not content_unchanged:
+        # Spawn background processor if content was queued (large content, no user summary, content changed)
+        if summary is None and len(doc.content) > max_len and not content_unchanged:
             self._spawn_processor()
         # Return the stored item
@@ -703,7 +703,6 @@ class Keeper:
         tags: Optional[dict[str, str]] = None,
         source_tags: Optional[dict[str, str]] = None,  # Deprecated alias
         collection: Optional[str] = None,
-        lazy: bool = False
     ) -> Item:
         """
         Store inline content directly (without fetching from a URI).
@@ -713,7 +712,8 @@ class Keeper:
         **Smart summary behavior:**
         - If summary is provided, use it (skips auto-summarization)
         - If content is short (≤ max_summary_length), use content verbatim
-        - Otherwise, generate summary via summarization provider
+        - For large content, summarization is async (truncated placeholder
+          stored immediately, real summary generated in background)
         **Update behavior (when id already exists):**
         - Summary: Replaced with user-provided, content, or generated summary
@@ -728,9 +728,6 @@ class Keeper:
             tags: User-provided tags to merge with existing tags
             source_tags: Deprecated alias for 'tags'
             collection: Target collection (uses default if None)
-            lazy: If True and content is long, use truncated placeholder summary
-                  and queue for background processing. Ignored if content is
-                  short or summary is provided.
         Returns:
             The stored Item with merged tags and new summary
@@ -794,14 +791,11 @@ class Keeper:
         elif len(content) <= max_len:
             # Content is short enough - use verbatim (smart summary)
             final_summary = content
-        elif lazy:
-            # Content is long and lazy mode - truncated placeholder
+        else:
+            # Content is long - async summarization (truncated placeholder now, real summary later)
             final_summary = content[:max_len] + "..."
             # Queue for background processing
             self._pending_queue.enqueue(id, coll, content)
-        else:
-            # Content is long - generate summary
-            final_summary = self._get_summarization_provider().summarize(content)
         # Build tags: existing → config → env → user (later wins on collision)
         merged_tags = {**existing_tags}
@@ -860,8 +854,8 @@ class Keeper:
                     tags=old_doc.tags,
                 )
-        # Spawn background processor if lazy and content was queued (only if content changed)
-        if lazy and summary is None and len(content) > max_len and not content_unchanged:
+        # Spawn background processor if content was queued (large content, no user summary, content changed)
+        if summary is None and len(content) > max_len and not content_unchanged:
             self._spawn_processor()
         # Return the stored item
@@ -1366,14 +1360,14 @@ class Keeper:
     def get_now(self) -> Item:
         """
-        Get the current working context.
+        Get the current working intentions.
         A singleton document representing what you're currently working on.
         If it doesn't exist, creates one with default content and tags from
         the bundled system now.md file.
         Returns:
-            The current context Item (never None - auto-creates if missing)
+            The current intentions Item (never None - auto-creates if missing)
         """
         item = self.get(NOWDOC_ID)
         if item is None:
@@ -1394,13 +1388,13 @@ class Keeper:
         tags: Optional[dict[str, str]] = None,
     ) -> Item:
         """
-        Set the current working context.
+        Set the current working intentions.
-        Updates the singleton context with new content. Uses remember()
+        Updates the singleton intentions with new content. Uses remember()
         internally with the fixed NOWDOC_ID.
         Args:
-            content: New content for the current context
+            content: New content for the current intentions
             tags: Optional additional tags to apply
         Returns:
@@ -1784,10 +1778,18 @@ class Keeper:
     def close(self) -> None:
         """
-        Close resources (embedding cache connection, pending queue, etc.).
+        Close resources (stores, caches, queues).
         Good practice to call when done, though Python's GC will clean up eventually.
         """
+        # Close ChromaDB store
+        if hasattr(self, '_store') and self._store is not None:
+            self._store.close()
+        # Close document store (SQLite)
+        if hasattr(self, '_document_store') and self._document_store is not None:
+            self._document_store.close()
         # Close embedding cache if it was loaded
         if self._embedding_provider is not None:
             if hasattr(self._embedding_provider, '_cache'):
@@ -1810,4 +1812,7 @@ class Keeper:
     def __del__(self):
         """Cleanup on deletion."""
-        self.close()
+        try:
+            self.close()
+        except Exception:
+            pass  # Suppress errors during garbage collection

keep/cli.py CHANGED Viewed

@@ -38,6 +38,13 @@ else:
     configure_quiet_mode(quiet=True)
+def _version_callback(value: bool):
+    if value:
+        from importlib.metadata import version
+        print(f"keep {version('keep-skill')}")
+        raise typer.Exit()
 def _verbose_callback(value: bool):
     if value:
         enable_debug_mode()
@@ -242,6 +249,12 @@ def main_callback(
         callback=_full_callback,
         is_eager=True,
     )] = False,
+    version: Annotated[Optional[bool], typer.Option(
+        "--version",
+        help="Show version and exit",
+        callback=_version_callback,
+        is_eager=True,
+    )] = None,
     store: Annotated[Optional[Path], typer.Option(
         "--store", "-s",
         envvar="KEEP_STORE_PATH",
@@ -251,7 +264,7 @@ def main_callback(
     )] = None,
 ):
     """Reflective memory with semantic search."""
-    # If no subcommand provided, show the current context (now)
+    # If no subcommand provided, show the current intentions (now)
     if ctx.invoked_subcommand is None:
         from .api import NOWDOC_ID
         kp = _get_keeper(None, "default")
@@ -285,6 +298,7 @@ CollectionOption = Annotated[
     str,
     typer.Option(
         "--collection", "-c",
+        envvar="KEEP_COLLECTION",
         help="Collection name"
     )
 ]
@@ -442,6 +456,28 @@ def _parse_tags(tags: Optional[list[str]]) -> dict[str, str]:
     return parsed
+def _filter_by_tags(items: list, tags: list[str]) -> list:
+    """
+    Filter items by tag specifications (AND logic).
+    Each tag can be:
+    - "key" - item must have this tag key (any value)
+    - "key=value" - item must have this exact tag
+    """
+    if not tags:
+        return items
+    result = items
+    for t in tags:
+        if "=" in t:
+            key, value = t.split("=", 1)
+            result = [item for item in result if item.tags.get(key) == value]
+        else:
+            # Key only - check if key exists
+            result = [item for item in result if t in item.tags]
+    return result
 def _timestamp() -> str:
     """Generate timestamp for auto-generated IDs."""
     from datetime import datetime, timezone
@@ -475,6 +511,10 @@ def find(
     include_self: Annotated[bool, typer.Option(
         help="Include the queried item (only with --id)"
     )] = False,
+    tag: Annotated[Optional[list[str]], typer.Option(
+        "--tag", "-t",
+        help="Filter by tag (key or key=value, repeatable)"
+    )] = None,
     store: StoreOption = None,
     collection: CollectionOption = "default",
     limit: LimitOption = 10,
@@ -487,6 +527,7 @@ def find(
     Examples:
         keep find "authentication"              # Search by text
         keep find --id file:///path/to/doc.md   # Find similar to item
+        keep find "auth" -t project=myapp       # Search + filter by tag
     """
     if id and query:
         typer.echo("Error: Specify either a query or --id, not both", err=True)
@@ -497,12 +538,19 @@ def find(
     kp = _get_keeper(store, collection)
+    # Search with higher limit if filtering, then post-filter
+    search_limit = limit * 5 if tag else limit
     if id:
-        results = kp.find_similar(id, limit=limit, since=since, include_self=include_self)
+        results = kp.find_similar(id, limit=search_limit, since=since, include_self=include_self)
     else:
-        results = kp.find(query, limit=limit, since=since)
+        results = kp.find(query, limit=search_limit, since=since)
-    typer.echo(_format_items(results, as_json=_get_json_output()))
+    # Post-filter by tags if specified
+    if tag:
+        results = _filter_by_tags(results, tag)
+    typer.echo(_format_items(results[:limit], as_json=_get_json_output()))
 @app.command()
@@ -683,10 +731,6 @@ def update(
         "--summary",
         help="User-provided summary (skips auto-summarization)"
     )] = None,
-    lazy: Annotated[bool, typer.Option(
-        "--lazy",
-        help="Fast mode: use truncated summary, queue for later processing"
-    )] = False,
 ):
     """
     Add or update a document in the store.
@@ -715,15 +759,15 @@ def update(
         parsed_tags = {**frontmatter_tags, **parsed_tags}  # CLI tags override
         # Use content-addressed ID for stdin text (enables versioning)
         doc_id = id or _text_content_id(content)
-        item = kp.remember(content, id=doc_id, summary=summary, tags=parsed_tags or None, lazy=lazy)
+        item = kp.remember(content, id=doc_id, summary=summary, tags=parsed_tags or None)
     elif source and _URI_SCHEME_PATTERN.match(source):
         # URI mode: fetch from URI (ID is the URI itself)
-        item = kp.update(source, tags=parsed_tags or None, summary=summary, lazy=lazy)
+        item = kp.update(source, tags=parsed_tags or None, summary=summary)
     elif source:
         # Text mode: inline content (no :// in source)
         # Use content-addressed ID for text (enables versioning)
         doc_id = id or _text_content_id(source)
-        item = kp.remember(source, id=doc_id, summary=summary, tags=parsed_tags or None, lazy=lazy)
+        item = kp.remember(source, id=doc_id, summary=summary, tags=parsed_tags or None)
     else:
         typer.echo("Error: Provide content, URI, or '-' for stdin", err=True)
         raise typer.Exit(1)
@@ -756,19 +800,25 @@ def now(
     collection: CollectionOption = "default",
     tags: Annotated[Optional[list[str]], typer.Option(
         "--tag", "-t",
-        help="Tag as key=value (can be repeated)"
+        help="Set tag (with content) or filter (without content)"
     )] = None,
 ):
     """
-    Get or set the current working context.
+    Get or set the current working intentions.
-    With no arguments, displays the current context.
+    With no arguments, displays the current intentions.
     With content, replaces it.
+    Tags behave differently based on mode:
+    - With content: -t sets tags on the update
+    - Without content: -t filters version history
     \b
     Examples:
-        keep now                         # Show current context
-        keep now "What's important now"  # Update context
+        keep now                         # Show current intentions
+        keep now "What's important now"  # Update intentions
+        keep now "Auth work" -t project=myapp  # Update with tag
+        keep now -t project=myapp        # Find version with tag
         keep now -f context.md           # Read content from file
         keep now --reset                 # Reset to default from system
         keep now -V 1                    # Previous version
@@ -891,18 +941,70 @@ def now(
         item = kp.set_now(new_content, tags=parsed_tags or None)
         typer.echo(_format_item(item, as_json=_get_json_output()))
     else:
-        # Get current context with version navigation and similar items
-        item = kp.get_now()
-        version_nav = kp.get_version_nav(NOWDOC_ID, None, collection=collection)
-        similar_items = kp.get_similar_for_display(NOWDOC_ID, limit=3, collection=collection)
-        similar_offsets = {s.id: kp.get_version_offset(s) for s in similar_items}
-        typer.echo(_format_item(
-            item,
-            as_json=_get_json_output(),
-            version_nav=version_nav,
-            similar_items=similar_items,
-            similar_offsets=similar_offsets,
-        ))
+        # Get current intentions (or search version history if tags specified)
+        if tags:
+            # Search version history for most recent version with matching tags
+            item = _find_now_version_by_tags(kp, tags, collection)
+            if item is None:
+                typer.echo("No version found matching tags", err=True)
+                raise typer.Exit(1)
+            # No version nav or similar items for filtered results
+            typer.echo(_format_item(item, as_json=_get_json_output()))
+        else:
+            # Standard: get current with version navigation and similar items
+            item = kp.get_now()
+            version_nav = kp.get_version_nav(NOWDOC_ID, None, collection=collection)
+            similar_items = kp.get_similar_for_display(NOWDOC_ID, limit=3, collection=collection)
+            similar_offsets = {s.id: kp.get_version_offset(s) for s in similar_items}
+            typer.echo(_format_item(
+                item,
+                as_json=_get_json_output(),
+                version_nav=version_nav,
+                similar_items=similar_items,
+                similar_offsets=similar_offsets,
+            ))
+def _find_now_version_by_tags(kp, tags: list[str], collection: str):
+    """
+    Search nowdoc version history for most recent version matching all tags.
+    Checks current version first, then scans archived versions.
+    """
+    from .api import NOWDOC_ID
+    # Parse tag filters
+    tag_filters = []
+    for t in tags:
+        if "=" in t:
+            key, value = t.split("=", 1)
+            tag_filters.append((key, value))
+        else:
+            tag_filters.append((t, None))  # Key only
+    def matches_tags(item_tags: dict) -> bool:
+        for key, value in tag_filters:
+            if value is not None:
+                if item_tags.get(key) != value:
+                    return False
+            else:
+                if key not in item_tags:
+                    return False
+        return True
+    # Check current version first
+    current = kp.get_now()
+    if current and matches_tags(current.tags):
+        return current
+    # Scan archived versions (newest first)
+    versions = kp.list_versions(NOWDOC_ID, limit=100, collection=collection)
+    for i, v in enumerate(versions):
+        if matches_tags(v.tags):
+            # Found match - get full item at this version offset
+            return kp.get_version(NOWDOC_ID, i + 1, collection=collection)
+    return None
 @app.command()
@@ -924,6 +1026,10 @@ def get(
         "--no-similar",
         help="Suppress similar items in output"
     )] = False,
+    tag: Annotated[Optional[list[str]], typer.Option(
+        "--tag", "-t",
+        help="Require tag (key or key=value, repeatable)"
+    )] = None,
     limit: Annotated[int, typer.Option(
         "--limit", "-n",
         help="Max items for --history or --similar (default: 10)"
@@ -944,6 +1050,7 @@ def get(
         keep get doc:1 --history        # List all versions
         keep get doc:1 --similar        # List similar items
         keep get doc:1 --no-similar     # Suppress similar items
+        keep get doc:1 -t project=myapp # Only if tag matches
     """
     kp = _get_keeper(store, collection)
@@ -1078,6 +1185,13 @@ def get(
             typer.echo(f"Not found: {actual_id}", err=True)
         raise typer.Exit(1)
+    # Check tag filter if specified
+    if tag:
+        filtered = _filter_by_tags([item], tag)
+        if not filtered:
+            typer.echo(f"Tag filter not matched: {actual_id}", err=True)
+            raise typer.Exit(1)
     # Get version navigation
     version_nav = kp.get_version_nav(actual_id, internal_version, collection=collection)

keep/config.py CHANGED Viewed

@@ -19,7 +19,7 @@ import tomli_w
 CONFIG_FILENAME = "keep.toml"
 CONFIG_VERSION = 3  # Bumped for document versioning support
-SYSTEM_DOCS_VERSION = 1  # Increment when bundled system docs content changes
+SYSTEM_DOCS_VERSION = 3  # Increment when bundled system docs content changes
 @dataclass

keep/data/system/library.md ADDED Viewed

@@ -0,0 +1,144 @@
+---
+tags:
+  category: system
+  context: library
+---
+# Library
+Public domain texts for testing and bootstrapping keep.
+The content, as well as the format, is relevant to the practice of this skill.
+## Resolving File Paths
+The library files are located in the `docs/library/` directory of the keep package. In the URI templates below, `{keep_library}` stands for the absolute path of that directory.
+To construct URIs for these files:
+1. **In a cloned repo:** Use `file://$PWD/docs/library/{filename}`
+2. **With installed package:** Use Python to find the path:
+   ```python
+   from importlib.resources import files
+   library_path = files("keep").parent / "docs" / "library"
+   uri = f"file://{library_path}/{filename}"
+   ```
+---
+## Files
+### ancrenewisse.pdf
+- **URI template:** `file://{keep_library}/ancrenewisse.pdf`
+- **Title:** Ancrene Wisse (Ancrene Riwle)
+- **Date:** c. 1200s (13th century)
+- **Language:** Middle English
+- **Translator:** James Morton, The Camden Society, London 1853
+- **Source:** https://www.bsswebsite.me.uk/History/AncreneRiwle/AncreneRiwle2.html
+- **Status:** Public domain
+- **Description:** A monastic guide for Christian anchoresses.  Provides guidance on conduct with an "inner" and "outer" rule, and their relationship: "one relates to the right conduct of the heart; the other, to the regulation of the outward life".
+---
+### impermanence_verse.txt
+- **URI template:** `file://{keep_library}/impermanence_verse.txt`
+- **Title:** 無常偈 (Impermanence Verse / Closing Verse)
+- **Date:** Traditional Zen liturgy (exact origin uncertain)
+- **Language:** Japanese (Kanji/Kana), with romanization and multiple English translations
+- **Source:** Soto Zen liturgy
+- **Status:** Traditional teaching, freely shared
+- **Description:** Four-line verse chanted at the end of Zen practice sessions. "Great is the matter of birth and death / Life slips quickly by / Time waits for no one / Wake up! Wake up!" Includes character-by-character breakdown, cultural context, and linguistic notes.
+---
+### mn61.html
+- **URI template:** `file://{keep_library}/mn61.html`
+- **Title:** Ambalaṭṭhikārāhulovāda Sutta (MN 61) - The Exhortation to Rāhula at Mango Stone
+- **Date:** Original: ~5th century BCE; Translation: contemporary
+- **Language:** English translation from Pali
+- **Translator:** Thanissaro Bhikkhu
+- **Source:** https://www.dhammatalks.org/suttas/MN/MN61.html
+- **Format:** Raw HTML (complete with markup, navigation, footnotes)
+- **License:** Freely distributed for educational use
+- **Description:** Buddha's teaching to his son Rāhula on reflection before, during, and after bodily, verbal, and mental actions. The triple-check pattern: reflect before acting/speaking, check while doing, review after. Mirror metaphor for self-reflection.
+**Format note:** Kept as raw HTML to test document processing and summarization on markup-heavy content.
+---
+### an5.57_translation-en-sujato.json
+- **URI template:** `file://{keep_library}/an5.57_translation-en-sujato.json`
+- **Title:** Abhiṇhapaccavekkhitabbaṭhānasutta (AN 5.57) - Subjects for Regular Reviewing
+- **Date:** Original: ~5th century BCE; Translation: modern
+- **Language:** English translation from Pali
+- **Translator:** Bhikkhu Sujato
+- **Source:** SuttaCentral
+- **Source URL:** https://suttacentral.net/an5.57/en/sujato?lang=en
+- **Data:** https://github.com/suttacentral/sc-data/blob/main/sc_bilara_data/translation/en/sujato/sutta/an/an5/an5.57_translation-en-sujato.json
+- **License:** Creative Commons CC0 1.0 Universal (SuttaCentral translations)
+- **Description:** The Five Remembrances - five subjects that all sentient beings should reflect on regularly: aging, sickness, death, separation from loved ones, and being heir to one's own actions.  "Reviewing this subject often, they entirely give up bad conduct, or at least reduce it".
+---
+### fortytwo_chapters.txt
+- **URI template:** `file://{keep_library}/fortytwo_chapters.txt`
+- **Title:** 佛說四十二章經 (Sutra of Forty-Two Chapters)
+- **Date:** Eastern Han Dynasty (25-220 CE)
+- **Language:** Classical Chinese
+- **Source:** Project Gutenberg (#23585)
+- **Status:** Public domain
+- **Description:** One of the earliest Buddhist texts to reach China, traditionally attributed to translation by Kāśyapa Mātaṅga and Dharmarakṣa.
+---
+### mumford_sticks_and_stones.txt
+- **URI template:** `file://{keep_library}/mumford_sticks_and_stones.txt`
+- **Title:** Sticks and Stones: A Study of American Architecture and Civilization
+- **Author:** Lewis Mumford (1895-1990)
+- **Date:** 1924
+- **Language:** English
+- **Source:** Internet Archive (sticksstones0000lewi)
+- **Status:** Public domain (published before 1929)
+- **Description:** Mumford's first major work on architecture, examining American building traditions from medieval influences through industrialization. Includes chapters on "The Medieval Tradition," "The Renaissance in New England," "The Age of Rationalism," and more.
+**Note:** This is OCR text from archive.org. Quality is good but may contain occasional scanning artifacts.
+---
+### true_person_no_rank.md
+- **URI template:** `file://{keep_library}/true_person_no_rank.md`
+- **Title:** 無位真人 (The True Person of No Rank)
+- **Date:** Original: 9th century CE; Commentary layers: 9th-20th centuries
+- **Language:** Chinese (verified original text) with English translation and commentary
+- **Source:** Record of Linji (臨濟錄, Línjì Lù); Book of Serenity (從容錄) Case 38
+- **Primary sources:** DILA Buddhist Dictionary, multiple scholarly translations
+- **Status:** Core teaching in public domain; compiled with verification notes
+- **Description:** Linji Yixuan's famous teaching: "Within this mass of red flesh, there is a true person of no rank, constantly coming and going through the gates of your face." Multi-layered document exploring the original teaching, koan tradition, Dōgen's commentary, modern interpretations, and linguistic analysis. Includes Chinese text (verified), translations, and commentary relationships.
+---
+## Usage for Testing
+These texts provide diverse test cases for keep:
+1. **Different languages:** English, Chinese (Classical and modern romanization), Japanese, Middle English, Pali (via translation)
+2. **Different formats:** PDF, plain text, JSON, Markdown, HTML (with markup)
+3. **Different domains:** Buddhist teachings, Zen liturgy, architectural criticism, medieval instructional prose
+4. **Different writing styles:** Ancient scripture, koan commentary, scholarly analysis, liturgical verse, teaching notes
+5. **Different lengths:** Four-line verses to full books
+6. **Different structures:** Linear narratives, multi-layered commentaries, character-by-character analysis, mirror patterns, web documents with navigation
+7. **Multilingual content:** Japanese-English parallel texts, Chinese with romanization, cross-linguistic terminology
+8. **Processing challenges:** Markdown, UTF-8 plaintext, OCR artifacts (Mumford), HTML markup (MN 61), PDF extraction (Ancrene Wisse), structured JSON data (AN5.57).
+---
+## Adding More Test Data
+When adding public domain texts:
+1. Verify their relevance to the practice of this skill
+2. Verify compatibility with the MIT license, e.g. public domain status (pre-1929 for US, or explicit license)
+3. Include source URL (Project Gutenberg, archive.org, etc.)
+4. Add metadata to this index
+---
+## License
+Each text retains its original license status (public domain or Creative Commons as noted above). This index and the dataset organization are released under CC0 1.0.

keep/providers/base.py CHANGED Viewed

@@ -148,6 +148,45 @@ class EmbeddingProvider(Protocol):
 # Summarization
 # -----------------------------------------------------------------------------
+# Shared system prompt for all LLM-based summarization providers
+SUMMARIZATION_SYSTEM_PROMPT = """You are a precise summarization assistant.
+Create a concise summary of the provided document that captures:
+- The main purpose or topic
+- Key points or functionality
+- Important details that would help someone decide if this document is relevant
+IMPORTANT: Start the summary directly with the content. Do NOT begin with phrases like:
+- "Here is a concise summary"
+- "This document describes"
+- "The document covers"
+- "Summary:"
+Just state the facts directly. Keep the summary under 200 words."""
+def strip_summary_preamble(text: str) -> str:
+    """
+    Remove common LLM preambles from summaries.
+    Many models add introductory phrases despite instructions not to.
+    This post-processes the output to strip them.
+    """
+    import re
+    preambles = [
+        r"^here is a concise summary[^:]*:\s*",
+        r"^here is the summary[^:]*:\s*",
+        r"^here's a summary[^:]*:\s*",
+        r"^summary:\s*",
+        r"^the document describes\s+",
+        r"^this document describes\s+",
+        r"^the document covers\s+",
+        r"^this document covers\s+",
+    ]
+    result = text
+    for pattern in preambles:
+        result = re.sub(pattern, "", result, flags=re.IGNORECASE)
+    return result
 @runtime_checkable
 class SummarizationProvider(Protocol):
     """

keep/providers/llm.py CHANGED Viewed

@@ -6,7 +6,13 @@ import json
 import os
 from typing import Any
-from .base import SummarizationProvider, TaggingProvider, get_registry
+from .base import (
+    SummarizationProvider,
+    TaggingProvider,
+    get_registry,
+    SUMMARIZATION_SYSTEM_PROMPT,
+    strip_summary_preamble,
+)
 # -----------------------------------------------------------------------------
@@ -16,19 +22,11 @@ from .base import SummarizationProvider, TaggingProvider, get_registry
 class AnthropicSummarization:
     """
     Summarization provider using Anthropic's Claude API.
     Requires: ANTHROPIC_API_KEY environment variable.
     Optionally reads from OpenClaw config via OPENCLAW_CONFIG env var.
     """
-    SYSTEM_PROMPT = """You are a precise summarization assistant.
-Create a concise summary of the provided document that captures:
-- The main purpose or topic
-- Key points or functionality
-- Important details that would help someone decide if this document is relevant
-Be factual and specific. Do not include phrases like "This document" - just state the content directly."""
     def __init__(
         self,
         model: str = "claude-3-5-haiku-20241022",
@@ -56,22 +54,22 @@ Be factual and specific. Do not include phrases like "This document" - just stat
         """Generate summary using Anthropic Claude."""
         # Truncate very long content
         truncated = content[:50000] if len(content) > 50000 else content
         try:
             response = self.client.messages.create(
                 model=self.model,
                 max_tokens=self.max_tokens,
-                system=self.SYSTEM_PROMPT,
+                system=SUMMARIZATION_SYSTEM_PROMPT,
                 messages=[
                     {"role": "user", "content": truncated}
                 ],
             )
             # Extract text from response
             if response.content and len(response.content) > 0:
-                return response.content[0].text
+                return strip_summary_preamble(response.content[0].text)
             return truncated[:500]  # Fallback
-        except Exception as e:
+        except Exception:
             # Fallback to truncation on error
             return truncated[:500]
@@ -79,18 +77,10 @@ Be factual and specific. Do not include phrases like "This document" - just stat
 class OpenAISummarization:
     """
     Summarization provider using OpenAI's chat API.
     Requires: KEEP_OPENAI_API_KEY or OPENAI_API_KEY environment variable.
     """
-    SYSTEM_PROMPT = """You are a precise summarization assistant.
-Create a concise summary of the provided document that captures:
-- The main purpose or topic
-- Key points or functionality
-- Important details that would help someone decide if this document is relevant
-Be factual and specific. Do not include phrases like "This document" - just state the content directly."""
     def __init__(
         self,
         model: str = "gpt-4o-mini",
@@ -101,41 +91,39 @@ Be factual and specific. Do not include phrases like "This document" - just stat
             from openai import OpenAI
         except ImportError:
             raise RuntimeError("OpenAISummarization requires 'openai' library")
         self.model = model
         self.max_tokens = max_tokens
         key = api_key or os.environ.get("KEEP_OPENAI_API_KEY") or os.environ.get("OPENAI_API_KEY")
         if not key:
             raise ValueError("OpenAI API key required")
         self._client = OpenAI(api_key=key)
     def summarize(self, content: str, *, max_length: int = 500) -> str:
         """Generate a summary using OpenAI."""
         # Truncate very long content to avoid token limits
         truncated = content[:50000] if len(content) > 50000 else content
         response = self._client.chat.completions.create(
             model=self.model,
             messages=[
-                {"role": "system", "content": self.SYSTEM_PROMPT},
+                {"role": "system", "content": SUMMARIZATION_SYSTEM_PROMPT},
                 {"role": "user", "content": truncated},
             ],
             max_tokens=self.max_tokens,
             temperature=0.3,
         )
-        return response.choices[0].message.content.strip()
+        return strip_summary_preamble(response.choices[0].message.content.strip())
 class OllamaSummarization:
     """
     Summarization provider using Ollama's local API.
     """
-    SYSTEM_PROMPT = OpenAISummarization.SYSTEM_PROMPT
     def __init__(
         self,
         model: str = "llama3.2",
@@ -143,27 +131,27 @@ class OllamaSummarization:
     ):
         self.model = model
         self.base_url = base_url.rstrip("/")
     def summarize(self, content: str, *, max_length: int = 500) -> str:
         """Generate a summary using Ollama."""
         import requests
         truncated = content[:50000] if len(content) > 50000 else content
         response = requests.post(
             f"{self.base_url}/api/chat",
             json={
                 "model": self.model,
                 "messages": [
-                    {"role": "system", "content": self.SYSTEM_PROMPT},
+                    {"role": "system", "content": SUMMARIZATION_SYSTEM_PROMPT},
                     {"role": "user", "content": truncated},
                 ],
                 "stream": False,
             },
         )
         response.raise_for_status()
-        return response.json()["message"]["content"].strip()
+        return strip_summary_preamble(response.json()["message"]["content"].strip())
 class PassthroughSummarization:

keep/providers/mlx.py CHANGED Viewed

@@ -10,7 +10,13 @@ Requires: pip install mlx-lm mlx
 import os
 from typing import Any
-from .base import EmbeddingProvider, SummarizationProvider, get_registry
+from .base import (
+    EmbeddingProvider,
+    SummarizationProvider,
+    get_registry,
+    SUMMARIZATION_SYSTEM_PROMPT,
+    strip_summary_preamble,
+)
 class MLXEmbedding:
@@ -75,21 +81,12 @@ class MLXEmbedding:
 class MLXSummarization:
     """
     Summarization provider using MLX-LM on Apple Silicon.
     Runs local LLMs optimized for Apple Silicon. No API key required.
     Requires: pip install mlx-lm
     """
-    SYSTEM_PROMPT = """You are a precise summarization assistant.
-Create a concise summary of the provided document that captures:
-- The main purpose or topic
-- Key points or functionality
-- Important details that would help someone decide if this document is relevant
-Be factual and specific. Do not include phrases like "This document" - just state the content directly.
-Keep the summary under 200 words."""
     def __init__(
         self,
         model: str = "mlx-community/Llama-3.2-3B-Instruct-4bit",
@@ -122,27 +119,27 @@ Keep the summary under 200 words."""
     def summarize(self, content: str, *, max_length: int = 500) -> str:
         """Generate a summary using MLX-LM."""
         from mlx_lm import generate
         # Truncate very long content to fit context window
         # Most models have 4k-8k context, leave room for prompt and response
         max_content_chars = 12000
         truncated = content[:max_content_chars] if len(content) > max_content_chars else content
         # Format as chat (works with instruction-tuned models)
         if hasattr(self._tokenizer, "apply_chat_template"):
             messages = [
-                {"role": "system", "content": self.SYSTEM_PROMPT},
+                {"role": "system", "content": SUMMARIZATION_SYSTEM_PROMPT},
                 {"role": "user", "content": f"Summarize the following:\n\n{truncated}"},
             ]
             prompt = self._tokenizer.apply_chat_template(
-                messages,
-                tokenize=False,
+                messages,
+                tokenize=False,
                 add_generation_prompt=True
             )
         else:
             # Fallback for models without chat template
-            prompt = f"{self.SYSTEM_PROMPT}\n\nDocument:\n{truncated}\n\nSummary:"
+            prompt = f"{SUMMARIZATION_SYSTEM_PROMPT}\n\nDocument:\n{truncated}\n\nSummary:"
         # Generate
         response = generate(
             self._model,
@@ -151,8 +148,8 @@ Keep the summary under 200 words."""
             max_tokens=self.max_tokens,
             verbose=False,
         )
-        return response.strip()
+        return strip_summary_preamble(response.strip())
 class MLXTagging:

keep/store.py CHANGED Viewed

@@ -556,3 +556,34 @@ class ChromaStore:
         """Return the number of items in a collection."""
         coll = self._get_collection(collection)
         return coll.count()
+    # -------------------------------------------------------------------------
+    # Resource Management
+    # -------------------------------------------------------------------------
+    def close(self) -> None:
+        """
+        Close ChromaDB client and release resources.
+        Good practice to call when done, though Python's GC will clean up eventually.
+        """
+        self._collections.clear()
+        # ChromaDB PersistentClient doesn't have explicit close(),
+        # but clearing references allows garbage collection
+        self._client = None
+    def __enter__(self):
+        """Context manager entry."""
+        return self
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        """Context manager exit - close resources."""
+        self.close()
+        return False
+    def __del__(self):
+        """Cleanup on deletion."""
+        try:
+            self.close()
+        except Exception:
+            pass  # Suppress errors during garbage collection

{keep_skill-0.8.1.dist-info → keep_skill-0.10.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: keep-skill
-Version: 0.8.1
+Version: 0.10.0
 Summary: Reflective memory - remember and search documents by meaning
 Project-URL: Homepage, https://github.com/hughpyle/keep
 Project-URL: Repository, https://github.com/hughpyle/keep
@@ -24,6 +24,7 @@ Requires-Dist: typer>=0.9
 Provides-Extra: anthropic
 Requires-Dist: anthropic>=0.40.0; extra == 'anthropic'
 Provides-Extra: dev
+Requires-Dist: google-genai>=1.0.0; extra == 'dev'
 Requires-Dist: pytest-cov>=4.0; extra == 'dev'
 Requires-Dist: pytest>=7.0; extra == 'dev'
 Provides-Extra: documents
@@ -60,7 +61,7 @@ uv tool install 'keep-skill[local]'
 keep init
 # Index content
-keep update path/to/document.md -t project=myapp
+keep update file:///path/to/document.md -t project=myapp
 keep update "Rate limit is 100 req/min" -t topic=api
 # Search by meaning
@@ -68,7 +69,7 @@ keep find "what's the rate limit?"
 # Track what you're working on
 keep now "Debugging auth flow"
-keep now -V 1                    # Previous context
+keep now -V 1                    # Previous intentions
 ```
 ---
@@ -115,6 +116,7 @@ keep init                              # Creates .keep/ at repo root
 # Index files and notes
 keep update file:///path/to/doc.md -t project=myapp
+keep update "Token refresh needs clock sync" -t topic=auth
 keep update "Important insight" -t type=note
 # Search
@@ -129,11 +131,12 @@ keep get ID --history                  # All versions
 # Tags
 keep list --tag project=myapp          # Find by tag
+keep find "auth" -t topic=auth         # Cross-project topic search
 keep list --tags=                      # List all tag keys
-# Current context
+# Current intentions
 keep now                               # Show what you're working on
-keep now "Fixing login bug"            # Update context
+keep now "Fixing login bug"            # Update intentions
 ```
 ### Python API

{keep_skill-0.8.1.dist-info → keep_skill-0.10.0.dist-info}/RECORD RENAMED Viewed

@@ -1,9 +1,9 @@
-keep/__init__.py,sha256=yBK7jvbQmx9fRBGanICNgrQdyCQHzt5bNPp098Qvh9E,1621
+keep/__init__.py,sha256=-KvH6aY6B2n2kKGSizYqoQK38FWOa5lEfzYyRst33lU,1622
 keep/__main__.py,sha256=3Uu70IhIDIjh8OW6jp9jQQ3dF2lKdJWi_3FtRIQMiMY,104
-keep/api.py,sha256=V_w4ahhgmgk10ehjs5JAvTct8wOvL8xsvW-nko_9Yjw,64858
+keep/api.py,sha256=GGMY6w2a1b-Yve1aWz35dhZuSA9Va5Whe6M1Ek87yw4,65105
 keep/chunking.py,sha256=neAXOLSvVwbUxapbqq7nZrbSNSzMXuhxj-ODoOSodsU,11830
-keep/cli.py,sha256=SAB4LEZrX08sDVBxYD1s_zo2WXTYJBwhZswlQdjVkQ8,46024
-keep/config.py,sha256=hWjiJDg2u6p8IJpksXe0ngVxQNcKHKRFKUDJQBFlG7I,16226
+keep/cli.py,sha256=Kwm5kwBiFH92BlKIHRaNN3UTSRexEclgcaPtk3SGpSs,49922
+keep/config.py,sha256=UTDjhUPiGVbBBmkGFgprE_3B8OC13HgtX18cix-NLCc,16226
 keep/context.py,sha256=CNpjmrv6eW2kV1E0MO6qAQfhYKRlfzAL--6v4Mj1nFY,71
 keep/document_store.py,sha256=UswqKIGSc5E-r7Tg9k0g5-byYnuar3e9FieQ7WNod9k,29109
 keep/errors.py,sha256=G9e5FbdfeugyfHOuL_SPZlM5jgWWnwsX4hM7IzanBZc,857
@@ -11,23 +11,24 @@ keep/indexing.py,sha256=dpPYo3WXnIhFDWinz5ZBZVk7_qumeNpP4EpOIY0zMbs,6063
 keep/logging_config.py,sha256=IGwkgIyg-TfYaT4MnoCXfmjeHAe_wsB_XQ1QhVT_ro8,3503
 keep/paths.py,sha256=Dv7pM6oo2QgjL6sj5wPjhuMOK2wqUkfd4Kz08TwJ1ps,3331
 keep/pending_summaries.py,sha256=_irGe7P1Lmog2c5cEgx-BElpq4YJW-tEmF5A3IUZQbQ,5727
-keep/store.py,sha256=SBc2QdTyApdDDVjm2uZQI6tGbV5Hurfetgj7dyTO65o,17881
+keep/store.py,sha256=JjgqxW6NGpQa_FEOl9KIQ39IkRIVWIHd9gRoRdWvEKk,18867
 keep/types.py,sha256=irvUJYUHQgQdVqC4_lgrG0FbTN1BdZqFxZr0ubVPSG4,2314
 keep/data/__init__.py,sha256=C1YARrudHwK2Bmlxkh7dZlIaNJ5m5WrSTglCdG8e3T0,24
 keep/data/system/__init__.py,sha256=Rp92_sBO3kscuWXJomo0HKeHfU-N4BgBeT3-5El0Mcg,28
 keep/data/system/conversations.md,sha256=jE53wYSUyu5uPFNtO1Tu6w4f5QxqLei7muxLF_kZE2s,9837
 keep/data/system/domains.md,sha256=EHE6zU2-lx7UeLqyOTmoWl1WVlvgRq3_QnFb_EZceEY,5584
+keep/data/system/library.md,sha256=rmA4LBtgGOI5vEB9ohjhrLptWNIRvjQCfxuHxY3D4LU,7471
 keep/data/system/now.md,sha256=GyQo_LizSIVKbj5q52q4ErV-nxz8rzUOlkILjgNu25s,388
 keep/providers/__init__.py,sha256=6AwJYc6cF1ZT6BcU_6ATyeWk7MHohdVU2-ccqDSvCHU,1094
-keep/providers/base.py,sha256=7Ug4Kj9fK2Dq4zDcZjn-GKsoZBOAlB9b-FMk969ER-g,14590
+keep/providers/base.py,sha256=qUpVbgLHH4zdvimcM0YMyJnExF7WJ7_U0w_sslpPhSI,15897
 keep/providers/documents.py,sha256=EXeSy5i3RUL0kciIC6w3ldAEfbTIyC5fgfzC_WAI0iY,8211
 keep/providers/embedding_cache.py,sha256=gna6PZEJanbn2GUN0vj1b1MC0xVWePM9cot2KgZUdu8,8856
 keep/providers/embeddings.py,sha256=zi8GyitKexdbCJyU1nLrUhGt_zzPn3udYrrPZ5Ak8Wo,9081
-keep/providers/llm.py,sha256=BxROKOklKbkGsHcSADPNNgWQExgSN6Bg4KPQIxVuB3U,12441
-keep/providers/mlx.py,sha256=aNl00r9tGi5tCGj2ArYH7CmDHtL1jLjVzb1rofU1DAo,9050
+keep/providers/llm.py,sha256=Pcq1fK7NXBzdVrQegjmAFmuHdZXpQraApr8M6O6hJFE,11680
+keep/providers/mlx.py,sha256=xQTXM9kYWUhfqpRVPNCDyF2nkOo50ZYs5DxHELbFB4g,8707
 keep/providers/summarization.py,sha256=MlVTcYipaqp2lT-QYnznp0AMuPVG36QfcTQnvY7Gb-Q,3409
-keep_skill-0.8.1.dist-info/METADATA,sha256=ClBi2sP1kofuCjyjM1UsO8Yo_ouw-XxhLPIpMyDcVjE,6042
-keep_skill-0.8.1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
-keep_skill-0.8.1.dist-info/entry_points.txt,sha256=W8yiI4kNeW0IC8ji4EHRWrvdhFxzaqTIePUhJAJAMOo,39
-keep_skill-0.8.1.dist-info/licenses/LICENSE,sha256=zsm0tpvtyUkevcjn5BIvs9jAho8iwxq3Ax9647AaOSg,1086
-keep_skill-0.8.1.dist-info/RECORD,,
+keep_skill-0.10.0.dist-info/METADATA,sha256=jMADwUfBCyw8r7OIcOJUdpxEEheYRlXqBIAf7VVO6eM,6238
+keep_skill-0.10.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+keep_skill-0.10.0.dist-info/entry_points.txt,sha256=W8yiI4kNeW0IC8ji4EHRWrvdhFxzaqTIePUhJAJAMOo,39
+keep_skill-0.10.0.dist-info/licenses/LICENSE,sha256=zsm0tpvtyUkevcjn5BIvs9jAho8iwxq3Ax9647AaOSg,1086
+keep_skill-0.10.0.dist-info/RECORD,,

{keep_skill-0.8.1.dist-info → keep_skill-0.10.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{keep_skill-0.8.1.dist-info → keep_skill-0.10.0.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{keep_skill-0.8.1.dist-info → keep_skill-0.10.0.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

keep-skill 0.8.1__py3-none-any.whl → 0.10.0__py3-none-any.whl

keep-skill 0.8.1py3-none-any.whl → 0.10.0py3-none-any.whl