PyPI - keep-skill - Versions diffs - 0.9.0__py3-none-any.whl → 0.11.0__py3-none-any.whl - Mend

keep-skill 0.9.0 (py3-none-any.whl) → 0.11.0 (py3-none-any.whl)

This diff compares publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (14)

keep/__init__.py +1 -1
keep/api.py +23 -10
keep/cli.py +300 -32
keep/config.py +29 -1
keep/data/system/library.md +144 -0
keep/providers/base.py +39 -0
keep/providers/llm.py +29 -41
keep/providers/mlx.py +18 -21
keep/store.py +31 -0
{keep_skill-0.9.0.dist-info → keep_skill-0.11.0.dist-info}/METADATA +5 -2
{keep_skill-0.9.0.dist-info → keep_skill-0.11.0.dist-info}/RECORD +14 -13
{keep_skill-0.9.0.dist-info → keep_skill-0.11.0.dist-info}/WHEEL +0 -0
{keep_skill-0.9.0.dist-info → keep_skill-0.11.0.dist-info}/entry_points.txt +0 -0
{keep_skill-0.9.0.dist-info → keep_skill-0.11.0.dist-info}/licenses/LICENSE +0 -0

keep/__init__.py CHANGED Viewed

@@ -40,7 +40,7 @@ if not os.environ.get("KEEP_VERBOSE"):
 from .api import Keeper, NOWDOC_ID
 from .types import Item, filter_non_system_tags, SYSTEM_TAG_PREFIX, INTERNAL_TAGS
-__version__ = "0.7.0"
+__version__ = "0.11.0"
 __all__ = [
     "Keeper",
     "Item",

keep/api.py CHANGED Viewed

@@ -191,6 +191,7 @@ SYSTEM_DOC_IDS = {
     "now.md": "_system:now",
     "conversations.md": "_system:conversations",
     "domains.md": "_system:domains",
+    "library.md": "_system:library",
 }
@@ -404,24 +405,25 @@ class Keeper:
         except Exception as e:
             logger.debug("Error scanning old system docs: %s", e)
-        # Second pass: create any missing system docs from bundled content
+        # Second pass: create or update system docs from bundled content
         for path in SYSTEM_DOC_DIR.glob("*.md"):
             new_id = SYSTEM_DOC_IDS.get(path.name)
             if new_id is None:
                 logger.debug("Skipping unknown system doc: %s", path.name)
                 continue
-            # Skip if already exists
-            if self.exists(new_id):
-                stats["skipped"] += 1
-                continue
             try:
                 content, tags = _load_frontmatter(path)
                 tags["category"] = "system"
+                existed = self.exists(new_id)
+                # remember() handles both create and update (with re-summarization)
                 self.remember(content, id=new_id, tags=tags)
-                stats["created"] += 1
-                logger.info("Created system doc: %s", new_id)
+                if existed:
+                    stats["migrated"] += 1
+                    logger.info("Updated system doc: %s", new_id)
+                else:
+                    stats["created"] += 1
+                    logger.info("Created system doc: %s", new_id)
             except FileNotFoundError:
                 # System file missing - skip silently
                 pass
@@ -1776,10 +1778,18 @@ class Keeper:
     def close(self) -> None:
         """
-        Close resources (embedding cache connection, pending queue, etc.).
+        Close resources (stores, caches, queues).
         Good practice to call when done, though Python's GC will clean up eventually.
         """
+        # Close ChromaDB store
+        if hasattr(self, '_store') and self._store is not None:
+            self._store.close()
+        # Close document store (SQLite)
+        if hasattr(self, '_document_store') and self._document_store is not None:
+            self._document_store.close()
         # Close embedding cache if it was loaded
         if self._embedding_provider is not None:
             if hasattr(self._embedding_provider, '_cache'):
@@ -1802,4 +1812,7 @@ class Keeper:
     def __del__(self):
         """Cleanup on deletion."""
-        self.close()
+        try:
+            self.close()
+        except Exception:
+            pass  # Suppress errors during garbage collection

keep/cli.py CHANGED Viewed

@@ -25,6 +25,7 @@ VERSION_SUFFIX_PATTERN = re.compile(r'@V\{(\d+)\}$')
 _URI_SCHEME_PATTERN = re.compile(r'^[a-zA-Z][a-zA-Z0-9+.-]*://')
 from .api import Keeper, _text_content_id
+from .config import get_tool_directory
 from .document_store import VersionInfo
 from .types import Item
 from .logging_config import configure_quiet_mode, enable_debug_mode
@@ -38,6 +39,13 @@ else:
     configure_quiet_mode(quiet=True)
+def _version_callback(value: bool):
+    if value:
+        from importlib.metadata import version
+        print(f"keep {version('keep-skill')}")
+        raise typer.Exit()
 def _verbose_callback(value: bool):
     if value:
         enable_debug_mode()
@@ -242,6 +250,12 @@ def main_callback(
         callback=_full_callback,
         is_eager=True,
     )] = False,
+    version: Annotated[Optional[bool], typer.Option(
+        "--version",
+        help="Show version and exit",
+        callback=_version_callback,
+        is_eager=True,
+    )] = None,
     store: Annotated[Optional[Path], typer.Option(
         "--store", "-s",
         envvar="KEEP_STORE_PATH",
@@ -285,6 +299,7 @@ CollectionOption = Annotated[
     str,
     typer.Option(
         "--collection", "-c",
+        envvar="KEEP_COLLECTION",
         help="Collection name"
     )
 ]
@@ -442,6 +457,28 @@ def _parse_tags(tags: Optional[list[str]]) -> dict[str, str]:
     return parsed
+def _filter_by_tags(items: list, tags: list[str]) -> list:
+    """
+    Filter items by tag specifications (AND logic).
+    Each tag can be:
+    - "key" - item must have this tag key (any value)
+    - "key=value" - item must have this exact tag
+    """
+    if not tags:
+        return items
+    result = items
+    for t in tags:
+        if "=" in t:
+            key, value = t.split("=", 1)
+            result = [item for item in result if item.tags.get(key) == value]
+        else:
+            # Key only - check if key exists
+            result = [item for item in result if t in item.tags]
+    return result
 def _timestamp() -> str:
     """Generate timestamp for auto-generated IDs."""
     from datetime import datetime, timezone
@@ -475,6 +512,10 @@ def find(
     include_self: Annotated[bool, typer.Option(
         help="Include the queried item (only with --id)"
     )] = False,
+    tag: Annotated[Optional[list[str]], typer.Option(
+        "--tag", "-t",
+        help="Filter by tag (key or key=value, repeatable)"
+    )] = None,
     store: StoreOption = None,
     collection: CollectionOption = "default",
     limit: LimitOption = 10,
@@ -487,6 +528,7 @@ def find(
     Examples:
         keep find "authentication"              # Search by text
         keep find --id file:///path/to/doc.md   # Find similar to item
+        keep find "auth" -t project=myapp       # Search + filter by tag
     """
     if id and query:
         typer.echo("Error: Specify either a query or --id, not both", err=True)
@@ -497,12 +539,19 @@ def find(
     kp = _get_keeper(store, collection)
+    # Search with higher limit if filtering, then post-filter
+    search_limit = limit * 5 if tag else limit
     if id:
-        results = kp.find_similar(id, limit=limit, since=since, include_self=include_self)
+        results = kp.find_similar(id, limit=search_limit, since=since, include_self=include_self)
     else:
-        results = kp.find(query, limit=limit, since=since)
+        results = kp.find(query, limit=search_limit, since=since)
-    typer.echo(_format_items(results, as_json=_get_json_output()))
+    # Post-filter by tags if specified
+    if tag:
+        results = _filter_by_tags(results, tag)
+    typer.echo(_format_items(results[:limit], as_json=_get_json_output()))
 @app.command()
@@ -752,7 +801,7 @@ def now(
     collection: CollectionOption = "default",
     tags: Annotated[Optional[list[str]], typer.Option(
         "--tag", "-t",
-        help="Tag as key=value (can be repeated)"
+        help="Set tag (with content) or filter (without content)"
     )] = None,
 ):
     """
@@ -761,10 +810,17 @@ def now(
     With no arguments, displays the current intentions.
     With content, replaces it.
+    \b
+    Tags behave differently based on mode:
+    - With content: -t sets tags on the update
+    - Without content: -t filters version history
     \b
     Examples:
         keep now                         # Show current intentions
         keep now "What's important now"  # Update intentions
+        keep now "Auth work" -t project=myapp  # Update with tag
+        keep now -t project=myapp        # Find version with tag
         keep now -f context.md           # Read content from file
         keep now --reset                 # Reset to default from system
         keep now -V 1                    # Previous version
@@ -887,18 +943,70 @@ def now(
         item = kp.set_now(new_content, tags=parsed_tags or None)
         typer.echo(_format_item(item, as_json=_get_json_output()))
     else:
-        # Get current intentions with version navigation and similar items
-        item = kp.get_now()
-        version_nav = kp.get_version_nav(NOWDOC_ID, None, collection=collection)
-        similar_items = kp.get_similar_for_display(NOWDOC_ID, limit=3, collection=collection)
-        similar_offsets = {s.id: kp.get_version_offset(s) for s in similar_items}
-        typer.echo(_format_item(
-            item,
-            as_json=_get_json_output(),
-            version_nav=version_nav,
-            similar_items=similar_items,
-            similar_offsets=similar_offsets,
-        ))
+        # Get current intentions (or search version history if tags specified)
+        if tags:
+            # Search version history for most recent version with matching tags
+            item = _find_now_version_by_tags(kp, tags, collection)
+            if item is None:
+                typer.echo("No version found matching tags", err=True)
+                raise typer.Exit(1)
+            # No version nav or similar items for filtered results
+            typer.echo(_format_item(item, as_json=_get_json_output()))
+        else:
+            # Standard: get current with version navigation and similar items
+            item = kp.get_now()
+            version_nav = kp.get_version_nav(NOWDOC_ID, None, collection=collection)
+            similar_items = kp.get_similar_for_display(NOWDOC_ID, limit=3, collection=collection)
+            similar_offsets = {s.id: kp.get_version_offset(s) for s in similar_items}
+            typer.echo(_format_item(
+                item,
+                as_json=_get_json_output(),
+                version_nav=version_nav,
+                similar_items=similar_items,
+                similar_offsets=similar_offsets,
+            ))
+def _find_now_version_by_tags(kp, tags: list[str], collection: str):
+    """
+    Search nowdoc version history for most recent version matching all tags.
+    Checks current version first, then scans archived versions.
+    """
+    from .api import NOWDOC_ID
+    # Parse tag filters
+    tag_filters = []
+    for t in tags:
+        if "=" in t:
+            key, value = t.split("=", 1)
+            tag_filters.append((key, value))
+        else:
+            tag_filters.append((t, None))  # Key only
+    def matches_tags(item_tags: dict) -> bool:
+        for key, value in tag_filters:
+            if value is not None:
+                if item_tags.get(key) != value:
+                    return False
+            else:
+                if key not in item_tags:
+                    return False
+        return True
+    # Check current version first
+    current = kp.get_now()
+    if current and matches_tags(current.tags):
+        return current
+    # Scan archived versions (newest first)
+    versions = kp.list_versions(NOWDOC_ID, limit=100, collection=collection)
+    for i, v in enumerate(versions):
+        if matches_tags(v.tags):
+            # Found match - get full item at this version offset
+            return kp.get_version(NOWDOC_ID, i + 1, collection=collection)
+    return None
 @app.command()
@@ -920,6 +1028,10 @@ def get(
         "--no-similar",
         help="Suppress similar items in output"
     )] = False,
+    tag: Annotated[Optional[list[str]], typer.Option(
+        "--tag", "-t",
+        help="Require tag (key or key=value, repeatable)"
+    )] = None,
     limit: Annotated[int, typer.Option(
         "--limit", "-n",
         help="Max items for --history or --similar (default: 10)"
@@ -940,6 +1052,7 @@ def get(
         keep get doc:1 --history        # List all versions
         keep get doc:1 --similar        # List similar items
         keep get doc:1 --no-similar     # Suppress similar items
+        keep get doc:1 -t project=myapp # Only if tag matches
     """
     kp = _get_keeper(store, collection)
@@ -1074,6 +1187,13 @@ def get(
             typer.echo(f"Not found: {actual_id}", err=True)
         raise typer.Exit(1)
+    # Check tag filter if specified
+    if tag:
+        filtered = _filter_by_tags([item], tag)
+        if not filtered:
+            typer.echo(f"Tag filter not matched: {actual_id}", err=True)
+            raise typer.Exit(1)
     # Get version navigation
     version_nav = kp.get_version_nav(actual_id, internal_version, collection=collection)
@@ -1153,12 +1273,146 @@ def init(
+def _get_config_value(kp: Keeper, path: str):
+    """
+    Get config value by dotted path.
+    Special paths (not in TOML):
+        file - config file location
+        tool - package directory (SKILL.md location)
+        store - store path
+        collections - list of collections
+    Dotted paths into config:
+        providers - all provider config
+        providers.embedding - embedding provider name
+        providers.summarization - summarization provider name
+        embedding.* - embedding config details
+        summarization.* - summarization config details
+        tags - default tags
+    """
+    cfg = kp._config
+    # Special built-in paths (not in TOML)
+    if path == "file":
+        return str(cfg.config_path) if cfg else None
+    if path == "tool":
+        return str(get_tool_directory())
+    if path == "store":
+        return str(kp._store_path)
+    if path == "collections":
+        return kp.list_collections()
+    # Provider shortcuts
+    if path == "providers":
+        if cfg:
+            return {
+                "embedding": cfg.embedding.name,
+                "summarization": cfg.summarization.name,
+                "document": cfg.document.name,
+            }
+        return None
+    if path == "providers.embedding":
+        return cfg.embedding.name if cfg else None
+    if path == "providers.summarization":
+        return cfg.summarization.name if cfg else None
+    if path == "providers.document":
+        return cfg.document.name if cfg else None
+    # Tags shortcut
+    if path == "tags":
+        return cfg.default_tags if cfg else {}
+    # Dotted path into config attributes
+    if not cfg:
+        raise typer.BadParameter(f"No config loaded, cannot access: {path}")
+    parts = path.split(".")
+    value = cfg
+    for part in parts:
+        if hasattr(value, part):
+            value = getattr(value, part)
+        elif hasattr(value, "params") and part in value.params:
+            # Provider config params
+            value = value.params[part]
+        elif isinstance(value, dict) and part in value:
+            value = value[part]
+        else:
+            raise typer.BadParameter(f"Unknown config path: {path}")
+    # Return name for provider objects
+    if hasattr(value, "name") and hasattr(value, "params"):
+        return value.name
+    return value
+# Settings that may not be configured but are available
+AVAILABLE_SETTINGS = {
+    "tags": {
+        "description": "Default tags applied to all operations",
+        "example": {"project": "myproject", "topic": "mytopic"},
+    },
+}
+def _format_config_with_defaults(kp: Keeper) -> str:
+    """Format config output with commented defaults for unused settings."""
+    cfg = kp._config
+    config_path = cfg.config_path if cfg else None
+    store_path = kp._store_path
+    lines = []
+    # Show paths
+    lines.append(f"file: {config_path}")
+    lines.append(f"tool: {get_tool_directory()}")
+    if cfg and cfg.config_dir and cfg.config_dir.resolve() != store_path.resolve():
+        lines.append(f"store: {store_path}")
+    else:
+        lines.append(f"store: {store_path}")
+    lines.append(f"collections: {kp.list_collections()}")
+    if cfg:
+        lines.append("")
+        lines.append("providers:")
+        lines.append(f"  embedding: {cfg.embedding.name}")
+        lines.append(f"  summarization: {cfg.summarization.name}")
+        lines.append(f"  document: {cfg.document.name}")
+        # Show configured tags if any
+        if cfg.default_tags:
+            lines.append("")
+            lines.append("tags:")
+            for key, value in cfg.default_tags.items():
+                lines.append(f"  {key}: {value}")
+        else:
+            # Show commented example for tags
+            lines.append("")
+            lines.append("# tags:")
+            lines.append("#   project: myproject")
+            lines.append("#   topic: mytopic")
+    return "\n".join(lines)
 @app.command()
 def config(
+    path: Annotated[Optional[str], typer.Argument(
+        help="Config path to get (e.g., 'file', 'tool', 'store', 'providers.embedding')"
+    )] = None,
     store: StoreOption = None,
 ):
     """
-    Show current configuration and store location.
+    Show configuration. Optionally get a specific value by path.
+    \b
+    Examples:
+        keep config              # Show all config
+        keep config file         # Config file location
+        keep config tool         # Package directory (SKILL.md location)
+        keep config store        # Store path
+        keep config providers    # All provider config
+        keep config providers.embedding  # Embedding provider name
     """
     kp = _get_keeper(store, "default")
@@ -1166,28 +1420,42 @@ def config(
     config_path = cfg.config_path if cfg else None
     store_path = kp._store_path
+    # If a specific path is requested, return just that value
+    if path:
+        try:
+            value = _get_config_value(kp, path)
+        except typer.BadParameter as e:
+            typer.echo(str(e), err=True)
+            raise typer.Exit(1)
+        if _get_json_output():
+            typer.echo(json.dumps({path: value}, indent=2))
+        else:
+            # Raw output for shell scripting
+            if isinstance(value, (list, dict)):
+                typer.echo(json.dumps(value))
+            else:
+                typer.echo(value)
+        return
+    # Full config output
     if _get_json_output():
         result = {
+            "file": str(config_path) if config_path else None,
+            "tool": str(get_tool_directory()),
             "store": str(store_path),
-            "config": str(config_path) if config_path else None,
             "collections": kp.list_collections(),
+            "providers": {
+                "embedding": cfg.embedding.name if cfg else None,
+                "summarization": cfg.summarization.name if cfg else None,
+                "document": cfg.document.name if cfg else None,
+            },
         }
-        if cfg:
-            result["embedding"] = cfg.embedding.name
-            result["summarization"] = cfg.summarization.name
+        if cfg and cfg.default_tags:
+            result["tags"] = cfg.default_tags
         typer.echo(json.dumps(result, indent=2))
     else:
-        # Show paths
-        typer.echo(f"Config: {config_path}")
-        if cfg and cfg.config_dir and cfg.config_dir.resolve() != store_path.resolve():
-            typer.echo(f"Store:  {store_path}")
-        typer.echo(f"Collections: {kp.list_collections()}")
-        if cfg:
-            typer.echo(f"\nProviders:")
-            typer.echo(f"  Embedding: {cfg.embedding.name}")
-            typer.echo(f"  Summarization: {cfg.summarization.name}")
+        typer.echo(_format_config_with_defaults(kp))
 @app.command("process-pending")

keep/config.py CHANGED Viewed

@@ -5,6 +5,7 @@ The configuration is stored as a TOML file in the store directory.
 It specifies which providers to use and their parameters.
 """
+import importlib.resources
 import os
 import platform
 import tomllib
@@ -19,7 +20,34 @@ import tomli_w
 CONFIG_FILENAME = "keep.toml"
 CONFIG_VERSION = 3  # Bumped for document versioning support
-SYSTEM_DOCS_VERSION = 1  # Increment when bundled system docs content changes
+SYSTEM_DOCS_VERSION = 3  # Increment when bundled system docs content changes
+def get_tool_directory() -> Path:
+    """
+    Return directory containing SKILL.md (package root).
+    For installed package: SKILL.md is at the same level as the keep/ package.
+    For development: it's at the repository root.
+    """
+    # Get the keep package location
+    keep_pkg = importlib.resources.files("keep")
+    pkg_path = Path(str(keep_pkg))
+    # SKILL.md is one level up from the package
+    tool_dir = pkg_path.parent
+    # Verify SKILL.md exists there
+    if (tool_dir / "SKILL.md").exists():
+        return tool_dir
+    # Fallback: check if we're in a development install
+    # where SKILL.md might be at repository root
+    if pkg_path.name == "keep" and (pkg_path.parent / "SKILL.md").exists():
+        return pkg_path.parent
+    # Last resort: return the package parent anyway
+    return tool_dir
 @dataclass

keep/data/system/library.md ADDED Viewed

@@ -0,0 +1,144 @@
+---
+tags:
+  category: system
+  context: library
+---
+# Library
+Public domain texts for testing and bootstrapping keep.
+The content, as well as the format, is relevant to the practice of this skill.
+## Resolving File Paths
+The library files are located in the `docs/library/` directory of the keep package.
+To construct URIs for these files:
+1. **From shell:** Use `file://$(keep config tool)/docs/library/{filename}`
+2. **In Python:**
+   ```python
+   from importlib.resources import files
+   library_path = files("keep").parent / "docs" / "library"
+   uri = f"file://{library_path}/{filename}"
+   ```
+---
+## Files
+### ancrenewisse.pdf
+- **URI template:** `file://{keep_library}/ancrenewisse.pdf`
+- **Title:** Ancrene Wisse (Ancrene Riwle)
+- **Date:** c. 1200s (13th century)
+- **Language:** Middle English
+- **Translator:** James Morton, The Camden Society, London 1853
+- **Source:** https://www.bsswebsite.me.uk/History/AncreneRiwle/AncreneRiwle2.html
+- **Status:** Public domain
+- **Description:** A monastic guide for Christian anchoresses. It provides guidance on conduct through an "inner" and an "outer" rule, and on the relationship between them: "one relates to the right conduct of the heart; the other, to the regulation of the outward life".
+---
+### impermanence_verse.txt
+- **URI template:** `file://{keep_library}/impermanence_verse.txt`
+- **Title:** 無常偈 (Impermanence Verse / Closing Verse)
+- **Date:** Traditional Zen liturgy (exact origin uncertain)
+- **Language:** Japanese (Kanji/Kana), with romanization and multiple English translations
+- **Source:** Soto Zen liturgy
+- **Status:** Traditional teaching, freely shared
+- **Description:** Four-line verse chanted at the end of Zen practice sessions. "Great is the matter of birth and death / Life slips quickly by / Time waits for no one / Wake up! Wake up!" Includes character-by-character breakdown, cultural context, and linguistic notes.
+---
+### mn61.html
+- **URI template:** `file://{keep_library}/mn61.html`
+- **Title:** Ambalaṭṭhikārāhulovāda Sutta (MN 61) - The Exhortation to Rāhula at Mango Stone
+- **Date:** Original: ~5th century BCE; Translation: contemporary
+- **Language:** English translation from Pali
+- **Translator:** Thanissaro Bhikkhu
+- **Source:** https://www.dhammatalks.org/suttas/MN/MN61.html
+- **Format:** Raw HTML (complete with markup, navigation, footnotes)
+- **License:** Freely distributed for educational use
+- **Description:** Buddha's teaching to his son Rāhula on reflection before, during, and after bodily, verbal, and mental actions. The triple-check pattern: reflect before acting/speaking, check while doing, review after. Mirror metaphor for self-reflection.
+**Format note:** Kept as raw HTML to test document processing and summarization on markup-heavy content.
+---
+### an5.57_translation-en-sujato.json
+- **URI template:** `file://{keep_library}/an5.57_translation-en-sujato.json`
+- **Title:** Upajjhāyasutta (AN 5.57) - Subjects for Regular Reviewing
+- **Date:** Original: ~5th century BCE; Translation: modern
+- **Language:** English translation from Pali
+- **Translator:** Bhikkhu Sujato
+- **Source:** SuttaCentral
+- **Source URL:** https://suttacentral.net/an5.57/en/sujato?lang=en
+- **Data:** https://github.com/suttacentral/sc-data/blob/main/sc_bilara_data/translation/en/sujato/sutta/an/an5/an5.57_translation-en-sujato.json
+- **License:** Creative Commons CC0 1.0 Universal (SuttaCentral translations)
+- **Description:** The Five Remembrances - five subjects that all sentient beings should reflect on regularly: aging, sickness, death, separation from loved ones, and being heir to one's own actions. "Reviewing this subject often, they entirely give up bad conduct, or at least reduce it."
+---
+### fortytwo_chapters.txt
+- **URI template:** `file://{keep_library}/fortytwo_chapters.txt`
+- **Title:** 佛說四十二章經 (Sutra of Forty-Two Chapters)
+- **Date:** Eastern Han Dynasty (25-220 CE)
+- **Language:** Classical Chinese
+- **Source:** Project Gutenberg (#23585)
+- **Status:** Public domain
+- **Description:** One of the earliest Buddhist texts to reach China, traditionally attributed to translation by Kāśyapa Mātaṅga and Dharmarakṣa.
+---
+### mumford_sticks_and_stones.txt
+- **URI template:** `file://{keep_library}/mumford_sticks_and_stones.txt`
+- **Title:** Sticks and Stones: A Study of American Architecture and Civilization
+- **Author:** Lewis Mumford (1895-1990)
+- **Date:** 1924
+- **Language:** English
+- **Source:** Internet Archive (sticksstones0000lewi)
+- **Status:** Public domain (published before 1929)
+- **Description:** Mumford's first major work on architecture, examining American building traditions from medieval influences through industrialization. Includes chapters on "The Medieval Tradition," "The Renaissance in New England," "The Age of Rationalism," and more.
+**Note:** This is OCR text from archive.org. Quality is good but may contain occasional scanning artifacts.
+---
+### true_person_no_rank.md
+- **URI template:** `file://{keep_library}/true_person_no_rank.md`
+- **Title:** 無位真人 (The True Person of No Rank)
+- **Date:** Original: 9th century CE; Commentary layers: 9th-20th centuries
+- **Language:** Chinese (verified original text) with English translation and commentary
+- **Source:** Record of Linji (臨濟錄, Línjì Lù); Book of Serenity (從容錄) Case 38
+- **Primary sources:** DILA Buddhist Dictionary, multiple scholarly translations
+- **Status:** Core teaching in public domain; compiled with verification notes
+- **Description:** Linji Yixuan's famous teaching: "Within this mass of red flesh, there is a true person of no rank, constantly coming and going through the gates of your face." Multi-layered document exploring the original teaching, koan tradition, Dōgen's commentary, modern interpretations, and linguistic analysis. Includes Chinese text (verified), translations, and commentary relationships.
+---
+## Usage for Testing
+These texts provide diverse test cases for keep:
+1. **Different languages:** English, Chinese (Classical, with modern romanization), Japanese, Middle English, Pali (via translation)
+2. **Different formats:** PDF, plain text, JSON, Markdown, HTML (with markup)
+3. **Different domains:** Buddhist teachings, Zen liturgy, architectural criticism, medieval instructional prose
+4. **Different writing styles:** Ancient scripture, koan commentary, scholarly analysis, liturgical verse, teaching notes
+5. **Different lengths:** Four-line verses to full books
+6. **Different structures:** Linear narratives, multi-layered commentaries, character-by-character analysis, mirror patterns, web documents with navigation
+7. **Multilingual content:** Japanese-English parallel texts, Chinese with romanization, cross-linguistic terminology
+8. **Processing challenges:** Markdown, UTF-8 plaintext, OCR artifacts (Mumford), HTML markup (MN 61), PDF extraction (Ancrene Wisse), structured JSON data (AN5.57).
+---
+## Adding More Test Data
+When adding public domain texts:
+1. Verify their relevance to the practice of this skill
+2. Verify compatibility with the MIT license, e.g. public domain status (pre-1929 for US, or explicit license)
+3. Include source URL (Project Gutenberg, archive.org, etc.)
+4. Add metadata to this index
+---
+## License
+Each text retains its original license status (public domain or Creative Commons as noted above). This index and the dataset organization are released under CC0 1.0.

keep/providers/base.py CHANGED Viewed

@@ -148,6 +148,45 @@ class EmbeddingProvider(Protocol):
 # Summarization
 # -----------------------------------------------------------------------------
+# Shared system prompt for all LLM-based summarization providers
+SUMMARIZATION_SYSTEM_PROMPT = """You are a precise summarization assistant.
+Create a concise summary of the provided document that captures:
+- The main purpose or topic
+- Key points or functionality
+- Important details that would help someone decide if this document is relevant
+IMPORTANT: Start the summary directly with the content. Do NOT begin with phrases like:
+- "Here is a concise summary"
+- "This document describes"
+- "The document covers"
+- "Summary:"
+Just state the facts directly. Keep the summary under 200 words."""
+def strip_summary_preamble(text: str) -> str:
+    """
+    Remove common LLM preambles from summaries.
+    Many models add introductory phrases despite instructions not to.
+    This post-processes the output to strip them.
+    """
+    import re
+    preambles = [
+        r"^here is a concise summary[^:]*:\s*",
+        r"^here is the summary[^:]*:\s*",
+        r"^here's a summary[^:]*:\s*",
+        r"^summary:\s*",
+        r"^the document describes\s+",
+        r"^this document describes\s+",
+        r"^the document covers\s+",
+        r"^this document covers\s+",
+    ]
+    result = text
+    for pattern in preambles:
+        result = re.sub(pattern, "", result, flags=re.IGNORECASE)
+    return result
 @runtime_checkable
 class SummarizationProvider(Protocol):
     """

keep/providers/llm.py CHANGED Viewed

@@ -6,7 +6,13 @@ import json
 import os
 from typing import Any
-from .base import SummarizationProvider, TaggingProvider, get_registry
+from .base import (
+    SummarizationProvider,
+    TaggingProvider,
+    get_registry,
+    SUMMARIZATION_SYSTEM_PROMPT,
+    strip_summary_preamble,
+)
 # -----------------------------------------------------------------------------
@@ -16,19 +22,11 @@ from .base import SummarizationProvider, TaggingProvider, get_registry
 class AnthropicSummarization:
     """
     Summarization provider using Anthropic's Claude API.
     Requires: ANTHROPIC_API_KEY environment variable.
     Optionally reads from OpenClaw config via OPENCLAW_CONFIG env var.
     """
-    SYSTEM_PROMPT = """You are a precise summarization assistant.
-Create a concise summary of the provided document that captures:
-- The main purpose or topic
-- Key points or functionality
-- Important details that would help someone decide if this document is relevant
-Be factual and specific. Do not include phrases like "This document" - just state the content directly."""
     def __init__(
         self,
         model: str = "claude-3-5-haiku-20241022",
@@ -56,22 +54,22 @@ Be factual and specific. Do not include phrases like "This document" - just stat
         """Generate summary using Anthropic Claude."""
         # Truncate very long content
         truncated = content[:50000] if len(content) > 50000 else content
         try:
             response = self.client.messages.create(
                 model=self.model,
                 max_tokens=self.max_tokens,
-                system=self.SYSTEM_PROMPT,
+                system=SUMMARIZATION_SYSTEM_PROMPT,
                 messages=[
                     {"role": "user", "content": truncated}
                 ],
             )
             # Extract text from response
             if response.content and len(response.content) > 0:
-                return response.content[0].text
+                return strip_summary_preamble(response.content[0].text)
             return truncated[:500]  # Fallback
-        except Exception as e:
+        except Exception:
             # Fallback to truncation on error
             return truncated[:500]
@@ -79,18 +77,10 @@ Be factual and specific. Do not include phrases like "This document" - just stat
 class OpenAISummarization:
     """
     Summarization provider using OpenAI's chat API.
     Requires: KEEP_OPENAI_API_KEY or OPENAI_API_KEY environment variable.
     """
-    SYSTEM_PROMPT = """You are a precise summarization assistant.
-Create a concise summary of the provided document that captures:
-- The main purpose or topic
-- Key points or functionality
-- Important details that would help someone decide if this document is relevant
-Be factual and specific. Do not include phrases like "This document" - just state the content directly."""
     def __init__(
         self,
         model: str = "gpt-4o-mini",
@@ -101,41 +91,39 @@ Be factual and specific. Do not include phrases like "This document" - just stat
             from openai import OpenAI
         except ImportError:
             raise RuntimeError("OpenAISummarization requires 'openai' library")
         self.model = model
         self.max_tokens = max_tokens
         key = api_key or os.environ.get("KEEP_OPENAI_API_KEY") or os.environ.get("OPENAI_API_KEY")
         if not key:
             raise ValueError("OpenAI API key required")
         self._client = OpenAI(api_key=key)
     def summarize(self, content: str, *, max_length: int = 500) -> str:
         """Generate a summary using OpenAI."""
         # Truncate very long content to avoid token limits
         truncated = content[:50000] if len(content) > 50000 else content
         response = self._client.chat.completions.create(
             model=self.model,
             messages=[
-                {"role": "system", "content": self.SYSTEM_PROMPT},
+                {"role": "system", "content": SUMMARIZATION_SYSTEM_PROMPT},
                 {"role": "user", "content": truncated},
             ],
             max_tokens=self.max_tokens,
             temperature=0.3,
         )
-        return response.choices[0].message.content.strip()
+        return strip_summary_preamble(response.choices[0].message.content.strip())
 class OllamaSummarization:
     """
     Summarization provider using Ollama's local API.
     """
-    SYSTEM_PROMPT = OpenAISummarization.SYSTEM_PROMPT
     def __init__(
         self,
         model: str = "llama3.2",
@@ -143,27 +131,27 @@ class OllamaSummarization:
     ):
         self.model = model
         self.base_url = base_url.rstrip("/")
     def summarize(self, content: str, *, max_length: int = 500) -> str:
         """Generate a summary using Ollama."""
         import requests
         truncated = content[:50000] if len(content) > 50000 else content
         response = requests.post(
             f"{self.base_url}/api/chat",
             json={
                 "model": self.model,
                 "messages": [
-                    {"role": "system", "content": self.SYSTEM_PROMPT},
+                    {"role": "system", "content": SUMMARIZATION_SYSTEM_PROMPT},
                     {"role": "user", "content": truncated},
                 ],
                 "stream": False,
             },
         )
         response.raise_for_status()
-        return response.json()["message"]["content"].strip()
+        return strip_summary_preamble(response.json()["message"]["content"].strip())
 class PassthroughSummarization:

keep/providers/mlx.py CHANGED Viewed

@@ -10,7 +10,13 @@ Requires: pip install mlx-lm mlx
 import os
 from typing import Any
-from .base import EmbeddingProvider, SummarizationProvider, get_registry
+from .base import (
+    EmbeddingProvider,
+    SummarizationProvider,
+    get_registry,
+    SUMMARIZATION_SYSTEM_PROMPT,
+    strip_summary_preamble,
+)
 class MLXEmbedding:
@@ -75,21 +81,12 @@ class MLXEmbedding:
 class MLXSummarization:
     """
     Summarization provider using MLX-LM on Apple Silicon.
     Runs local LLMs optimized for Apple Silicon. No API key required.
     Requires: pip install mlx-lm
     """
-    SYSTEM_PROMPT = """You are a precise summarization assistant.
-Create a concise summary of the provided document that captures:
-- The main purpose or topic
-- Key points or functionality
-- Important details that would help someone decide if this document is relevant
-Be factual and specific. Do not include phrases like "This document" - just state the content directly.
-Keep the summary under 200 words."""
     def __init__(
         self,
         model: str = "mlx-community/Llama-3.2-3B-Instruct-4bit",
@@ -122,27 +119,27 @@ Keep the summary under 200 words."""
     def summarize(self, content: str, *, max_length: int = 500) -> str:
         """Generate a summary using MLX-LM."""
         from mlx_lm import generate
         # Truncate very long content to fit context window
         # Most models have 4k-8k context, leave room for prompt and response
         max_content_chars = 12000
         truncated = content[:max_content_chars] if len(content) > max_content_chars else content
         # Format as chat (works with instruction-tuned models)
         if hasattr(self._tokenizer, "apply_chat_template"):
             messages = [
-                {"role": "system", "content": self.SYSTEM_PROMPT},
+                {"role": "system", "content": SUMMARIZATION_SYSTEM_PROMPT},
                 {"role": "user", "content": f"Summarize the following:\n\n{truncated}"},
             ]
             prompt = self._tokenizer.apply_chat_template(
-                messages,
-                tokenize=False,
+                messages,
+                tokenize=False,
                 add_generation_prompt=True
             )
         else:
             # Fallback for models without chat template
-            prompt = f"{self.SYSTEM_PROMPT}\n\nDocument:\n{truncated}\n\nSummary:"
+            prompt = f"{SUMMARIZATION_SYSTEM_PROMPT}\n\nDocument:\n{truncated}\n\nSummary:"
         # Generate
         response = generate(
             self._model,
@@ -151,8 +148,8 @@ Keep the summary under 200 words."""
             max_tokens=self.max_tokens,
             verbose=False,
         )
-        return response.strip()
+        return strip_summary_preamble(response.strip())
 class MLXTagging:

keep/store.py CHANGED Viewed

@@ -556,3 +556,34 @@ class ChromaStore:
         """Return the number of items in a collection."""
         coll = self._get_collection(collection)
         return coll.count()
+    # -------------------------------------------------------------------------
+    # Resource Management
+    # -------------------------------------------------------------------------
+    def close(self) -> None:
+        """
+        Close ChromaDB client and release resources.
+        Good practice to call when done, though Python's GC will clean up eventually.
+        """
+        self._collections.clear()
+        # ChromaDB PersistentClient doesn't have explicit close(),
+        # but clearing references allows garbage collection
+        self._client = None
+    def __enter__(self):
+        """Context manager entry."""
+        return self
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        """Context manager exit - close resources."""
+        self.close()
+        return False
+    def __del__(self):
+        """Cleanup on deletion."""
+        try:
+            self.close()
+        except Exception:
+            pass  # Suppress errors during garbage collection

{keep_skill-0.9.0.dist-info → keep_skill-0.11.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: keep-skill
-Version: 0.9.0
+Version: 0.11.0
 Summary: Reflective memory - remember and search documents by meaning
 Project-URL: Homepage, https://github.com/hughpyle/keep
 Project-URL: Repository, https://github.com/hughpyle/keep
@@ -24,6 +24,7 @@ Requires-Dist: typer>=0.9
 Provides-Extra: anthropic
 Requires-Dist: anthropic>=0.40.0; extra == 'anthropic'
 Provides-Extra: dev
+Requires-Dist: google-genai>=1.0.0; extra == 'dev'
 Requires-Dist: pytest-cov>=4.0; extra == 'dev'
 Requires-Dist: pytest>=7.0; extra == 'dev'
 Provides-Extra: documents
@@ -60,7 +61,7 @@ uv tool install 'keep-skill[local]'
 keep init
 # Index content
-keep update path/to/document.md -t project=myapp
+keep update file:///path/to/document.md -t project=myapp
 keep update "Rate limit is 100 req/min" -t topic=api
 # Search by meaning
@@ -115,6 +116,7 @@ keep init                              # Creates .keep/ at repo root
 # Index files and notes
 keep update file:///path/to/doc.md -t project=myapp
+keep update "Token refresh needs clock sync" -t topic=auth
 keep update "Important insight" -t type=note
 # Search
@@ -129,6 +131,7 @@ keep get ID --history                  # All versions
 # Tags
 keep list --tag project=myapp          # Find by tag
+keep find "auth" -t topic=auth         # Cross-project topic search
 keep list --tags=                      # List all tag keys
 # Current intentions

{keep_skill-0.9.0.dist-info → keep_skill-0.11.0.dist-info}/RECORD RENAMED Viewed

@@ -1,9 +1,9 @@
-keep/__init__.py,sha256=yBK7jvbQmx9fRBGanICNgrQdyCQHzt5bNPp098Qvh9E,1621
+keep/__init__.py,sha256=Z9SmBjE91Ip02X-w1qAYEFwsHlzGkvqGcm5BN7qhw0A,1622
 keep/__main__.py,sha256=3Uu70IhIDIjh8OW6jp9jQQ3dF2lKdJWi_3FtRIQMiMY,104
-keep/api.py,sha256=3DHqgIsX7Fu6SwpxHwOnCA5ao0S9DlPschfK4WlN_Hs,64538
+keep/api.py,sha256=GGMY6w2a1b-Yve1aWz35dhZuSA9Va5Whe6M1Ek87yw4,65105
 keep/chunking.py,sha256=neAXOLSvVwbUxapbqq7nZrbSNSzMXuhxj-ODoOSodsU,11830
-keep/cli.py,sha256=m3yokiLUbz1SWUD2IQEhI1WieY3IKVdpLV-f8Nd-BP8,45859
-keep/config.py,sha256=hWjiJDg2u6p8IJpksXe0ngVxQNcKHKRFKUDJQBFlG7I,16226
+keep/cli.py,sha256=82gXDpunX5LpzfKdrRidpPirnurFDZ6pPVBWdXAhwPc,54836
+keep/config.py,sha256=YJ4IN85Y8HqrOxV2qTkGCMOuBcGNLDf0V2FK4KsZD-4,17079
 keep/context.py,sha256=CNpjmrv6eW2kV1E0MO6qAQfhYKRlfzAL--6v4Mj1nFY,71
 keep/document_store.py,sha256=UswqKIGSc5E-r7Tg9k0g5-byYnuar3e9FieQ7WNod9k,29109
 keep/errors.py,sha256=G9e5FbdfeugyfHOuL_SPZlM5jgWWnwsX4hM7IzanBZc,857
@@ -11,23 +11,24 @@ keep/indexing.py,sha256=dpPYo3WXnIhFDWinz5ZBZVk7_qumeNpP4EpOIY0zMbs,6063
 keep/logging_config.py,sha256=IGwkgIyg-TfYaT4MnoCXfmjeHAe_wsB_XQ1QhVT_ro8,3503
 keep/paths.py,sha256=Dv7pM6oo2QgjL6sj5wPjhuMOK2wqUkfd4Kz08TwJ1ps,3331
 keep/pending_summaries.py,sha256=_irGe7P1Lmog2c5cEgx-BElpq4YJW-tEmF5A3IUZQbQ,5727
-keep/store.py,sha256=SBc2QdTyApdDDVjm2uZQI6tGbV5Hurfetgj7dyTO65o,17881
+keep/store.py,sha256=JjgqxW6NGpQa_FEOl9KIQ39IkRIVWIHd9gRoRdWvEKk,18867
 keep/types.py,sha256=irvUJYUHQgQdVqC4_lgrG0FbTN1BdZqFxZr0ubVPSG4,2314
 keep/data/__init__.py,sha256=C1YARrudHwK2Bmlxkh7dZlIaNJ5m5WrSTglCdG8e3T0,24
 keep/data/system/__init__.py,sha256=Rp92_sBO3kscuWXJomo0HKeHfU-N4BgBeT3-5El0Mcg,28
 keep/data/system/conversations.md,sha256=jE53wYSUyu5uPFNtO1Tu6w4f5QxqLei7muxLF_kZE2s,9837
 keep/data/system/domains.md,sha256=EHE6zU2-lx7UeLqyOTmoWl1WVlvgRq3_QnFb_EZceEY,5584
+keep/data/system/library.md,sha256=KFDRN7YCPwxttghcb-ts6je9fd2Mlysk_5H6vrwmgX0,7438
 keep/data/system/now.md,sha256=GyQo_LizSIVKbj5q52q4ErV-nxz8rzUOlkILjgNu25s,388
 keep/providers/__init__.py,sha256=6AwJYc6cF1ZT6BcU_6ATyeWk7MHohdVU2-ccqDSvCHU,1094
-keep/providers/base.py,sha256=7Ug4Kj9fK2Dq4zDcZjn-GKsoZBOAlB9b-FMk969ER-g,14590
+keep/providers/base.py,sha256=qUpVbgLHH4zdvimcM0YMyJnExF7WJ7_U0w_sslpPhSI,15897
 keep/providers/documents.py,sha256=EXeSy5i3RUL0kciIC6w3ldAEfbTIyC5fgfzC_WAI0iY,8211
 keep/providers/embedding_cache.py,sha256=gna6PZEJanbn2GUN0vj1b1MC0xVWePM9cot2KgZUdu8,8856
 keep/providers/embeddings.py,sha256=zi8GyitKexdbCJyU1nLrUhGt_zzPn3udYrrPZ5Ak8Wo,9081
-keep/providers/llm.py,sha256=BxROKOklKbkGsHcSADPNNgWQExgSN6Bg4KPQIxVuB3U,12441
-keep/providers/mlx.py,sha256=aNl00r9tGi5tCGj2ArYH7CmDHtL1jLjVzb1rofU1DAo,9050
+keep/providers/llm.py,sha256=Pcq1fK7NXBzdVrQegjmAFmuHdZXpQraApr8M6O6hJFE,11680
+keep/providers/mlx.py,sha256=xQTXM9kYWUhfqpRVPNCDyF2nkOo50ZYs5DxHELbFB4g,8707
 keep/providers/summarization.py,sha256=MlVTcYipaqp2lT-QYnznp0AMuPVG36QfcTQnvY7Gb-Q,3409
-keep_skill-0.9.0.dist-info/METADATA,sha256=nzvsFdQRS659llg52q-GkZv9CPKF9jX9hx-cP5i-c6k,6051
-keep_skill-0.9.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
-keep_skill-0.9.0.dist-info/entry_points.txt,sha256=W8yiI4kNeW0IC8ji4EHRWrvdhFxzaqTIePUhJAJAMOo,39
-keep_skill-0.9.0.dist-info/licenses/LICENSE,sha256=zsm0tpvtyUkevcjn5BIvs9jAho8iwxq3Ax9647AaOSg,1086
-keep_skill-0.9.0.dist-info/RECORD,,
+keep_skill-0.11.0.dist-info/METADATA,sha256=AWE9565tLR2SJH-sPkdALnoGZVAj9o5OzZMY18Ux-TA,6238
+keep_skill-0.11.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+keep_skill-0.11.0.dist-info/entry_points.txt,sha256=W8yiI4kNeW0IC8ji4EHRWrvdhFxzaqTIePUhJAJAMOo,39
+keep_skill-0.11.0.dist-info/licenses/LICENSE,sha256=zsm0tpvtyUkevcjn5BIvs9jAho8iwxq3Ax9647AaOSg,1086
+keep_skill-0.11.0.dist-info/RECORD,,

{keep_skill-0.9.0.dist-info → keep_skill-0.11.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{keep_skill-0.9.0.dist-info → keep_skill-0.11.0.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{keep_skill-0.9.0.dist-info → keep_skill-0.11.0.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

keep-skill 0.9.0__py3-none-any.whl → 0.11.0__py3-none-any.whl

keep-skill 0.9.0__py3-none-any.whl → 0.11.0__py3-none-any.whl