keep-skill 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
keep/logging_config.py ADDED
@@ -0,0 +1,73 @@
1
+ """
2
+ Logging configuration for keep.
3
+
4
+ Suppress verbose library output by default for better UX.
5
+ """
6
+
7
+ import os
8
+ import sys
9
+ import warnings
10
+
11
+ # Set environment variables BEFORE any imports to suppress warnings early
12
+ if not os.environ.get("KEEP_VERBOSE"):
13
+ os.environ["HF_HUB_DISABLE_PROGRESS_BARS"] = "1"
14
+ os.environ["TRANSFORMERS_VERBOSITY"] = "error"
15
+ os.environ["TOKENIZERS_PARALLELISM"] = "false"
16
+ os.environ["HF_HUB_DISABLE_TELEMETRY"] = "1"
17
+ os.environ["HF_HUB_DISABLE_SYMLINKS_WARNING"] = "1"
18
+
19
+
20
def configure_quiet_mode(quiet: bool = True) -> None:
    """
    Configure logging to suppress verbose library output.

    This silences:
    - HuggingFace transformers progress bars
    - Library warnings (deprecation, etc.)
    - Noisy third-party loggers (transformers, sentence_transformers,
      mlx, chromadb)

    Args:
        quiet: If True, suppress verbose output. If False, this function is
            a no-op; call enable_verbose_mode() to restore defaults.
    """
    # The False path intentionally does nothing: restoration lives in
    # enable_verbose_mode(), which removes the env vars set below.
    if not quiet:
        return

    # Suppress HuggingFace progress bars and warnings
    os.environ["HF_HUB_DISABLE_PROGRESS_BARS"] = "1"
    os.environ["TRANSFORMERS_VERBOSITY"] = "error"
    os.environ["TOKENIZERS_PARALLELISM"] = "false"

    # Suppress Python warnings (including deprecation warnings)
    warnings.filterwarnings("ignore")

    # NOTE: the previous version imported mlx.core here and discarded it;
    # MLX has no global verbosity setting, so the import was dead code and
    # has been removed.

    # Quiet the known-noisy third-party loggers.
    import logging
    for name in ("transformers", "sentence_transformers", "mlx", "chromadb"):
        logging.getLogger(name).setLevel(logging.ERROR)
55
+
56
+
57
def enable_verbose_mode() -> None:
    """Re-enable verbose output for debugging.

    Undoes configure_quiet_mode(): removes the quiet-mode environment
    variables it set, restores the default warnings filter, and resets the
    third-party logger levels to INFO.
    """
    configure_quiet_mode(quiet=False)

    # Restore defaults: remove every env var configure_quiet_mode() sets.
    # (TOKENIZERS_PARALLELISM was previously left behind — fixed.)
    os.environ.pop("HF_HUB_DISABLE_PROGRESS_BARS", None)
    os.environ.pop("TRANSFORMERS_VERBOSITY", None)
    os.environ.pop("TOKENIZERS_PARALLELISM", None)

    # Re-enable warnings
    warnings.filterwarnings("default")

    # Reset logging levels
    import logging
    for name in ("transformers", "sentence_transformers", "mlx", "chromadb"):
        logging.getLogger(name).setLevel(logging.INFO)
keep/paths.py ADDED
@@ -0,0 +1,67 @@
1
+ """
2
+ Utility functions for locating paths.
3
+ """
4
+
5
+ import os
6
+ import warnings
7
+ from pathlib import Path
8
+ from typing import Optional
9
+
10
+
11
def find_git_root(start_path: Optional[Path] = None) -> Optional[Path]:
    """
    Find the root of the git repository containing the given path.

    Args:
        start_path: Path to start searching from. Defaults to cwd.

    Returns:
        Path to git root, or None if not in a git repository.
    """
    base = (Path.cwd() if start_path is None else start_path).resolve()

    # Walk from the starting directory up to and including the filesystem
    # root, stopping at the first directory that contains a .git entry.
    for candidate in (base, *base.parents):
        if (candidate / ".git").exists():
            return candidate

    return None
36
+
37
+
38
def get_default_store_path() -> Path:
    """
    Get the default store path.

    Priority:
    1. KEEP_STORE_PATH environment variable
    2. .keep/ directory at git repository root
    3. ~/.keep/ in user's home directory (if not in a repo)

    Returns:
        Path to the store directory (may not exist yet).
    """
    # Explicit override always wins.
    override = os.environ.get("KEEP_STORE_PATH")
    if override:
        return Path(override).resolve()

    # Inside a repository, keep the store next to the code.
    repo_root = find_git_root()
    if repo_root:
        return repo_root / ".keep"

    # Last resort: the user's home directory, with a warning so repo-based
    # workflows notice the fallback.
    home = Path.home()
    warnings.warn(
        f"Not in a git repository. Using {home / '.keep'} for storage. "
        f"Set KEEP_STORE_PATH to specify a different location.",
        stacklevel=2,
    )
    return home / ".keep"
@@ -0,0 +1,166 @@
1
+ """
2
+ Pending summaries queue using SQLite.
3
+
4
+ Stores content that needs summarization for later processing.
5
+ This enables fast indexing with lazy summarization.
6
+ """
7
+
8
+ import sqlite3
9
+ from dataclasses import dataclass
10
+ from datetime import datetime, timezone
11
+ from pathlib import Path
12
+ from typing import Optional
13
+
14
+
15
@dataclass
class PendingSummary:
    """A queued item awaiting summarization."""
    # Document identifier (unique within its collection; together with
    # `collection` it forms the queue's composite primary key).
    id: str
    # Name of the collection the document belongs to.
    collection: str
    # Raw content to be summarized.
    content: str
    # UTC ISO-8601 timestamp recorded when the item was enqueued.
    queued_at: str
    # Number of prior dequeue() hand-outs (value as stored at SELECT time).
    attempts: int = 0
23
+
24
+
25
+ class PendingSummaryQueue:
26
+ """
27
+ SQLite-backed queue for pending summarizations.
28
+
29
+ Items are added during fast indexing (with truncated placeholder summary)
30
+ and processed later by `keep process-pending` or programmatically.
31
+ """
32
+
33
+ def __init__(self, queue_path: Path):
34
+ """
35
+ Args:
36
+ queue_path: Path to SQLite database file
37
+ """
38
+ self._queue_path = queue_path
39
+ self._conn: Optional[sqlite3.Connection] = None
40
+ self._init_db()
41
+
42
+ def _init_db(self) -> None:
43
+ """Initialize the SQLite database."""
44
+ self._queue_path.parent.mkdir(parents=True, exist_ok=True)
45
+ self._conn = sqlite3.connect(str(self._queue_path), check_same_thread=False)
46
+ self._conn.execute("""
47
+ CREATE TABLE IF NOT EXISTS pending_summaries (
48
+ id TEXT NOT NULL,
49
+ collection TEXT NOT NULL,
50
+ content TEXT NOT NULL,
51
+ queued_at TEXT NOT NULL,
52
+ attempts INTEGER DEFAULT 0,
53
+ PRIMARY KEY (id, collection)
54
+ )
55
+ """)
56
+ self._conn.execute("""
57
+ CREATE INDEX IF NOT EXISTS idx_queued_at
58
+ ON pending_summaries(queued_at)
59
+ """)
60
+ self._conn.commit()
61
+
62
+ def enqueue(self, id: str, collection: str, content: str) -> None:
63
+ """
64
+ Add an item to the pending queue.
65
+
66
+ If the same id+collection already exists, replaces it (newer content wins).
67
+ """
68
+ now = datetime.now(timezone.utc).isoformat()
69
+ self._conn.execute("""
70
+ INSERT OR REPLACE INTO pending_summaries
71
+ (id, collection, content, queued_at, attempts)
72
+ VALUES (?, ?, ?, ?, 0)
73
+ """, (id, collection, content, now))
74
+ self._conn.commit()
75
+
76
+ def dequeue(self, limit: int = 10) -> list[PendingSummary]:
77
+ """
78
+ Get the oldest pending items for processing.
79
+
80
+ Items are returned but not removed - call `complete()` after successful processing.
81
+ Increments attempt counter on each dequeue.
82
+ """
83
+ cursor = self._conn.execute("""
84
+ SELECT id, collection, content, queued_at, attempts
85
+ FROM pending_summaries
86
+ ORDER BY queued_at ASC
87
+ LIMIT ?
88
+ """, (limit,))
89
+
90
+ items = []
91
+ for row in cursor.fetchall():
92
+ items.append(PendingSummary(
93
+ id=row[0],
94
+ collection=row[1],
95
+ content=row[2],
96
+ queued_at=row[3],
97
+ attempts=row[4],
98
+ ))
99
+
100
+ # Increment attempt counters
101
+ if items:
102
+ ids = [(item.id, item.collection) for item in items]
103
+ self._conn.executemany("""
104
+ UPDATE pending_summaries
105
+ SET attempts = attempts + 1
106
+ WHERE id = ? AND collection = ?
107
+ """, ids)
108
+ self._conn.commit()
109
+
110
+ return items
111
+
112
+ def complete(self, id: str, collection: str) -> None:
113
+ """Remove an item from the queue after successful processing."""
114
+ self._conn.execute("""
115
+ DELETE FROM pending_summaries
116
+ WHERE id = ? AND collection = ?
117
+ """, (id, collection))
118
+ self._conn.commit()
119
+
120
+ def count(self) -> int:
121
+ """Get count of pending items."""
122
+ cursor = self._conn.execute("SELECT COUNT(*) FROM pending_summaries")
123
+ return cursor.fetchone()[0]
124
+
125
+ def stats(self) -> dict:
126
+ """Get queue statistics."""
127
+ cursor = self._conn.execute("""
128
+ SELECT
129
+ COUNT(*) as total,
130
+ COUNT(DISTINCT collection) as collections,
131
+ MAX(attempts) as max_attempts,
132
+ MIN(queued_at) as oldest
133
+ FROM pending_summaries
134
+ """)
135
+ row = cursor.fetchone()
136
+ return {
137
+ "pending": row[0],
138
+ "collections": row[1],
139
+ "max_attempts": row[2] or 0,
140
+ "oldest": row[3],
141
+ "queue_path": str(self._queue_path),
142
+ }
143
+
144
+ def clear(self) -> int:
145
+ """Clear all pending items. Returns count of items cleared."""
146
+ count = self.count()
147
+ self._conn.execute("DELETE FROM pending_summaries")
148
+ self._conn.commit()
149
+ return count
150
+
151
+ def close(self) -> None:
152
+ """Close the database connection."""
153
+ if self._conn is not None:
154
+ self._conn.close()
155
+ self._conn = None
156
+
157
+ def __enter__(self):
158
+ return self
159
+
160
+ def __exit__(self, exc_type, exc_val, exc_tb):
161
+ self.close()
162
+ return False
163
+
164
+ def __del__(self):
165
+ """Ensure connection is closed on garbage collection."""
166
+ self.close()
@@ -0,0 +1,40 @@
1
+ """
2
+ Provider interfaces for associative memory services.
3
+
4
+ Each provider type defines a protocol that concrete implementations must follow.
5
+ Providers are configured at store initialization and handle the heavy lifting of:
6
+ - Embedding generation (for semantic search)
7
+ - Summarization (for human-readable recall)
8
+ - Tagging (for structured navigation)
9
+ - Document fetching (for URI resolution)
10
+
11
+ Concrete providers are lazily loaded when first requested via the registry.
12
+ This avoids import-time failures when optional dependencies are missing.
13
+ """
14
+
15
+ from .base import (
16
+ Document,
17
+ EmbeddingProvider,
18
+ SummarizationProvider,
19
+ TaggingProvider,
20
+ DocumentProvider,
21
+ ProviderRegistry,
22
+ get_registry,
23
+ )
24
+
25
+ # Providers are now loaded lazily by ProviderRegistry._ensure_providers_loaded()
26
+ # This avoids import-time failures when optional dependencies are missing
27
+
28
+ __all__ = [
29
+ # Protocols
30
+ "EmbeddingProvider",
31
+ "SummarizationProvider",
32
+ "TaggingProvider",
33
+ "DocumentProvider",
34
+ # Data types
35
+ "Document",
36
+ # Registry
37
+ "ProviderRegistry",
38
+ "get_registry",
39
+ ]
40
+