keep-skill 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- keep/__init__.py +53 -0
- keep/__main__.py +8 -0
- keep/api.py +686 -0
- keep/chunking.py +364 -0
- keep/cli.py +503 -0
- keep/config.py +323 -0
- keep/context.py +127 -0
- keep/indexing.py +208 -0
- keep/logging_config.py +73 -0
- keep/paths.py +67 -0
- keep/pending_summaries.py +166 -0
- keep/providers/__init__.py +40 -0
- keep/providers/base.py +416 -0
- keep/providers/documents.py +250 -0
- keep/providers/embedding_cache.py +260 -0
- keep/providers/embeddings.py +245 -0
- keep/providers/llm.py +371 -0
- keep/providers/mlx.py +256 -0
- keep/providers/summarization.py +107 -0
- keep/store.py +403 -0
- keep/types.py +65 -0
- keep_skill-0.1.0.dist-info/METADATA +290 -0
- keep_skill-0.1.0.dist-info/RECORD +26 -0
- keep_skill-0.1.0.dist-info/WHEEL +4 -0
- keep_skill-0.1.0.dist-info/entry_points.txt +2 -0
- keep_skill-0.1.0.dist-info/licenses/LICENSE +21 -0
keep/logging_config.py
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
"""
Logging configuration for keep.

Suppress verbose library output by default for better UX.
"""

import os
import sys
import warnings

# Set environment variables BEFORE any imports to suppress warnings early
# KEEP_VERBOSE is the escape hatch: when set to any non-empty value, none
# of the suppression below is applied at import time.
if not os.environ.get("KEEP_VERBOSE"):
    os.environ["HF_HUB_DISABLE_PROGRESS_BARS"] = "1"  # no HuggingFace download bars
    os.environ["TRANSFORMERS_VERBOSITY"] = "error"  # transformers logs errors only
    os.environ["TOKENIZERS_PARALLELISM"] = "false"  # disable tokenizers parallelism (silences its warning)
    os.environ["HF_HUB_DISABLE_TELEMETRY"] = "1"  # opt out of HF Hub telemetry
    os.environ["HF_HUB_DISABLE_SYMLINKS_WARNING"] = "1"  # suppress HF symlink warning
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def configure_quiet_mode(quiet: bool = True) -> None:
    """
    Configure logging to suppress verbose library output.

    This silences:
    - HuggingFace transformers progress bars
    - Library warnings (deprecation, etc.)
    - Chatty third-party loggers (transformers, sentence_transformers,
      mlx, chromadb)

    Args:
        quiet: If True, suppress verbose output. If False, this function
            is a no-op; call enable_verbose_mode() to restore verbose
            output explicitly.
    """
    if not quiet:
        # Restoration (env vars, warnings filter, logger levels) is
        # handled by enable_verbose_mode(); nothing to do here.
        return

    # Suppress HuggingFace progress bars and warnings
    os.environ["HF_HUB_DISABLE_PROGRESS_BARS"] = "1"
    os.environ["TRANSFORMERS_VERBOSITY"] = "error"
    os.environ["TOKENIZERS_PARALLELISM"] = "false"

    # Suppress Python warnings (including deprecation warnings)
    warnings.filterwarnings("ignore")

    # NOTE: the previous version did `import mlx.core` here purely to
    # check availability — MLX has no global verbosity setting, so the
    # import only cost load time. It has been removed.

    # Configure Python logging to be less verbose. Imported locally so
    # module import stays side-effect free beyond the env-var guard.
    import logging

    for logger_name in ("transformers", "sentence_transformers", "mlx", "chromadb"):
        logging.getLogger(logger_name).setLevel(logging.ERROR)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def enable_verbose_mode() -> None:
    """
    Re-enable verbose output for debugging.

    Undoes everything configure_quiet_mode() (and the import-time
    environment guard) applied: removes the suppression environment
    variables, restores the default warnings filter, and resets the
    silenced third-party loggers back to INFO.
    """
    # Restore defaults: pop every suppression variable this module may
    # have set. The previous version missed TOKENIZERS_PARALLELISM and
    # the two HF_HUB_DISABLE_* flags, leaving verbose mode half-applied.
    for var in (
        "HF_HUB_DISABLE_PROGRESS_BARS",
        "TRANSFORMERS_VERBOSITY",
        "TOKENIZERS_PARALLELISM",
        "HF_HUB_DISABLE_TELEMETRY",
        "HF_HUB_DISABLE_SYMLINKS_WARNING",
    ):
        os.environ.pop(var, None)

    # Re-enable warnings
    warnings.filterwarnings("default")

    # Reset logging levels for the loggers quiet mode silenced.
    import logging

    for logger_name in ("transformers", "sentence_transformers", "mlx", "chromadb"):
        logging.getLogger(logger_name).setLevel(logging.INFO)
|
keep/paths.py
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Utility functions for locating paths.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
import warnings
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Optional
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def find_git_root(start_path: Optional[Path] = None) -> Optional[Path]:
    """
    Find the root of the git repository containing the given path.

    Args:
        start_path: Path to start searching from. Defaults to cwd.

    Returns:
        Path to git root, or None if not in a git repository.
    """
    resolved = (Path.cwd() if start_path is None else start_path).resolve()

    # Walk from the starting directory up to and including the filesystem
    # root, returning the first ancestor that contains a ".git" entry.
    # `.exists()` (rather than `.is_dir()`) also matches a `.git` file.
    for candidate in (resolved, *resolved.parents):
        if (candidate / ".git").exists():
            return candidate

    return None
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def get_default_store_path() -> Path:
    """
    Get the default store path.

    Priority:
    1. KEEP_STORE_PATH environment variable
    2. .keep/ directory at git repository root
    3. ~/.keep/ in user's home directory (if not in a repo)

    Returns:
        Path to the store directory (may not exist yet).
    """
    # Check environment variable first. Expand "~" so a literally-set
    # value like "~/stores/keep" (not expanded by a shell) still resolves
    # to the user's home rather than to "$CWD/~/stores/keep".
    env_path = os.environ.get("KEEP_STORE_PATH")
    if env_path:
        return Path(env_path).expanduser().resolve()

    # Try to find git root so the store lives alongside the project.
    git_root = find_git_root()
    if git_root:
        return git_root / ".keep"

    # Fall back to home directory with warning, so the user knows the
    # store is global rather than per-project.
    home = Path.home()
    warnings.warn(
        f"Not in a git repository. Using {home / '.keep'} for storage. "
        f"Set KEEP_STORE_PATH to specify a different location.",
        stacklevel=2,
    )
    return home / ".keep"
|
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Pending summaries queue using SQLite.
|
|
3
|
+
|
|
4
|
+
Stores content that needs summarization for later processing.
|
|
5
|
+
This enables fast indexing with lazy summarization.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import sqlite3
|
|
9
|
+
from dataclasses import dataclass
|
|
10
|
+
from datetime import datetime, timezone
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from typing import Optional
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@dataclass
class PendingSummary:
    """A queued item awaiting summarization."""
    # Item id; together with `collection` this forms the queue's
    # primary key (see the table schema in PendingSummaryQueue).
    id: str
    # Collection the item belongs to.
    collection: str
    # Raw content to be summarized.
    content: str
    # UTC ISO-8601 timestamp recorded at enqueue time.
    queued_at: str
    # Number of times the item had been handed out for processing
    # at the moment it was dequeued (incremented by dequeue()).
    attempts: int = 0
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class PendingSummaryQueue:
    """
    SQLite-backed queue for pending summarizations.

    Items are added during fast indexing (with truncated placeholder summary)
    and processed later by `keep process-pending` or programmatically.
    """

    def __init__(self, queue_path: Path):
        """
        Args:
            queue_path: Path to SQLite database file
        """
        self._queue_path = queue_path
        self._conn: Optional[sqlite3.Connection] = None
        self._init_db()

    def _db(self) -> sqlite3.Connection:
        """Return the open connection.

        Raises:
            RuntimeError: if the queue has already been closed. (Previously
                use-after-close surfaced as an opaque AttributeError on None.)
        """
        if self._conn is None:
            raise RuntimeError("PendingSummaryQueue is closed")
        return self._conn

    def _init_db(self) -> None:
        """Initialize the SQLite database (file, table, and index)."""
        self._queue_path.parent.mkdir(parents=True, exist_ok=True)
        # check_same_thread=False lets the queue be used from threads other
        # than the one that opened it; callers coordinate access themselves.
        self._conn = sqlite3.connect(str(self._queue_path), check_same_thread=False)
        self._conn.execute("""
            CREATE TABLE IF NOT EXISTS pending_summaries (
                id TEXT NOT NULL,
                collection TEXT NOT NULL,
                content TEXT NOT NULL,
                queued_at TEXT NOT NULL,
                attempts INTEGER DEFAULT 0,
                PRIMARY KEY (id, collection)
            )
        """)
        self._conn.execute("""
            CREATE INDEX IF NOT EXISTS idx_queued_at
            ON pending_summaries(queued_at)
        """)
        self._conn.commit()

    def enqueue(self, id: str, collection: str, content: str) -> None:
        """
        Add an item to the pending queue.

        If the same id+collection already exists, replaces it (newer content wins)
        and resets the attempt counter to zero.
        """
        conn = self._db()
        now = datetime.now(timezone.utc).isoformat()
        conn.execute("""
            INSERT OR REPLACE INTO pending_summaries
            (id, collection, content, queued_at, attempts)
            VALUES (?, ?, ?, ?, 0)
        """, (id, collection, content, now))
        conn.commit()

    def dequeue(self, limit: int = 10) -> list["PendingSummary"]:
        """
        Get the oldest pending items for processing.

        Items are returned but not removed - call `complete()` after successful processing.
        Increments attempt counter on each dequeue; the returned objects carry
        the pre-increment attempt count.
        """
        conn = self._db()
        cursor = conn.execute("""
            SELECT id, collection, content, queued_at, attempts
            FROM pending_summaries
            ORDER BY queued_at ASC
            LIMIT ?
        """, (limit,))

        items = [
            PendingSummary(
                id=row[0],
                collection=row[1],
                content=row[2],
                queued_at=row[3],
                attempts=row[4],
            )
            for row in cursor.fetchall()
        ]

        # Increment attempt counters for everything handed out.
        if items:
            keys = [(item.id, item.collection) for item in items]
            conn.executemany("""
                UPDATE pending_summaries
                SET attempts = attempts + 1
                WHERE id = ? AND collection = ?
            """, keys)
            conn.commit()

        return items

    def complete(self, id: str, collection: str) -> None:
        """Remove an item from the queue after successful processing."""
        conn = self._db()
        conn.execute("""
            DELETE FROM pending_summaries
            WHERE id = ? AND collection = ?
        """, (id, collection))
        conn.commit()

    def count(self) -> int:
        """Get count of pending items."""
        cursor = self._db().execute("SELECT COUNT(*) FROM pending_summaries")
        return cursor.fetchone()[0]

    def stats(self) -> dict:
        """Get queue statistics (pending/collection counts, oldest item, path)."""
        cursor = self._db().execute("""
            SELECT
                COUNT(*) as total,
                COUNT(DISTINCT collection) as collections,
                MAX(attempts) as max_attempts,
                MIN(queued_at) as oldest
            FROM pending_summaries
        """)
        row = cursor.fetchone()
        return {
            "pending": row[0],
            "collections": row[1],
            "max_attempts": row[2] or 0,  # MAX() is NULL on an empty table
            "oldest": row[3],
            "queue_path": str(self._queue_path),
        }

    def clear(self) -> int:
        """Clear all pending items. Returns count of items cleared."""
        conn = self._db()
        count = self.count()
        conn.execute("DELETE FROM pending_summaries")
        conn.commit()
        return count

    def close(self) -> None:
        """Close the database connection. Safe to call more than once."""
        if self._conn is not None:
            self._conn.close()
            self._conn = None

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()
        return False

    def __del__(self):
        """Ensure connection is closed on garbage collection."""
        # getattr-guard: __del__ may run on a partially constructed
        # instance (e.g. __init__ raised before _conn was assigned).
        if getattr(self, "_conn", None) is not None:
            self.close()
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Provider interfaces for associative memory services.
|
|
3
|
+
|
|
4
|
+
Each provider type defines a protocol that concrete implementations must follow.
|
|
5
|
+
Providers are configured at store initialization and handle the heavy lifting of:
|
|
6
|
+
- Embedding generation (for semantic search)
|
|
7
|
+
- Summarization (for human-readable recall)
|
|
8
|
+
- Tagging (for structured navigation)
|
|
9
|
+
- Document fetching (for URI resolution)
|
|
10
|
+
|
|
11
|
+
Concrete providers are lazily loaded when first requested via the registry.
|
|
12
|
+
This avoids import-time failures when optional dependencies are missing.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from .base import (
|
|
16
|
+
Document,
|
|
17
|
+
EmbeddingProvider,
|
|
18
|
+
SummarizationProvider,
|
|
19
|
+
TaggingProvider,
|
|
20
|
+
DocumentProvider,
|
|
21
|
+
ProviderRegistry,
|
|
22
|
+
get_registry,
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
# Providers are now loaded lazily by ProviderRegistry._ensure_providers_loaded()
|
|
26
|
+
# This avoids import-time failures when optional dependencies are missing
|
|
27
|
+
|
|
28
|
+
# Public API of keep.providers; keep in sync with the re-exports from .base.
__all__ = [
    # Protocols
    "EmbeddingProvider",
    "SummarizationProvider",
    "TaggingProvider",
    "DocumentProvider",
    # Data types
    "Document",
    # Registry
    "ProviderRegistry",
    "get_registry",
]
|
|
40
|
+
|