keep-skill 0.1.0__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- keep/__init__.py +3 -6
- keep/api.py +1052 -145
- keep/cli.py +705 -132
- keep/config.py +172 -41
- keep/context.py +1 -125
- keep/document_store.py +908 -0
- keep/errors.py +33 -0
- keep/indexing.py +1 -1
- keep/logging_config.py +34 -3
- keep/paths.py +81 -17
- keep/pending_summaries.py +52 -40
- keep/providers/embedding_cache.py +59 -46
- keep/providers/embeddings.py +43 -13
- keep/providers/mlx.py +23 -21
- keep/store.py +169 -25
- keep_skill-0.3.0.dist-info/METADATA +218 -0
- keep_skill-0.3.0.dist-info/RECORD +28 -0
- keep_skill-0.1.0.dist-info/METADATA +0 -290
- keep_skill-0.1.0.dist-info/RECORD +0 -26
- {keep_skill-0.1.0.dist-info → keep_skill-0.3.0.dist-info}/WHEEL +0 -0
- {keep_skill-0.1.0.dist-info → keep_skill-0.3.0.dist-info}/entry_points.txt +0 -0
- {keep_skill-0.1.0.dist-info → keep_skill-0.3.0.dist-info}/licenses/LICENSE +0 -0
keep/config.py
CHANGED
|
@@ -11,7 +11,7 @@ import tomllib
|
|
|
11
11
|
from dataclasses import dataclass, field
|
|
12
12
|
from datetime import datetime, timezone
|
|
13
13
|
from pathlib import Path
|
|
14
|
-
from typing import Any
|
|
14
|
+
from typing import Any, Optional
|
|
15
15
|
|
|
16
16
|
# tomli_w for writing TOML (tomllib is read-only)
|
|
17
17
|
try:
|
|
@@ -21,7 +21,7 @@ except ImportError:
|
|
|
21
21
|
|
|
22
22
|
|
|
23
23
|
CONFIG_FILENAME = "keep.toml"
|
|
24
|
-
CONFIG_VERSION =
|
|
24
|
+
CONFIG_VERSION = 3 # Bumped for document versioning support
|
|
25
25
|
|
|
26
26
|
|
|
27
27
|
@dataclass
|
|
@@ -31,23 +31,72 @@ class ProviderConfig:
|
|
|
31
31
|
params: dict[str, Any] = field(default_factory=dict)
|
|
32
32
|
|
|
33
33
|
|
|
34
|
+
@dataclass
|
|
35
|
+
class EmbeddingIdentity:
|
|
36
|
+
"""
|
|
37
|
+
Identity of an embedding model for compatibility checking.
|
|
38
|
+
|
|
39
|
+
Two embeddings are compatible only if they have the same identity.
|
|
40
|
+
Different models, even with the same dimension, produce incompatible vectors.
|
|
41
|
+
"""
|
|
42
|
+
provider: str # e.g., "sentence-transformers", "openai"
|
|
43
|
+
model: str # e.g., "all-MiniLM-L6-v2", "text-embedding-3-small"
|
|
44
|
+
dimension: int # e.g., 384, 1536
|
|
45
|
+
|
|
46
|
+
@property
|
|
47
|
+
def key(self) -> str:
|
|
48
|
+
"""
|
|
49
|
+
Short key for collection naming.
|
|
50
|
+
|
|
51
|
+
Format: {provider}_{model_slug}
|
|
52
|
+
e.g., "st_MiniLM_L6_v2", "openai_3_small"
|
|
53
|
+
"""
|
|
54
|
+
# Simplify model name for use in collection names
|
|
55
|
+
model_slug = self.model.replace("-", "_").replace(".", "_")
|
|
56
|
+
# Remove common prefixes
|
|
57
|
+
for prefix in ["all_", "text_embedding_"]:
|
|
58
|
+
if model_slug.lower().startswith(prefix):
|
|
59
|
+
model_slug = model_slug[len(prefix):]
|
|
60
|
+
# Shorten provider names
|
|
61
|
+
provider_short = {
|
|
62
|
+
"sentence-transformers": "st",
|
|
63
|
+
"openai": "openai",
|
|
64
|
+
"gemini": "gemini",
|
|
65
|
+
"ollama": "ollama",
|
|
66
|
+
}.get(self.provider, self.provider[:6])
|
|
67
|
+
|
|
68
|
+
return f"{provider_short}_{model_slug}"
|
|
69
|
+
|
|
70
|
+
|
|
34
71
|
@dataclass
|
|
35
72
|
class StoreConfig:
|
|
36
73
|
"""Complete store configuration."""
|
|
37
|
-
path: Path
|
|
74
|
+
path: Path # Store path (where data lives)
|
|
75
|
+
config_dir: Optional[Path] = None # Where config was loaded from (may differ from path)
|
|
76
|
+
store_path: Optional[str] = None # Explicit store.path from config file (raw string)
|
|
38
77
|
version: int = CONFIG_VERSION
|
|
39
78
|
created: str = field(default_factory=lambda: datetime.now(timezone.utc).isoformat())
|
|
40
|
-
|
|
79
|
+
|
|
41
80
|
# Provider configurations
|
|
42
81
|
embedding: ProviderConfig = field(default_factory=lambda: ProviderConfig("sentence-transformers"))
|
|
43
82
|
summarization: ProviderConfig = field(default_factory=lambda: ProviderConfig("truncate"))
|
|
44
83
|
document: ProviderConfig = field(default_factory=lambda: ProviderConfig("composite"))
|
|
45
|
-
|
|
84
|
+
|
|
85
|
+
# Embedding identity (set after first use, used for validation)
|
|
86
|
+
embedding_identity: Optional[EmbeddingIdentity] = None
|
|
87
|
+
|
|
88
|
+
# Default tags applied to all update/remember operations
|
|
89
|
+
default_tags: dict[str, str] = field(default_factory=dict)
|
|
90
|
+
|
|
91
|
+
# Maximum length for summaries (used for smart remember and validation)
|
|
92
|
+
max_summary_length: int = 500
|
|
93
|
+
|
|
46
94
|
@property
|
|
47
95
|
def config_path(self) -> Path:
|
|
48
96
|
"""Path to the TOML config file."""
|
|
49
|
-
|
|
50
|
-
|
|
97
|
+
config_location = self.config_dir if self.config_dir else self.path
|
|
98
|
+
return config_location / CONFIG_FILENAME
|
|
99
|
+
|
|
51
100
|
def exists(self) -> bool:
|
|
52
101
|
"""Check if config file exists."""
|
|
53
102
|
return self.config_path.exists()
|
|
@@ -182,9 +231,13 @@ def detect_default_providers() -> dict[str, ProviderConfig]:
|
|
|
182
231
|
params["model"] = ms_model
|
|
183
232
|
embedding_provider = ProviderConfig("gemini", params)
|
|
184
233
|
|
|
185
|
-
# Fall back to
|
|
234
|
+
# Fall back to local embedding (prefer MPS-accelerated on Apple Silicon)
|
|
186
235
|
if embedding_provider is None:
|
|
187
|
-
|
|
236
|
+
if is_apple_silicon:
|
|
237
|
+
# Use sentence-transformers with MPS acceleration (no auth required)
|
|
238
|
+
embedding_provider = ProviderConfig("mlx", {"model": "all-MiniLM-L6-v2"})
|
|
239
|
+
else:
|
|
240
|
+
embedding_provider = ProviderConfig("sentence-transformers")
|
|
188
241
|
|
|
189
242
|
providers["embedding"] = embedding_provider
|
|
190
243
|
|
|
@@ -225,99 +278,177 @@ def detect_default_providers() -> dict[str, ProviderConfig]:
|
|
|
225
278
|
return providers
|
|
226
279
|
|
|
227
280
|
|
|
228
|
-
def create_default_config(store_path: Path) -> StoreConfig:
|
|
229
|
-
"""
|
|
281
|
+
def create_default_config(config_dir: Path, store_path: Optional[Path] = None) -> StoreConfig:
|
|
282
|
+
"""
|
|
283
|
+
Create a new config with auto-detected defaults.
|
|
284
|
+
|
|
285
|
+
Args:
|
|
286
|
+
config_dir: Directory where keep.toml will be saved
|
|
287
|
+
store_path: Optional explicit store location (if different from config_dir)
|
|
288
|
+
"""
|
|
230
289
|
providers = detect_default_providers()
|
|
231
|
-
|
|
290
|
+
|
|
291
|
+
# If store_path is provided and different from config_dir, record it
|
|
292
|
+
store_path_str = None
|
|
293
|
+
actual_store = config_dir
|
|
294
|
+
if store_path and store_path.resolve() != config_dir.resolve():
|
|
295
|
+
store_path_str = str(store_path)
|
|
296
|
+
actual_store = store_path
|
|
297
|
+
|
|
232
298
|
return StoreConfig(
|
|
233
|
-
path=
|
|
299
|
+
path=actual_store,
|
|
300
|
+
config_dir=config_dir,
|
|
301
|
+
store_path=store_path_str,
|
|
234
302
|
embedding=providers["embedding"],
|
|
235
303
|
summarization=providers["summarization"],
|
|
236
304
|
document=providers["document"],
|
|
237
305
|
)
|
|
238
306
|
|
|
239
307
|
|
|
240
|
-
def load_config(
|
|
308
|
+
def load_config(config_dir: Path) -> StoreConfig:
|
|
241
309
|
"""
|
|
242
|
-
Load configuration from a
|
|
243
|
-
|
|
310
|
+
Load configuration from a config directory.
|
|
311
|
+
|
|
312
|
+
The config_dir is where keep.toml lives. The actual store location
|
|
313
|
+
may be different if store.path is set in the config.
|
|
314
|
+
|
|
315
|
+
Args:
|
|
316
|
+
config_dir: Directory containing keep.toml
|
|
317
|
+
|
|
244
318
|
Raises:
|
|
245
319
|
FileNotFoundError: If config doesn't exist
|
|
246
320
|
ValueError: If config is invalid
|
|
247
321
|
"""
|
|
248
|
-
config_path =
|
|
249
|
-
|
|
322
|
+
config_path = config_dir / CONFIG_FILENAME
|
|
323
|
+
|
|
250
324
|
if not config_path.exists():
|
|
251
325
|
raise FileNotFoundError(f"Config not found: {config_path}")
|
|
252
|
-
|
|
326
|
+
|
|
253
327
|
with open(config_path, "rb") as f:
|
|
254
328
|
data = tomllib.load(f)
|
|
255
|
-
|
|
329
|
+
|
|
256
330
|
# Validate version
|
|
257
331
|
version = data.get("store", {}).get("version", 1)
|
|
258
332
|
if version > CONFIG_VERSION:
|
|
259
333
|
raise ValueError(f"Config version {version} is newer than supported ({CONFIG_VERSION})")
|
|
260
|
-
|
|
334
|
+
|
|
335
|
+
# Parse store.path - explicit store location
|
|
336
|
+
store_path_str = data.get("store", {}).get("path")
|
|
337
|
+
if store_path_str:
|
|
338
|
+
actual_store = Path(store_path_str).expanduser().resolve()
|
|
339
|
+
else:
|
|
340
|
+
actual_store = config_dir # Backwards compat: store is at config location
|
|
341
|
+
|
|
261
342
|
# Parse provider configs
|
|
262
343
|
def parse_provider(section: dict) -> ProviderConfig:
|
|
263
344
|
return ProviderConfig(
|
|
264
345
|
name=section.get("name", ""),
|
|
265
346
|
params={k: v for k, v in section.items() if k != "name"},
|
|
266
347
|
)
|
|
267
|
-
|
|
348
|
+
|
|
349
|
+
# Parse default tags (filter out system tags)
|
|
350
|
+
raw_tags = data.get("tags", {})
|
|
351
|
+
default_tags = {k: str(v) for k, v in raw_tags.items()
|
|
352
|
+
if not k.startswith("_")}
|
|
353
|
+
|
|
354
|
+
# Parse max_summary_length (default 500)
|
|
355
|
+
max_summary_length = data.get("store", {}).get("max_summary_length", 500)
|
|
356
|
+
|
|
268
357
|
return StoreConfig(
|
|
269
|
-
path=
|
|
358
|
+
path=actual_store,
|
|
359
|
+
config_dir=config_dir,
|
|
360
|
+
store_path=store_path_str,
|
|
270
361
|
version=version,
|
|
271
362
|
created=data.get("store", {}).get("created", ""),
|
|
272
363
|
embedding=parse_provider(data.get("embedding", {"name": "sentence-transformers"})),
|
|
273
364
|
summarization=parse_provider(data.get("summarization", {"name": "truncate"})),
|
|
274
365
|
document=parse_provider(data.get("document", {"name": "composite"})),
|
|
366
|
+
embedding_identity=parse_embedding_identity(data.get("embedding_identity")),
|
|
367
|
+
default_tags=default_tags,
|
|
368
|
+
max_summary_length=max_summary_length,
|
|
275
369
|
)
|
|
276
370
|
|
|
277
371
|
|
|
372
|
+
def parse_embedding_identity(data: dict | None) -> EmbeddingIdentity | None:
|
|
373
|
+
"""Parse embedding identity from config data."""
|
|
374
|
+
if data is None:
|
|
375
|
+
return None
|
|
376
|
+
provider = data.get("provider")
|
|
377
|
+
model = data.get("model")
|
|
378
|
+
dimension = data.get("dimension")
|
|
379
|
+
if provider and model and dimension:
|
|
380
|
+
return EmbeddingIdentity(provider=provider, model=model, dimension=dimension)
|
|
381
|
+
return None
|
|
382
|
+
|
|
383
|
+
|
|
278
384
|
def save_config(config: StoreConfig) -> None:
|
|
279
385
|
"""
|
|
280
|
-
Save configuration to the
|
|
281
|
-
|
|
386
|
+
Save configuration to the config directory.
|
|
387
|
+
|
|
282
388
|
Creates the directory if it doesn't exist.
|
|
283
389
|
"""
|
|
284
390
|
if tomli_w is None:
|
|
285
391
|
raise RuntimeError("tomli_w is required to save config. Install with: pip install tomli-w")
|
|
286
|
-
|
|
287
|
-
# Ensure directory exists
|
|
288
|
-
config.
|
|
289
|
-
|
|
392
|
+
|
|
393
|
+
# Ensure config directory exists
|
|
394
|
+
config_location = config.config_dir if config.config_dir else config.path
|
|
395
|
+
config_location.mkdir(parents=True, exist_ok=True)
|
|
396
|
+
|
|
290
397
|
# Build TOML structure
|
|
291
398
|
def provider_to_dict(p: ProviderConfig) -> dict:
|
|
292
399
|
d = {"name": p.name}
|
|
293
400
|
d.update(p.params)
|
|
294
401
|
return d
|
|
295
|
-
|
|
402
|
+
|
|
403
|
+
store_section: dict[str, Any] = {
|
|
404
|
+
"version": config.version,
|
|
405
|
+
"created": config.created,
|
|
406
|
+
}
|
|
407
|
+
# Only write store.path if explicitly set (not default)
|
|
408
|
+
if config.store_path:
|
|
409
|
+
store_section["path"] = config.store_path
|
|
410
|
+
# Only write max_summary_length if not default
|
|
411
|
+
if config.max_summary_length != 500:
|
|
412
|
+
store_section["max_summary_length"] = config.max_summary_length
|
|
413
|
+
|
|
296
414
|
data = {
|
|
297
|
-
"store":
|
|
298
|
-
"version": config.version,
|
|
299
|
-
"created": config.created,
|
|
300
|
-
},
|
|
415
|
+
"store": store_section,
|
|
301
416
|
"embedding": provider_to_dict(config.embedding),
|
|
302
417
|
"summarization": provider_to_dict(config.summarization),
|
|
303
418
|
"document": provider_to_dict(config.document),
|
|
304
419
|
}
|
|
305
|
-
|
|
420
|
+
|
|
421
|
+
# Add embedding identity if set
|
|
422
|
+
if config.embedding_identity:
|
|
423
|
+
data["embedding_identity"] = {
|
|
424
|
+
"provider": config.embedding_identity.provider,
|
|
425
|
+
"model": config.embedding_identity.model,
|
|
426
|
+
"dimension": config.embedding_identity.dimension,
|
|
427
|
+
}
|
|
428
|
+
|
|
429
|
+
# Add default tags if set
|
|
430
|
+
if config.default_tags:
|
|
431
|
+
data["tags"] = config.default_tags
|
|
432
|
+
|
|
306
433
|
with open(config.config_path, "wb") as f:
|
|
307
434
|
tomli_w.dump(data, f)
|
|
308
435
|
|
|
309
436
|
|
|
310
|
-
def load_or_create_config(store_path: Path) -> StoreConfig:
|
|
437
|
+
def load_or_create_config(config_dir: Path, store_path: Optional[Path] = None) -> StoreConfig:
|
|
311
438
|
"""
|
|
312
439
|
Load existing config or create a new one with defaults.
|
|
313
|
-
|
|
440
|
+
|
|
314
441
|
This is the main entry point for config management.
|
|
442
|
+
|
|
443
|
+
Args:
|
|
444
|
+
config_dir: Directory containing (or to contain) keep.toml
|
|
445
|
+
store_path: Optional explicit store location (for new configs only)
|
|
315
446
|
"""
|
|
316
|
-
config_path =
|
|
317
|
-
|
|
447
|
+
config_path = config_dir / CONFIG_FILENAME
|
|
448
|
+
|
|
318
449
|
if config_path.exists():
|
|
319
|
-
return load_config(
|
|
450
|
+
return load_config(config_dir)
|
|
320
451
|
else:
|
|
321
|
-
config = create_default_config(store_path)
|
|
452
|
+
config = create_default_config(config_dir, store_path)
|
|
322
453
|
save_config(config)
|
|
323
454
|
return config
|
keep/context.py
CHANGED
|
@@ -1,127 +1,3 @@
|
|
|
1
1
|
"""
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
This module provides hierarchical context management for efficient
|
|
5
|
-
"what are we working on?" queries with O(log(log(N))) retrieval.
|
|
2
|
+
Context module - placeholder for future routing functionality.
|
|
6
3
|
"""
|
|
7
|
-
|
|
8
|
-
from dataclasses import dataclass, field
|
|
9
|
-
from datetime import datetime, timezone
|
|
10
|
-
from typing import Any, Optional
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
@dataclass
|
|
14
|
-
class WorkingContext:
|
|
15
|
-
"""
|
|
16
|
-
The current working context — a high-level summary of active work.
|
|
17
|
-
|
|
18
|
-
This is the "Level 3" summary that any agent can read to instantly
|
|
19
|
-
understand what's being worked on.
|
|
20
|
-
|
|
21
|
-
Attributes:
|
|
22
|
-
summary: Natural language description of current focus
|
|
23
|
-
active_items: IDs of items currently being worked with
|
|
24
|
-
topics: Active topic/domain tags
|
|
25
|
-
updated: When context was last updated
|
|
26
|
-
session_id: Current session identifier
|
|
27
|
-
metadata: Additional context-specific data (arbitrary structure)
|
|
28
|
-
"""
|
|
29
|
-
summary: str
|
|
30
|
-
active_items: list[str] = field(default_factory=list)
|
|
31
|
-
topics: list[str] = field(default_factory=list)
|
|
32
|
-
updated: str = field(default_factory=lambda: datetime.now(timezone.utc).isoformat())
|
|
33
|
-
session_id: Optional[str] = None
|
|
34
|
-
metadata: dict[str, Any] = field(default_factory=dict)
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
@dataclass
|
|
38
|
-
class TopicSummary:
|
|
39
|
-
"""
|
|
40
|
-
A summary of items within a topic cluster (Level 2).
|
|
41
|
-
|
|
42
|
-
Topics aggregate related items and provide a mid-level
|
|
43
|
-
overview without retrieving all underlying items.
|
|
44
|
-
|
|
45
|
-
Attributes:
|
|
46
|
-
topic: Topic identifier (tag value)
|
|
47
|
-
summary: Generated summary of topic contents
|
|
48
|
-
item_count: Number of items in this topic
|
|
49
|
-
key_items: IDs of the most important items in the topic
|
|
50
|
-
subtopics: Child topics if hierarchical
|
|
51
|
-
updated: When topic summary was last regenerated
|
|
52
|
-
"""
|
|
53
|
-
topic: str
|
|
54
|
-
summary: str
|
|
55
|
-
item_count: int
|
|
56
|
-
key_items: list[str] = field(default_factory=list)
|
|
57
|
-
subtopics: list[str] = field(default_factory=list)
|
|
58
|
-
updated: str = field(default_factory=lambda: datetime.now(timezone.utc).isoformat())
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
@dataclass
|
|
62
|
-
class RoutingContext:
|
|
63
|
-
"""
|
|
64
|
-
Describes how items are routed between private and shared stores.
|
|
65
|
-
|
|
66
|
-
This document lives at a well-known location in the shared store.
|
|
67
|
-
The facade reads it to make routing decisions. The private store
|
|
68
|
-
is physically separate and invisible from the shared store.
|
|
69
|
-
|
|
70
|
-
Attributes:
|
|
71
|
-
summary: Natural language description of the privacy model
|
|
72
|
-
private_patterns: Tag patterns that route to private store (each pattern is dict[str, str])
|
|
73
|
-
private_store_path: Location of the private store (if local)
|
|
74
|
-
updated: When routing was last modified
|
|
75
|
-
metadata: Additional routing configuration
|
|
76
|
-
"""
|
|
77
|
-
summary: str = "Items tagged for private/draft visibility route to a separate store."
|
|
78
|
-
private_patterns: list[dict[str, str]] = field(default_factory=lambda: [
|
|
79
|
-
{"_visibility": "draft"},
|
|
80
|
-
{"_visibility": "private"},
|
|
81
|
-
{"_for": "self"},
|
|
82
|
-
])
|
|
83
|
-
private_store_path: Optional[str] = None # Resolved at init; None = default location
|
|
84
|
-
updated: str = field(default_factory=lambda: datetime.now(timezone.utc).isoformat())
|
|
85
|
-
metadata: dict[str, Any] = field(default_factory=dict)
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
# Well-known item ID for the routing context document
|
|
89
|
-
ROUTING_CONTEXT_ID = "_system:routing"
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
# Reserved system tags for context management (stored with items)
|
|
93
|
-
CONTEXT_TAGS = {
|
|
94
|
-
"_session": "Session that last touched this item",
|
|
95
|
-
"_topic": "Primary topic classification",
|
|
96
|
-
"_level": "Hierarchy level (0=source, 1=cluster, 2=topic, 3=context)",
|
|
97
|
-
"_summarizes": "IDs of items this item summarizes (for hierarchy)",
|
|
98
|
-
}
|
|
99
|
-
|
|
100
|
-
# Relevance scoring is computed at query time, NOT stored.
|
|
101
|
-
# This preserves agility between broad exploration and focused work.
|
|
102
|
-
# Score factors:
|
|
103
|
-
# - semantic similarity to query/hint
|
|
104
|
-
# - recency (time decay)
|
|
105
|
-
# - topic overlap with current WorkingContext.topics
|
|
106
|
-
# - session affinity (same session = boost)
|
|
107
|
-
# The weighting of these factors can vary by retrieval mode.
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
def generate_session_id() -> str:
|
|
111
|
-
"""Generate a unique session identifier."""
|
|
112
|
-
import uuid
|
|
113
|
-
date = datetime.now(timezone.utc).strftime("%Y-%m-%d")
|
|
114
|
-
short_uuid = uuid.uuid4().hex[:8]
|
|
115
|
-
return f"{date}:{short_uuid}"
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
def matches_private_pattern(tags: dict[str, str], patterns: list[dict[str, str]]) -> bool:
|
|
119
|
-
"""
|
|
120
|
-
Check if an item's tags match any private routing pattern.
|
|
121
|
-
|
|
122
|
-
A pattern matches if ALL its key-value pairs are present in tags.
|
|
123
|
-
"""
|
|
124
|
-
for pattern in patterns:
|
|
125
|
-
if all(tags.get(k) == v for k, v in pattern.items()):
|
|
126
|
-
return True
|
|
127
|
-
return False
|