keep-skill 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- keep/__init__.py +1 -1
- keep/api.py +265 -10
- keep/cli.py +254 -19
- keep/config.py +2 -2
- keep/document_store.py +351 -12
- keep/pending_summaries.py +6 -0
- keep/providers/embedding_cache.py +6 -0
- keep/store.py +111 -11
- keep_skill-0.3.0.dist-info/METADATA +218 -0
- {keep_skill-0.2.0.dist-info → keep_skill-0.3.0.dist-info}/RECORD +13 -13
- keep_skill-0.2.0.dist-info/METADATA +0 -304
- {keep_skill-0.2.0.dist-info → keep_skill-0.3.0.dist-info}/WHEEL +0 -0
- {keep_skill-0.2.0.dist-info → keep_skill-0.3.0.dist-info}/entry_points.txt +0 -0
- {keep_skill-0.2.0.dist-info → keep_skill-0.3.0.dist-info}/licenses/LICENSE +0 -0
keep/__init__.py
CHANGED
keep/api.py
CHANGED
|
@@ -114,6 +114,7 @@ from .providers.base import (
|
|
|
114
114
|
SummarizationProvider,
|
|
115
115
|
)
|
|
116
116
|
from .providers.embedding_cache import CachingEmbeddingProvider
|
|
117
|
+
from .document_store import VersionInfo
|
|
117
118
|
from .store import ChromaStore
|
|
118
119
|
from .types import Item, filter_non_system_tags, SYSTEM_TAG_PREFIX
|
|
119
120
|
|
|
@@ -192,6 +193,25 @@ def _content_hash(content: str) -> str:
|
|
|
192
193
|
return hashlib.sha256(content.encode("utf-8")).hexdigest()
|
|
193
194
|
|
|
194
195
|
|
|
196
|
+
def _text_content_id(content: str) -> str:
    """
    Derive a content-addressed identifier for an inline text note.

    The identifier depends only on the text itself, which is what lets
    text updates be versioned by content:
    - `keep update "my note"` → ID = _text:{hash[:12]}
    - `keep update "my note" -t status=done` → same ID, new version
    - `keep update "different note"` → different ID

    Args:
        content: The text content

    Returns:
        The string ``_text:`` followed by the first 12 hex characters of
        the SHA-256 digest of the UTF-8 encoded content.
    """
    # Hash the UTF-8 bytes and keep only a short prefix of the hex digest.
    digest = hashlib.sha256(content.encode("utf-8"))
    short_hash = digest.hexdigest()[:12]
    return f"_text:{short_hash}"
|
|
213
|
+
|
|
214
|
+
|
|
195
215
|
class Keeper:
|
|
196
216
|
"""
|
|
197
217
|
Semantic memory keeper - persistent storage with similarity search.
|
|
@@ -516,14 +536,20 @@ class Keeper:
|
|
|
516
536
|
if doc.content_type:
|
|
517
537
|
merged_tags["_content_type"] = doc.content_type
|
|
518
538
|
|
|
539
|
+
# Get existing doc info for versioning before upsert
|
|
540
|
+
old_doc = self._document_store.get(coll, id)
|
|
541
|
+
|
|
519
542
|
# Dual-write: document store (canonical) + ChromaDB (embedding index)
|
|
520
|
-
|
|
543
|
+
# DocumentStore.upsert now returns (record, content_changed) and archives old version
|
|
544
|
+
doc_record, content_changed = self._document_store.upsert(
|
|
521
545
|
collection=coll,
|
|
522
546
|
id=id,
|
|
523
547
|
summary=final_summary,
|
|
524
548
|
tags=merged_tags,
|
|
525
549
|
content_hash=new_hash,
|
|
526
550
|
)
|
|
551
|
+
|
|
552
|
+
# Store embedding for current version
|
|
527
553
|
self._store.upsert(
|
|
528
554
|
collection=coll,
|
|
529
555
|
id=id,
|
|
@@ -532,6 +558,23 @@ class Keeper:
|
|
|
532
558
|
tags=merged_tags,
|
|
533
559
|
)
|
|
534
560
|
|
|
561
|
+
# If content changed and we archived a version, also store versioned embedding
|
|
562
|
+
# Skip if content hash is same (only tags/summary changed)
|
|
563
|
+
if old_doc is not None and content_changed:
|
|
564
|
+
# Get the version number that was just archived
|
|
565
|
+
version_count = self._document_store.version_count(coll, id)
|
|
566
|
+
if version_count > 0:
|
|
567
|
+
# Re-embed the old content for the archived version
|
|
568
|
+
old_embedding = self._get_embedding_provider().embed(old_doc.summary)
|
|
569
|
+
self._store.upsert_version(
|
|
570
|
+
collection=coll,
|
|
571
|
+
id=id,
|
|
572
|
+
version=version_count,
|
|
573
|
+
embedding=old_embedding,
|
|
574
|
+
summary=old_doc.summary,
|
|
575
|
+
tags=old_doc.tags,
|
|
576
|
+
)
|
|
577
|
+
|
|
535
578
|
# Spawn background processor if lazy (only if summary wasn't user-provided and content changed)
|
|
536
579
|
if lazy and summary is None and not content_unchanged:
|
|
537
580
|
self._spawn_processor()
|
|
@@ -671,14 +714,20 @@ class Keeper:
|
|
|
671
714
|
# Add system tags
|
|
672
715
|
merged_tags["_source"] = "inline"
|
|
673
716
|
|
|
717
|
+
# Get existing doc info for versioning before upsert
|
|
718
|
+
old_doc = self._document_store.get(coll, id)
|
|
719
|
+
|
|
674
720
|
# Dual-write: document store (canonical) + ChromaDB (embedding index)
|
|
675
|
-
|
|
721
|
+
# DocumentStore.upsert now returns (record, content_changed) and archives old version
|
|
722
|
+
doc_record, content_changed = self._document_store.upsert(
|
|
676
723
|
collection=coll,
|
|
677
724
|
id=id,
|
|
678
725
|
summary=final_summary,
|
|
679
726
|
tags=merged_tags,
|
|
680
727
|
content_hash=new_hash,
|
|
681
728
|
)
|
|
729
|
+
|
|
730
|
+
# Store embedding for current version
|
|
682
731
|
self._store.upsert(
|
|
683
732
|
collection=coll,
|
|
684
733
|
id=id,
|
|
@@ -687,6 +736,23 @@ class Keeper:
|
|
|
687
736
|
tags=merged_tags,
|
|
688
737
|
)
|
|
689
738
|
|
|
739
|
+
# If content changed and we archived a version, also store versioned embedding
|
|
740
|
+
# Skip if content hash is same (only tags/summary changed)
|
|
741
|
+
if old_doc is not None and content_changed:
|
|
742
|
+
# Get the version number that was just archived
|
|
743
|
+
version_count = self._document_store.version_count(coll, id)
|
|
744
|
+
if version_count > 0:
|
|
745
|
+
# Re-embed the old content for the archived version
|
|
746
|
+
old_embedding = self._get_embedding_provider().embed(old_doc.summary)
|
|
747
|
+
self._store.upsert_version(
|
|
748
|
+
collection=coll,
|
|
749
|
+
id=id,
|
|
750
|
+
version=version_count,
|
|
751
|
+
embedding=old_embedding,
|
|
752
|
+
summary=old_doc.summary,
|
|
753
|
+
tags=old_doc.tags,
|
|
754
|
+
)
|
|
755
|
+
|
|
690
756
|
# Spawn background processor if lazy and content was queued (only if content changed)
|
|
691
757
|
if lazy and summary is None and len(content) > max_len and not content_unchanged:
|
|
692
758
|
self._spawn_processor()
|
|
@@ -993,7 +1059,95 @@ class Keeper:
|
|
|
993
1059
|
if result is None:
|
|
994
1060
|
return None
|
|
995
1061
|
return result.to_item()
|
|
996
|
-
|
|
1062
|
+
|
|
1063
|
+
def get_version(
    self,
    id: str,
    offset: int = 0,
    *,
    collection: Optional[str] = None,
) -> Optional[Item]:
    """
    Fetch one version of a document, addressed relative to the live one.

    Offset semantics:
    - 0 = current version
    - 1 = previous version
    - 2 = two versions ago
    - etc.

    Args:
        id: Document identifier
        offset: Version offset (0=current, 1=previous, etc.)
        collection: Target collection

    Returns:
        Item if found, None if version doesn't exist
    """
    target = self._resolve_collection(collection)

    # Any non-zero offset is looked up in the archived history held by the
    # document store.
    if offset != 0:
        archived = self._document_store.get_version(target, id, offset)
        if archived is None:
            return None
        return Item(id=id, summary=archived.summary, tags=archived.tags)

    # Offset 0 is the live document; reuse the regular lookup, which takes
    # the caller's original (unresolved) collection argument.
    return self.get(id, collection=collection)
|
|
1103
|
+
|
|
1104
|
+
def list_versions(
    self,
    id: str,
    limit: int = 10,
    *,
    collection: Optional[str] = None,
) -> list[VersionInfo]:
    """
    Return the archived version history of a document.

    Versions come back in reverse chronological order (newest archived
    first). The current version is not part of the result.

    Args:
        id: Document identifier
        limit: Maximum versions to return
        collection: Target collection

    Returns:
        List of VersionInfo, newest archived first
    """
    target = self._resolve_collection(collection)
    # The document store owns the history; this method only resolves the
    # collection and forwards the request.
    history = self._document_store.list_versions(target, id, limit)
    return history
|
|
1127
|
+
|
|
1128
|
+
def get_version_nav(
    self,
    id: str,
    current_version: Optional[int] = None,
    limit: int = 3,
    *,
    collection: Optional[str] = None,
) -> dict[str, list[VersionInfo]]:
    """
    Look up the neighbouring versions (prev/next) of a document for display.

    Args:
        id: Document identifier
        current_version: The version being viewed (None = current/live version)
        limit: Max previous versions to return when viewing current
        collection: Target collection

    Returns:
        Dict with 'prev' and optionally 'next' lists of VersionInfo.
    """
    target = self._resolve_collection(collection)
    # Navigation is computed by the document store; this method only
    # resolves the collection and forwards the arguments.
    navigation = self._document_store.get_version_nav(
        target, id, current_version, limit
    )
    return navigation
|
|
1150
|
+
|
|
997
1151
|
def exists(self, id: str, *, collection: Optional[str] = None) -> bool:
|
|
998
1152
|
"""
|
|
999
1153
|
Check if an item exists in the store.
|
|
@@ -1002,16 +1156,28 @@ class Keeper:
|
|
|
1002
1156
|
# Check document store first, then ChromaDB
|
|
1003
1157
|
return self._document_store.exists(coll, id) or self._store.exists(coll, id)
|
|
1004
1158
|
|
|
1005
|
-
def delete(
|
|
1159
|
+
def delete(
    self,
    id: str,
    *,
    collection: Optional[str] = None,
    delete_versions: bool = True,
) -> bool:
    """
    Remove an item from the document store and from the embedding store.

    Args:
        id: Document identifier
        collection: Target collection
        delete_versions: If True, also delete version history

    Returns:
        True if item existed and was deleted.
    """
    target = self._resolve_collection(collection)

    # Both stores are always asked to delete (no short-circuit), so an entry
    # present in only one of them is still removed.
    removed_from_docs = self._document_store.delete(
        target, id, delete_versions=delete_versions
    )
    removed_from_index = self._store.delete(
        target, id, delete_versions=delete_versions
    )

    # A deletion reported by either store counts as success.
    if removed_from_docs:
        return removed_from_docs
    return removed_from_index
|
|
1016
1182
|
|
|
1017
1183
|
# -------------------------------------------------------------------------
|
|
@@ -1157,7 +1323,7 @@ class Keeper:
|
|
|
1157
1323
|
def count(self, *, collection: Optional[str] = None) -> int:
|
|
1158
1324
|
"""
|
|
1159
1325
|
Count items in a collection.
|
|
1160
|
-
|
|
1326
|
+
|
|
1161
1327
|
Returns count from document store if available, else ChromaDB.
|
|
1162
1328
|
"""
|
|
1163
1329
|
coll = self._resolve_collection(collection)
|
|
@@ -1165,7 +1331,36 @@ class Keeper:
|
|
|
1165
1331
|
if doc_count > 0:
|
|
1166
1332
|
return doc_count
|
|
1167
1333
|
return self._store.count(coll)
|
|
1168
|
-
|
|
1334
|
+
|
|
1335
|
+
def list_recent(
    self,
    limit: int = 10,
    *,
    collection: Optional[str] = None,
) -> list[Item]:
    """
    Return the most recently updated items of a collection.

    Args:
        limit: Maximum number to return (default 10)
        collection: Collection to query (uses default if not specified)

    Returns:
        List of Items, most recently updated first
    """
    target = self._resolve_collection(collection)
    recent_records = self._document_store.list_recent(target, limit)

    # Convert each stored record into an Item; the score is left as None
    # because nothing here computes one.
    items = []
    for record in recent_records:
        items.append(
            Item(
                id=record.id,
                summary=record.summary,
                tags=record.tags,
                score=None,
            )
        )
    return items
|
|
1363
|
+
|
|
1169
1364
|
def embedding_cache_stats(self) -> dict:
|
|
1170
1365
|
"""
|
|
1171
1366
|
Get embedding cache statistics.
|
|
@@ -1307,6 +1502,66 @@ class Keeper:
|
|
|
1307
1502
|
logger.warning("Failed to spawn background processor: %s", e)
|
|
1308
1503
|
return False
|
|
1309
1504
|
|
|
1505
|
+
def reconcile(
    self,
    collection: Optional[str] = None,
    fix: bool = False,
) -> dict:
    """
    Check and optionally fix consistency between DocumentStore and ChromaDB.

    Detects:
    - Documents in DocumentStore missing from ChromaDB (not searchable)
    - Documents in ChromaDB missing from DocumentStore (orphaned embeddings)

    Orphaned embeddings are only reported; ``fix`` never deletes them.

    Args:
        collection: Collection to check (None = default collection)
        fix: If True, re-index documents missing from ChromaDB

    Returns:
        Dict with the keys:
        - 'missing_from_chroma': number of IDs only in the DocumentStore
        - 'orphaned_in_chroma': number of IDs only in ChromaDB
        - 'fixed': number of documents successfully re-indexed
        - 'missing_ids': sorted list of the IDs missing from ChromaDB
        - 'orphaned_ids': sorted list of the IDs orphaned in ChromaDB
    """
    coll = self._resolve_collection(collection)

    # Get IDs from both stores
    doc_ids = set(self._document_store.list_ids(coll))
    chroma_ids = set(self._store.list_ids(coll))

    # Sort the set differences so the report and the repair order are
    # reproducible; plain set iteration order varies between runs.
    missing_ids = sorted(doc_ids - chroma_ids)
    orphaned_ids = sorted(chroma_ids - doc_ids)

    fixed = 0
    if fix:
        for doc_id in missing_ids:
            # Best effort per document: one failure is logged and must not
            # stop the remaining documents from being repaired.
            try:
                doc_record = self._document_store.get(coll, doc_id)
                if not doc_record:
                    continue

                # Fetch original content and embed it again
                doc = self._document_provider.fetch(doc_id)
                embedding = self._get_embedding_provider().embed(doc.content)

                # Write to ChromaDB using the stored summary and tags
                self._store.upsert(
                    collection=coll,
                    id=doc_id,
                    embedding=embedding,
                    summary=doc_record.summary,
                    tags=doc_record.tags,
                )
                fixed += 1
                logger.info("Reconciled: %s", doc_id)
            except Exception as e:
                logger.warning("Failed to reconcile %s: %s", doc_id, e)

    return {
        "missing_from_chroma": len(missing_ids),
        "orphaned_in_chroma": len(orphaned_ids),
        "fixed": fixed,
        "missing_ids": missing_ids,
        "orphaned_ids": orphaned_ids,
    }
|
|
1564
|
+
|
|
1310
1565
|
def close(self) -> None:
|
|
1311
1566
|
"""
|
|
1312
1567
|
Close resources (embedding cache connection, pending queue, etc.).
|