keep-skill 0.2.0__py3-none-any.whl → 0.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- keep/__init__.py +1 -1
- keep/api.py +325 -11
- keep/cli.py +453 -83
- keep/config.py +2 -2
- keep/document_store.py +351 -12
- keep/pending_summaries.py +6 -0
- keep/providers/embedding_cache.py +6 -0
- keep/store.py +128 -11
- keep_skill-0.4.1.dist-info/METADATA +219 -0
- {keep_skill-0.2.0.dist-info → keep_skill-0.4.1.dist-info}/RECORD +13 -13
- keep_skill-0.2.0.dist-info/METADATA +0 -304
- {keep_skill-0.2.0.dist-info → keep_skill-0.4.1.dist-info}/WHEEL +0 -0
- {keep_skill-0.2.0.dist-info → keep_skill-0.4.1.dist-info}/entry_points.txt +0 -0
- {keep_skill-0.2.0.dist-info → keep_skill-0.4.1.dist-info}/licenses/LICENSE +0 -0
keep/__init__.py
CHANGED
keep/api.py
CHANGED
|
@@ -114,6 +114,7 @@ from .providers.base import (
|
|
|
114
114
|
SummarizationProvider,
|
|
115
115
|
)
|
|
116
116
|
from .providers.embedding_cache import CachingEmbeddingProvider
|
|
117
|
+
from .document_store import VersionInfo
|
|
117
118
|
from .store import ChromaStore
|
|
118
119
|
from .types import Item, filter_non_system_tags, SYSTEM_TAG_PREFIX
|
|
119
120
|
|
|
@@ -192,6 +193,25 @@ def _content_hash(content: str) -> str:
|
|
|
192
193
|
return hashlib.sha256(content.encode("utf-8")).hexdigest()
|
|
193
194
|
|
|
194
195
|
|
|
196
|
+
def _text_content_id(content: str) -> str:
|
|
197
|
+
"""
|
|
198
|
+
Generate a content-addressed ID for text updates.
|
|
199
|
+
|
|
200
|
+
This makes text updates versioned by content:
|
|
201
|
+
- `keep update "my note"` → ID = _text:{hash[:12]}
|
|
202
|
+
- `keep update "my note" -t status=done` → same ID, new version
|
|
203
|
+
- `keep update "different note"` → different ID
|
|
204
|
+
|
|
205
|
+
Args:
|
|
206
|
+
content: The text content
|
|
207
|
+
|
|
208
|
+
Returns:
|
|
209
|
+
Content-addressed ID in format _text:{hash[:12]}
|
|
210
|
+
"""
|
|
211
|
+
content_hash = hashlib.sha256(content.encode("utf-8")).hexdigest()[:12]
|
|
212
|
+
return f"_text:{content_hash}"
|
|
213
|
+
|
|
214
|
+
|
|
195
215
|
class Keeper:
|
|
196
216
|
"""
|
|
197
217
|
Semantic memory keeper - persistent storage with similarity search.
|
|
@@ -516,14 +536,20 @@ class Keeper:
|
|
|
516
536
|
if doc.content_type:
|
|
517
537
|
merged_tags["_content_type"] = doc.content_type
|
|
518
538
|
|
|
539
|
+
# Get existing doc info for versioning before upsert
|
|
540
|
+
old_doc = self._document_store.get(coll, id)
|
|
541
|
+
|
|
519
542
|
# Dual-write: document store (canonical) + ChromaDB (embedding index)
|
|
520
|
-
|
|
543
|
+
# DocumentStore.upsert now returns (record, content_changed) and archives old version
|
|
544
|
+
doc_record, content_changed = self._document_store.upsert(
|
|
521
545
|
collection=coll,
|
|
522
546
|
id=id,
|
|
523
547
|
summary=final_summary,
|
|
524
548
|
tags=merged_tags,
|
|
525
549
|
content_hash=new_hash,
|
|
526
550
|
)
|
|
551
|
+
|
|
552
|
+
# Store embedding for current version
|
|
527
553
|
self._store.upsert(
|
|
528
554
|
collection=coll,
|
|
529
555
|
id=id,
|
|
@@ -532,6 +558,23 @@ class Keeper:
|
|
|
532
558
|
tags=merged_tags,
|
|
533
559
|
)
|
|
534
560
|
|
|
561
|
+
# If content changed and we archived a version, also store versioned embedding
|
|
562
|
+
# Skip if content hash is same (only tags/summary changed)
|
|
563
|
+
if old_doc is not None and content_changed:
|
|
564
|
+
# Get the version number that was just archived
|
|
565
|
+
version_count = self._document_store.version_count(coll, id)
|
|
566
|
+
if version_count > 0:
|
|
567
|
+
# Re-embed the old content for the archived version
|
|
568
|
+
old_embedding = self._get_embedding_provider().embed(old_doc.summary)
|
|
569
|
+
self._store.upsert_version(
|
|
570
|
+
collection=coll,
|
|
571
|
+
id=id,
|
|
572
|
+
version=version_count,
|
|
573
|
+
embedding=old_embedding,
|
|
574
|
+
summary=old_doc.summary,
|
|
575
|
+
tags=old_doc.tags,
|
|
576
|
+
)
|
|
577
|
+
|
|
535
578
|
# Spawn background processor if lazy (only if summary wasn't user-provided and content changed)
|
|
536
579
|
if lazy and summary is None and not content_unchanged:
|
|
537
580
|
self._spawn_processor()
|
|
@@ -671,14 +714,20 @@ class Keeper:
|
|
|
671
714
|
# Add system tags
|
|
672
715
|
merged_tags["_source"] = "inline"
|
|
673
716
|
|
|
717
|
+
# Get existing doc info for versioning before upsert
|
|
718
|
+
old_doc = self._document_store.get(coll, id)
|
|
719
|
+
|
|
674
720
|
# Dual-write: document store (canonical) + ChromaDB (embedding index)
|
|
675
|
-
|
|
721
|
+
# DocumentStore.upsert now returns (record, content_changed) and archives old version
|
|
722
|
+
doc_record, content_changed = self._document_store.upsert(
|
|
676
723
|
collection=coll,
|
|
677
724
|
id=id,
|
|
678
725
|
summary=final_summary,
|
|
679
726
|
tags=merged_tags,
|
|
680
727
|
content_hash=new_hash,
|
|
681
728
|
)
|
|
729
|
+
|
|
730
|
+
# Store embedding for current version
|
|
682
731
|
self._store.upsert(
|
|
683
732
|
collection=coll,
|
|
684
733
|
id=id,
|
|
@@ -687,6 +736,23 @@ class Keeper:
|
|
|
687
736
|
tags=merged_tags,
|
|
688
737
|
)
|
|
689
738
|
|
|
739
|
+
# If content changed and we archived a version, also store versioned embedding
|
|
740
|
+
# Skip if content hash is same (only tags/summary changed)
|
|
741
|
+
if old_doc is not None and content_changed:
|
|
742
|
+
# Get the version number that was just archived
|
|
743
|
+
version_count = self._document_store.version_count(coll, id)
|
|
744
|
+
if version_count > 0:
|
|
745
|
+
# Re-embed the old content for the archived version
|
|
746
|
+
old_embedding = self._get_embedding_provider().embed(old_doc.summary)
|
|
747
|
+
self._store.upsert_version(
|
|
748
|
+
collection=coll,
|
|
749
|
+
id=id,
|
|
750
|
+
version=version_count,
|
|
751
|
+
embedding=old_embedding,
|
|
752
|
+
summary=old_doc.summary,
|
|
753
|
+
tags=old_doc.tags,
|
|
754
|
+
)
|
|
755
|
+
|
|
690
756
|
# Spawn background processor if lazy and content was queued (only if content changed)
|
|
691
757
|
if lazy and summary is None and len(content) > max_len and not content_unchanged:
|
|
692
758
|
self._spawn_processor()
|
|
@@ -836,7 +902,66 @@ class Keeper:
|
|
|
836
902
|
items = _filter_by_date(items, since)
|
|
837
903
|
|
|
838
904
|
return items[:limit]
|
|
839
|
-
|
|
905
|
+
|
|
906
|
+
def get_similar_for_display(
    self,
    id: str,
    *,
    limit: int = 3,
    collection: Optional[str] = None
) -> list[Item]:
    """
    Find similar items for frontmatter display using the stored embedding.

    Looks up the embedding already stored for `id` (nothing is re-embedded),
    queries the store with it, and reduces the hits to at most one item per
    base document. Hits that belong to the source document itself
    (including its `@v` versions) are dropped.

    Args:
        id: ID of item to find similar items for
        limit: Maximum results to return; a value <= 0 yields an empty list
        collection: Target collection

    Returns:
        List of similar items, one per unique base document. Empty when
        `limit` is not positive or no embedding is stored for `id`.
    """
    coll = self._resolve_collection(collection)

    # A non-positive limit can never yield results. Returning early avoids
    # querying the store with a fetch size of 0 (or less) and avoids
    # appending an item before the limit check below could stop the loop.
    if limit <= 0:
        return []

    # Get the stored embedding (no re-embedding)
    embedding = self._store.get_embedding(coll, id)
    if embedding is None:
        return []

    # Fetch more than needed: some hits are discarded by the filtering below
    fetch_limit = limit * 3
    results = self._store.query_embedding(coll, embedding, limit=fetch_limit)
    items = [r.to_item() for r in results]

    # str.split returns the whole string when "@v" is absent, so no
    # separate membership test is needed to get the base ID.
    source_base_id = id.split("@v")[0]

    seen_base_ids: set[str] = set()
    filtered: list[Item] = []
    for item in items:
        # Prefer the explicit "_base_id" tag; otherwise parse it from the ID
        base_id = item.tags.get("_base_id", item.id.split("@v")[0])

        # Skip the source document (any version) and already-seen documents
        if base_id == source_base_id or base_id in seen_base_ids:
            continue

        seen_base_ids.add(base_id)
        filtered.append(item)
        if len(filtered) >= limit:
            break

    return filtered
|
|
964
|
+
|
|
840
965
|
def query_fulltext(
|
|
841
966
|
self,
|
|
842
967
|
query: str,
|
|
@@ -993,7 +1118,95 @@ class Keeper:
|
|
|
993
1118
|
if result is None:
|
|
994
1119
|
return None
|
|
995
1120
|
return result.to_item()
|
|
996
|
-
|
|
1121
|
+
|
|
1122
|
+
def get_version(
    self,
    id: str,
    offset: int = 0,
    *,
    collection: Optional[str] = None,
) -> Optional[Item]:
    """
    Fetch one version of a document, addressed by how far back it is.

    Offset semantics:
    - 0 = current version
    - 1 = previous version
    - 2 = two versions ago
    - etc.

    Args:
        id: Document identifier
        offset: Version offset (0=current, 1=previous, etc.)
        collection: Target collection

    Returns:
        Item if found, None if version doesn't exist
    """
    resolved = self._resolve_collection(collection)

    # Offset zero is the live document, served by the regular lookup.
    if offset == 0:
        return self.get(id, collection=collection)

    # Any other offset is looked up in the document store's version archive.
    archived = self._document_store.get_version(resolved, id, offset)
    if archived is not None:
        # The archived summary/tags are returned under the requested ID.
        return Item(id=id, summary=archived.summary, tags=archived.tags)
    return None
|
|
1162
|
+
|
|
1163
|
+
def list_versions(
    self,
    id: str,
    limit: int = 10,
    *,
    collection: Optional[str] = None,
) -> list[VersionInfo]:
    """
    Return the archived version history of a document.

    The lookup is delegated to the document store for the resolved
    collection. Versions come back in reverse chronological order
    (newest archived first); the current version is not included.

    Args:
        id: Document identifier
        limit: Maximum versions to return
        collection: Target collection

    Returns:
        List of VersionInfo, newest archived first
    """
    target = self._resolve_collection(collection)
    history = self._document_store.list_versions(target, id, limit)
    return history
|
|
1186
|
+
|
|
1187
|
+
def get_version_nav(
    self,
    id: str,
    current_version: Optional[int] = None,
    limit: int = 3,
    *,
    collection: Optional[str] = None,
) -> dict[str, list[VersionInfo]]:
    """
    Return previous/next version navigation data for display.

    The work is delegated to the document store for the resolved collection.

    Args:
        id: Document identifier
        current_version: The version being viewed (None = current/live version)
        limit: Max previous versions to return when viewing current
        collection: Target collection

    Returns:
        Dict with 'prev' and optionally 'next' lists of VersionInfo.
    """
    target = self._resolve_collection(collection)
    navigation = self._document_store.get_version_nav(
        target, id, current_version, limit
    )
    return navigation
|
|
1209
|
+
|
|
997
1210
|
def exists(self, id: str, *, collection: Optional[str] = None) -> bool:
|
|
998
1211
|
"""
|
|
999
1212
|
Check if an item exists in the store.
|
|
@@ -1002,16 +1215,28 @@ class Keeper:
|
|
|
1002
1215
|
# Check document store first, then ChromaDB
|
|
1003
1216
|
return self._document_store.exists(coll, id) or self._store.exists(coll, id)
|
|
1004
1217
|
|
|
1005
|
-
def delete(
|
|
1218
|
+
def delete(
    self,
    id: str,
    *,
    collection: Optional[str] = None,
    delete_versions: bool = True,
) -> bool:
    """
    Remove an item from the document store and from the embedding store.

    Args:
        id: Document identifier
        collection: Target collection
        delete_versions: If True, also delete version history

    Returns:
        True if item existed and was deleted.
    """
    target = self._resolve_collection(collection)

    # Both deletions run before the results are combined, so a hit in the
    # document store never short-circuits the embedding-store cleanup.
    removed_from_docs = self._document_store.delete(
        target, id, delete_versions=delete_versions
    )
    removed_from_index = self._store.delete(
        target, id, delete_versions=delete_versions
    )
    return removed_from_docs or removed_from_index
|
|
1016
1241
|
|
|
1017
1242
|
# -------------------------------------------------------------------------
|
|
@@ -1157,7 +1382,7 @@ class Keeper:
|
|
|
1157
1382
|
def count(self, *, collection: Optional[str] = None) -> int:
|
|
1158
1383
|
"""
|
|
1159
1384
|
Count items in a collection.
|
|
1160
|
-
|
|
1385
|
+
|
|
1161
1386
|
Returns count from document store if available, else ChromaDB.
|
|
1162
1387
|
"""
|
|
1163
1388
|
coll = self._resolve_collection(collection)
|
|
@@ -1165,7 +1390,36 @@ class Keeper:
|
|
|
1165
1390
|
if doc_count > 0:
|
|
1166
1391
|
return doc_count
|
|
1167
1392
|
return self._store.count(coll)
|
|
1168
|
-
|
|
1393
|
+
|
|
1394
|
+
def list_recent(
    self,
    limit: int = 10,
    *,
    collection: Optional[str] = None,
) -> list[Item]:
    """
    Return recently updated items, read from the document store.

    Args:
        limit: Maximum number to return (default 10)
        collection: Collection to query (uses default if not specified)

    Returns:
        List of Items, most recently updated first
    """
    target = self._resolve_collection(collection)

    recent: list[Item] = []
    for rec in self._document_store.list_recent(target, limit):
        # No similarity query is involved here, so the score is left unset.
        recent.append(
            Item(id=rec.id, summary=rec.summary, tags=rec.tags, score=None)
        )
    return recent
|
|
1422
|
+
|
|
1169
1423
|
def embedding_cache_stats(self) -> dict:
|
|
1170
1424
|
"""
|
|
1171
1425
|
Get embedding cache statistics.
|
|
@@ -1307,6 +1561,66 @@ class Keeper:
|
|
|
1307
1561
|
logger.warning("Failed to spawn background processor: %s", e)
|
|
1308
1562
|
return False
|
|
1309
1563
|
|
|
1564
|
+
def reconcile(
    self,
    collection: Optional[str] = None,
    fix: bool = False,
) -> dict:
    """
    Check and optionally fix consistency between DocumentStore and ChromaDB.

    Detects:
    - Documents in DocumentStore missing from ChromaDB (not searchable)
    - Documents in ChromaDB missing from DocumentStore (orphaned embeddings)

    Orphaned embeddings are only reported; they are never removed here.

    Args:
        collection: Collection to check (None = default collection)
        fix: If True, re-index documents missing from ChromaDB

    Returns:
        Dict with:
        - 'missing_from_chroma': number of IDs only in DocumentStore
        - 'orphaned_in_chroma': number of IDs only in ChromaDB
        - 'fixed': number of missing documents successfully re-indexed
        - 'missing_ids': sorted list of the IDs only in DocumentStore
        - 'orphaned_ids': sorted list of the IDs only in ChromaDB
    """
    coll = self._resolve_collection(collection)

    # Get IDs from both stores
    doc_ids = set(self._document_store.list_ids(coll))
    chroma_ids = set(self._store.list_ids(coll))

    # Sorted so that repair order, log output, and the returned ID lists
    # are deterministic rather than following set iteration order.
    missing_ids = sorted(doc_ids - chroma_ids)
    orphaned_ids = sorted(chroma_ids - doc_ids)

    fixed = 0
    if fix:
        for doc_id in missing_ids:
            # Best-effort repair: a failure on one document is logged and
            # must not stop the remaining documents from being re-indexed.
            try:
                doc_record = self._document_store.get(coll, doc_id)
                if not doc_record:
                    continue

                # Re-fetch the original content and embed it again
                doc = self._document_provider.fetch(doc_id)
                embedding = self._get_embedding_provider().embed(doc.content)

                # Write to ChromaDB with the summary/tags already on record
                self._store.upsert(
                    collection=coll,
                    id=doc_id,
                    embedding=embedding,
                    summary=doc_record.summary,
                    tags=doc_record.tags,
                )
                fixed += 1
                logger.info("Reconciled: %s", doc_id)
            except Exception as e:
                logger.warning("Failed to reconcile %s: %s", doc_id, e)

    return {
        "missing_from_chroma": len(missing_ids),
        "orphaned_in_chroma": len(orphaned_ids),
        "fixed": fixed,
        "missing_ids": missing_ids,
        "orphaned_ids": orphaned_ids,
    }
|
|
1623
|
+
|
|
1310
1624
|
def close(self) -> None:
|
|
1311
1625
|
"""
|
|
1312
1626
|
Close resources (embedding cache connection, pending queue, etc.).
|