keep-skill 0.2.0__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
keep/__init__.py CHANGED
@@ -40,7 +40,7 @@ if not os.environ.get("KEEP_VERBOSE"):
40
40
  from .api import Keeper, NOWDOC_ID
41
41
  from .types import Item, filter_non_system_tags, SYSTEM_TAG_PREFIX
42
42
 
43
- __version__ = "0.2.0"
43
+ __version__ = "0.4.1"
44
44
  __all__ = [
45
45
  "Keeper",
46
46
  "Item",
keep/api.py CHANGED
@@ -114,6 +114,7 @@ from .providers.base import (
114
114
  SummarizationProvider,
115
115
  )
116
116
  from .providers.embedding_cache import CachingEmbeddingProvider
117
+ from .document_store import VersionInfo
117
118
  from .store import ChromaStore
118
119
  from .types import Item, filter_non_system_tags, SYSTEM_TAG_PREFIX
119
120
 
@@ -192,6 +193,25 @@ def _content_hash(content: str) -> str:
192
193
  return hashlib.sha256(content.encode("utf-8")).hexdigest()
193
194
 
194
195
 
196
+ def _text_content_id(content: str) -> str:
197
+ """
198
+ Generate a content-addressed ID for text updates.
199
+
200
+ This makes text updates versioned by content:
201
+ - `keep update "my note"` → ID = _text:{hash[:12]}
202
+ - `keep update "my note" -t status=done` → same ID, new version
203
+ - `keep update "different note"` → different ID
204
+
205
+ Args:
206
+ content: The text content
207
+
208
+ Returns:
209
+ Content-addressed ID in format _text:{hash[:12]}
210
+ """
211
+ content_hash = hashlib.sha256(content.encode("utf-8")).hexdigest()[:12]
212
+ return f"_text:{content_hash}"
213
+
214
+
195
215
  class Keeper:
196
216
  """
197
217
  Semantic memory keeper - persistent storage with similarity search.
@@ -516,14 +536,20 @@ class Keeper:
516
536
  if doc.content_type:
517
537
  merged_tags["_content_type"] = doc.content_type
518
538
 
539
+ # Get existing doc info for versioning before upsert
540
+ old_doc = self._document_store.get(coll, id)
541
+
519
542
  # Dual-write: document store (canonical) + ChromaDB (embedding index)
520
- self._document_store.upsert(
543
+ # DocumentStore.upsert now returns (record, content_changed) and archives old version
544
+ doc_record, content_changed = self._document_store.upsert(
521
545
  collection=coll,
522
546
  id=id,
523
547
  summary=final_summary,
524
548
  tags=merged_tags,
525
549
  content_hash=new_hash,
526
550
  )
551
+
552
+ # Store embedding for current version
527
553
  self._store.upsert(
528
554
  collection=coll,
529
555
  id=id,
@@ -532,6 +558,23 @@ class Keeper:
532
558
  tags=merged_tags,
533
559
  )
534
560
 
561
+ # If content changed and we archived a version, also store versioned embedding
562
+ # Skip if content hash is same (only tags/summary changed)
563
+ if old_doc is not None and content_changed:
564
+ # Get the version number that was just archived
565
+ version_count = self._document_store.version_count(coll, id)
566
+ if version_count > 0:
567
+ # Re-embed the old content for the archived version
568
+ old_embedding = self._get_embedding_provider().embed(old_doc.summary)
569
+ self._store.upsert_version(
570
+ collection=coll,
571
+ id=id,
572
+ version=version_count,
573
+ embedding=old_embedding,
574
+ summary=old_doc.summary,
575
+ tags=old_doc.tags,
576
+ )
577
+
535
578
  # Spawn background processor if lazy (only if summary wasn't user-provided and content changed)
536
579
  if lazy and summary is None and not content_unchanged:
537
580
  self._spawn_processor()
@@ -671,14 +714,20 @@ class Keeper:
671
714
  # Add system tags
672
715
  merged_tags["_source"] = "inline"
673
716
 
717
+ # Get existing doc info for versioning before upsert
718
+ old_doc = self._document_store.get(coll, id)
719
+
674
720
  # Dual-write: document store (canonical) + ChromaDB (embedding index)
675
- self._document_store.upsert(
721
+ # DocumentStore.upsert now returns (record, content_changed) and archives old version
722
+ doc_record, content_changed = self._document_store.upsert(
676
723
  collection=coll,
677
724
  id=id,
678
725
  summary=final_summary,
679
726
  tags=merged_tags,
680
727
  content_hash=new_hash,
681
728
  )
729
+
730
+ # Store embedding for current version
682
731
  self._store.upsert(
683
732
  collection=coll,
684
733
  id=id,
@@ -687,6 +736,23 @@ class Keeper:
687
736
  tags=merged_tags,
688
737
  )
689
738
 
739
+ # If content changed and we archived a version, also store versioned embedding
740
+ # Skip if content hash is same (only tags/summary changed)
741
+ if old_doc is not None and content_changed:
742
+ # Get the version number that was just archived
743
+ version_count = self._document_store.version_count(coll, id)
744
+ if version_count > 0:
745
+ # Re-embed the old content for the archived version
746
+ old_embedding = self._get_embedding_provider().embed(old_doc.summary)
747
+ self._store.upsert_version(
748
+ collection=coll,
749
+ id=id,
750
+ version=version_count,
751
+ embedding=old_embedding,
752
+ summary=old_doc.summary,
753
+ tags=old_doc.tags,
754
+ )
755
+
690
756
  # Spawn background processor if lazy and content was queued (only if content changed)
691
757
  if lazy and summary is None and len(content) > max_len and not content_unchanged:
692
758
  self._spawn_processor()
@@ -836,7 +902,66 @@ class Keeper:
836
902
  items = _filter_by_date(items, since)
837
903
 
838
904
  return items[:limit]
839
-
905
+
906
+ def get_similar_for_display(
907
+ self,
908
+ id: str,
909
+ *,
910
+ limit: int = 3,
911
+ collection: Optional[str] = None
912
+ ) -> list[Item]:
913
+ """
914
+ Find similar items for frontmatter display using stored embedding.
915
+
916
+ Optimized for display: uses stored embedding (no re-embedding),
917
+ filters to distinct base documents, excludes source document versions.
918
+
919
+ Args:
920
+ id: ID of item to find similar items for
921
+ limit: Maximum results to return
922
+ collection: Target collection
923
+
924
+ Returns:
925
+ List of similar items, one per unique base document
926
+ """
927
+ coll = self._resolve_collection(collection)
928
+
929
+ # Get the stored embedding (no re-embedding)
930
+ embedding = self._store.get_embedding(coll, id)
931
+ if embedding is None:
932
+ return []
933
+
934
+ # Fetch more than needed to account for version filtering
935
+ fetch_limit = limit * 3
936
+ results = self._store.query_embedding(coll, embedding, limit=fetch_limit)
937
+
938
+ # Convert to Items
939
+ items = [r.to_item() for r in results]
940
+
941
+ # Extract base ID of source document
942
+ source_base_id = id.split("@v")[0] if "@v" in id else id
943
+
944
+ # Filter to distinct base IDs, excluding source document
945
+ seen_base_ids: set[str] = set()
946
+ filtered: list[Item] = []
947
+ for item in items:
948
+ # Get base ID from tags or parse from ID
949
+ base_id = item.tags.get("_base_id", item.id.split("@v")[0] if "@v" in item.id else item.id)
950
+
951
+ # Skip versions of source document
952
+ if base_id == source_base_id:
953
+ continue
954
+
955
+ # Keep only first version of each document
956
+ if base_id not in seen_base_ids:
957
+ seen_base_ids.add(base_id)
958
+ filtered.append(item)
959
+
960
+ if len(filtered) >= limit:
961
+ break
962
+
963
+ return filtered
964
+
840
965
  def query_fulltext(
841
966
  self,
842
967
  query: str,
@@ -993,7 +1118,95 @@ class Keeper:
993
1118
  if result is None:
994
1119
  return None
995
1120
  return result.to_item()
996
-
1121
+
1122
+ def get_version(
1123
+ self,
1124
+ id: str,
1125
+ offset: int = 0,
1126
+ *,
1127
+ collection: Optional[str] = None,
1128
+ ) -> Optional[Item]:
1129
+ """
1130
+ Get a specific version of a document by offset.
1131
+
1132
+ Offset semantics:
1133
+ - 0 = current version
1134
+ - 1 = previous version
1135
+ - 2 = two versions ago
1136
+ - etc.
1137
+
1138
+ Args:
1139
+ id: Document identifier
1140
+ offset: Version offset (0=current, 1=previous, etc.)
1141
+ collection: Target collection
1142
+
1143
+ Returns:
1144
+ Item if found, None if version doesn't exist
1145
+ """
1146
+ coll = self._resolve_collection(collection)
1147
+
1148
+ if offset == 0:
1149
+ # Current version
1150
+ return self.get(id, collection=collection)
1151
+
1152
+ # Get archived version
1153
+ version_info = self._document_store.get_version(coll, id, offset)
1154
+ if version_info is None:
1155
+ return None
1156
+
1157
+ return Item(
1158
+ id=id,
1159
+ summary=version_info.summary,
1160
+ tags=version_info.tags,
1161
+ )
1162
+
1163
+ def list_versions(
1164
+ self,
1165
+ id: str,
1166
+ limit: int = 10,
1167
+ *,
1168
+ collection: Optional[str] = None,
1169
+ ) -> list[VersionInfo]:
1170
+ """
1171
+ List version history for a document.
1172
+
1173
+ Returns versions in reverse chronological order (newest archived first).
1174
+ Does not include the current version.
1175
+
1176
+ Args:
1177
+ id: Document identifier
1178
+ limit: Maximum versions to return
1179
+ collection: Target collection
1180
+
1181
+ Returns:
1182
+ List of VersionInfo, newest archived first
1183
+ """
1184
+ coll = self._resolve_collection(collection)
1185
+ return self._document_store.list_versions(coll, id, limit)
1186
+
1187
+ def get_version_nav(
1188
+ self,
1189
+ id: str,
1190
+ current_version: Optional[int] = None,
1191
+ limit: int = 3,
1192
+ *,
1193
+ collection: Optional[str] = None,
1194
+ ) -> dict[str, list[VersionInfo]]:
1195
+ """
1196
+ Get version navigation info (prev/next) for display.
1197
+
1198
+ Args:
1199
+ id: Document identifier
1200
+ current_version: The version being viewed (None = current/live version)
1201
+ limit: Max previous versions to return when viewing current
1202
+ collection: Target collection
1203
+
1204
+ Returns:
1205
+ Dict with 'prev' and optionally 'next' lists of VersionInfo.
1206
+ """
1207
+ coll = self._resolve_collection(collection)
1208
+ return self._document_store.get_version_nav(coll, id, current_version, limit)
1209
+
997
1210
  def exists(self, id: str, *, collection: Optional[str] = None) -> bool:
998
1211
  """
999
1212
  Check if an item exists in the store.
@@ -1002,16 +1215,28 @@ class Keeper:
1002
1215
  # Check document store first, then ChromaDB
1003
1216
  return self._document_store.exists(coll, id) or self._store.exists(coll, id)
1004
1217
 
1005
- def delete(self, id: str, *, collection: Optional[str] = None) -> bool:
1218
+ def delete(
1219
+ self,
1220
+ id: str,
1221
+ *,
1222
+ collection: Optional[str] = None,
1223
+ delete_versions: bool = True,
1224
+ ) -> bool:
1006
1225
  """
1007
1226
  Delete an item from both stores.
1008
1227
 
1009
- Returns True if item existed and was deleted.
1228
+ Args:
1229
+ id: Document identifier
1230
+ collection: Target collection
1231
+ delete_versions: If True, also delete version history
1232
+
1233
+ Returns:
1234
+ True if item existed and was deleted.
1010
1235
  """
1011
1236
  coll = self._resolve_collection(collection)
1012
- # Delete from both stores
1013
- doc_deleted = self._document_store.delete(coll, id)
1014
- chroma_deleted = self._store.delete(coll, id)
1237
+ # Delete from both stores (including versions)
1238
+ doc_deleted = self._document_store.delete(coll, id, delete_versions=delete_versions)
1239
+ chroma_deleted = self._store.delete(coll, id, delete_versions=delete_versions)
1015
1240
  return doc_deleted or chroma_deleted
1016
1241
 
1017
1242
  # -------------------------------------------------------------------------
@@ -1157,7 +1382,7 @@ class Keeper:
1157
1382
  def count(self, *, collection: Optional[str] = None) -> int:
1158
1383
  """
1159
1384
  Count items in a collection.
1160
-
1385
+
1161
1386
  Returns count from document store if available, else ChromaDB.
1162
1387
  """
1163
1388
  coll = self._resolve_collection(collection)
@@ -1165,7 +1390,36 @@ class Keeper:
1165
1390
  if doc_count > 0:
1166
1391
  return doc_count
1167
1392
  return self._store.count(coll)
1168
-
1393
+
1394
+ def list_recent(
1395
+ self,
1396
+ limit: int = 10,
1397
+ *,
1398
+ collection: Optional[str] = None,
1399
+ ) -> list[Item]:
1400
+ """
1401
+ List recent items ordered by update time.
1402
+
1403
+ Args:
1404
+ limit: Maximum number to return (default 10)
1405
+ collection: Collection to query (uses default if not specified)
1406
+
1407
+ Returns:
1408
+ List of Items, most recently updated first
1409
+ """
1410
+ coll = self._resolve_collection(collection)
1411
+ records = self._document_store.list_recent(coll, limit)
1412
+
1413
+ return [
1414
+ Item(
1415
+ id=rec.id,
1416
+ summary=rec.summary,
1417
+ tags=rec.tags,
1418
+ score=None,
1419
+ )
1420
+ for rec in records
1421
+ ]
1422
+
1169
1423
  def embedding_cache_stats(self) -> dict:
1170
1424
  """
1171
1425
  Get embedding cache statistics.
@@ -1307,6 +1561,66 @@ class Keeper:
1307
1561
  logger.warning("Failed to spawn background processor: %s", e)
1308
1562
  return False
1309
1563
 
1564
+ def reconcile(
1565
+ self,
1566
+ collection: Optional[str] = None,
1567
+ fix: bool = False,
1568
+ ) -> dict:
1569
+ """
1570
+ Check and optionally fix consistency between DocumentStore and ChromaDB.
1571
+
1572
+ Detects:
1573
+ - Documents in DocumentStore missing from ChromaDB (not searchable)
1574
+ - Documents in ChromaDB missing from DocumentStore (orphaned embeddings)
1575
+
1576
+ Args:
1577
+ collection: Collection to check (None = default collection)
1578
+ fix: If True, re-index documents missing from ChromaDB
1579
+
1580
+ Returns:
1581
+ Dict with 'missing_from_chroma', 'orphaned_in_chroma', 'fixed' counts
1582
+ """
1583
+ coll = self._resolve_collection(collection)
1584
+
1585
+ # Get IDs from both stores
1586
+ doc_ids = set(self._document_store.list_ids(coll))
1587
+ chroma_ids = set(self._store.list_ids(coll))
1588
+
1589
+ missing_from_chroma = doc_ids - chroma_ids
1590
+ orphaned_in_chroma = chroma_ids - doc_ids
1591
+
1592
+ fixed = 0
1593
+ if fix and missing_from_chroma:
1594
+ for doc_id in missing_from_chroma:
1595
+ try:
1596
+ # Re-fetch and re-index
1597
+ doc_record = self._document_store.get(coll, doc_id)
1598
+ if doc_record:
1599
+ # Fetch original content
1600
+ doc = self._document_provider.fetch(doc_id)
1601
+ embedding = self._get_embedding_provider().embed(doc.content)
1602
+
1603
+ # Write to ChromaDB
1604
+ self._store.upsert(
1605
+ collection=coll,
1606
+ id=doc_id,
1607
+ embedding=embedding,
1608
+ summary=doc_record.summary,
1609
+ tags=doc_record.tags,
1610
+ )
1611
+ fixed += 1
1612
+ logger.info("Reconciled: %s", doc_id)
1613
+ except Exception as e:
1614
+ logger.warning("Failed to reconcile %s: %s", doc_id, e)
1615
+
1616
+ return {
1617
+ "missing_from_chroma": len(missing_from_chroma),
1618
+ "orphaned_in_chroma": len(orphaned_in_chroma),
1619
+ "fixed": fixed,
1620
+ "missing_ids": list(missing_from_chroma) if missing_from_chroma else [],
1621
+ "orphaned_ids": list(orphaned_in_chroma) if orphaned_in_chroma else [],
1622
+ }
1623
+
1310
1624
  def close(self) -> None:
1311
1625
  """
1312
1626
  Close resources (embedding cache connection, pending queue, etc.).