keep-skill 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
keep/__init__.py CHANGED
@@ -40,7 +40,7 @@ if not os.environ.get("KEEP_VERBOSE"):
40
40
  from .api import Keeper, NOWDOC_ID
41
41
  from .types import Item, filter_non_system_tags, SYSTEM_TAG_PREFIX
42
42
 
43
- __version__ = "0.2.0"
43
+ __version__ = "0.3.0"
44
44
  __all__ = [
45
45
  "Keeper",
46
46
  "Item",
keep/api.py CHANGED
@@ -114,6 +114,7 @@ from .providers.base import (
114
114
  SummarizationProvider,
115
115
  )
116
116
  from .providers.embedding_cache import CachingEmbeddingProvider
117
+ from .document_store import VersionInfo
117
118
  from .store import ChromaStore
118
119
  from .types import Item, filter_non_system_tags, SYSTEM_TAG_PREFIX
119
120
 
@@ -192,6 +193,25 @@ def _content_hash(content: str) -> str:
192
193
  return hashlib.sha256(content.encode("utf-8")).hexdigest()
193
194
 
194
195
 
196
+ def _text_content_id(content: str) -> str:
197
+ """
198
+ Generate a content-addressed ID for text updates.
199
+
200
+ This makes text updates versioned by content:
201
+ - `keep update "my note"` → ID = _text:{hash[:12]}
202
+ - `keep update "my note" -t status=done` → same ID, new version
203
+ - `keep update "different note"` → different ID
204
+
205
+ Args:
206
+ content: The text content
207
+
208
+ Returns:
209
+ Content-addressed ID in format _text:{hash[:12]}
210
+ """
211
+ content_hash = hashlib.sha256(content.encode("utf-8")).hexdigest()[:12]
212
+ return f"_text:{content_hash}"
213
+
214
+
195
215
  class Keeper:
196
216
  """
197
217
  Semantic memory keeper - persistent storage with similarity search.
@@ -516,14 +536,20 @@ class Keeper:
516
536
  if doc.content_type:
517
537
  merged_tags["_content_type"] = doc.content_type
518
538
 
539
+ # Get existing doc info for versioning before upsert
540
+ old_doc = self._document_store.get(coll, id)
541
+
519
542
  # Dual-write: document store (canonical) + ChromaDB (embedding index)
520
- self._document_store.upsert(
543
+ # DocumentStore.upsert now returns (record, content_changed) and archives old version
544
+ doc_record, content_changed = self._document_store.upsert(
521
545
  collection=coll,
522
546
  id=id,
523
547
  summary=final_summary,
524
548
  tags=merged_tags,
525
549
  content_hash=new_hash,
526
550
  )
551
+
552
+ # Store embedding for current version
527
553
  self._store.upsert(
528
554
  collection=coll,
529
555
  id=id,
@@ -532,6 +558,23 @@ class Keeper:
532
558
  tags=merged_tags,
533
559
  )
534
560
 
561
+ # If content changed and we archived a version, also store versioned embedding
562
+ # Skip if content hash is same (only tags/summary changed)
563
+ if old_doc is not None and content_changed:
564
+ # Get the version number that was just archived
565
+ version_count = self._document_store.version_count(coll, id)
566
+ if version_count > 0:
567
+ # Re-embed the old content for the archived version
568
+ old_embedding = self._get_embedding_provider().embed(old_doc.summary)
569
+ self._store.upsert_version(
570
+ collection=coll,
571
+ id=id,
572
+ version=version_count,
573
+ embedding=old_embedding,
574
+ summary=old_doc.summary,
575
+ tags=old_doc.tags,
576
+ )
577
+
535
578
  # Spawn background processor if lazy (only if summary wasn't user-provided and content changed)
536
579
  if lazy and summary is None and not content_unchanged:
537
580
  self._spawn_processor()
@@ -671,14 +714,20 @@ class Keeper:
671
714
  # Add system tags
672
715
  merged_tags["_source"] = "inline"
673
716
 
717
+ # Get existing doc info for versioning before upsert
718
+ old_doc = self._document_store.get(coll, id)
719
+
674
720
  # Dual-write: document store (canonical) + ChromaDB (embedding index)
675
- self._document_store.upsert(
721
+ # DocumentStore.upsert now returns (record, content_changed) and archives old version
722
+ doc_record, content_changed = self._document_store.upsert(
676
723
  collection=coll,
677
724
  id=id,
678
725
  summary=final_summary,
679
726
  tags=merged_tags,
680
727
  content_hash=new_hash,
681
728
  )
729
+
730
+ # Store embedding for current version
682
731
  self._store.upsert(
683
732
  collection=coll,
684
733
  id=id,
@@ -687,6 +736,23 @@ class Keeper:
687
736
  tags=merged_tags,
688
737
  )
689
738
 
739
+ # If content changed and we archived a version, also store versioned embedding
740
+ # Skip if content hash is same (only tags/summary changed)
741
+ if old_doc is not None and content_changed:
742
+ # Get the version number that was just archived
743
+ version_count = self._document_store.version_count(coll, id)
744
+ if version_count > 0:
745
+ # Re-embed the old content for the archived version
746
+ old_embedding = self._get_embedding_provider().embed(old_doc.summary)
747
+ self._store.upsert_version(
748
+ collection=coll,
749
+ id=id,
750
+ version=version_count,
751
+ embedding=old_embedding,
752
+ summary=old_doc.summary,
753
+ tags=old_doc.tags,
754
+ )
755
+
690
756
  # Spawn background processor if lazy and content was queued (only if content changed)
691
757
  if lazy and summary is None and len(content) > max_len and not content_unchanged:
692
758
  self._spawn_processor()
@@ -993,7 +1059,95 @@ class Keeper:
993
1059
  if result is None:
994
1060
  return None
995
1061
  return result.to_item()
996
-
1062
+
1063
+ def get_version(
1064
+ self,
1065
+ id: str,
1066
+ offset: int = 0,
1067
+ *,
1068
+ collection: Optional[str] = None,
1069
+ ) -> Optional[Item]:
1070
+ """
1071
+ Get a specific version of a document by offset.
1072
+
1073
+ Offset semantics:
1074
+ - 0 = current version
1075
+ - 1 = previous version
1076
+ - 2 = two versions ago
1077
+ - etc.
1078
+
1079
+ Args:
1080
+ id: Document identifier
1081
+ offset: Version offset (0=current, 1=previous, etc.)
1082
+ collection: Target collection
1083
+
1084
+ Returns:
1085
+ Item if found, None if version doesn't exist
1086
+ """
1087
+ coll = self._resolve_collection(collection)
1088
+
1089
+ if offset == 0:
1090
+ # Current version
1091
+ return self.get(id, collection=collection)
1092
+
1093
+ # Get archived version
1094
+ version_info = self._document_store.get_version(coll, id, offset)
1095
+ if version_info is None:
1096
+ return None
1097
+
1098
+ return Item(
1099
+ id=id,
1100
+ summary=version_info.summary,
1101
+ tags=version_info.tags,
1102
+ )
1103
+
1104
+ def list_versions(
1105
+ self,
1106
+ id: str,
1107
+ limit: int = 10,
1108
+ *,
1109
+ collection: Optional[str] = None,
1110
+ ) -> list[VersionInfo]:
1111
+ """
1112
+ List version history for a document.
1113
+
1114
+ Returns versions in reverse chronological order (newest archived first).
1115
+ Does not include the current version.
1116
+
1117
+ Args:
1118
+ id: Document identifier
1119
+ limit: Maximum versions to return
1120
+ collection: Target collection
1121
+
1122
+ Returns:
1123
+ List of VersionInfo, newest archived first
1124
+ """
1125
+ coll = self._resolve_collection(collection)
1126
+ return self._document_store.list_versions(coll, id, limit)
1127
+
1128
+ def get_version_nav(
1129
+ self,
1130
+ id: str,
1131
+ current_version: Optional[int] = None,
1132
+ limit: int = 3,
1133
+ *,
1134
+ collection: Optional[str] = None,
1135
+ ) -> dict[str, list[VersionInfo]]:
1136
+ """
1137
+ Get version navigation info (prev/next) for display.
1138
+
1139
+ Args:
1140
+ id: Document identifier
1141
+ current_version: The version being viewed (None = current/live version)
1142
+ limit: Max previous versions to return when viewing current
1143
+ collection: Target collection
1144
+
1145
+ Returns:
1146
+ Dict with 'prev' and optionally 'next' lists of VersionInfo.
1147
+ """
1148
+ coll = self._resolve_collection(collection)
1149
+ return self._document_store.get_version_nav(coll, id, current_version, limit)
1150
+
997
1151
  def exists(self, id: str, *, collection: Optional[str] = None) -> bool:
998
1152
  """
999
1153
  Check if an item exists in the store.
@@ -1002,16 +1156,28 @@ class Keeper:
1002
1156
  # Check document store first, then ChromaDB
1003
1157
  return self._document_store.exists(coll, id) or self._store.exists(coll, id)
1004
1158
 
1005
- def delete(self, id: str, *, collection: Optional[str] = None) -> bool:
1159
+ def delete(
1160
+ self,
1161
+ id: str,
1162
+ *,
1163
+ collection: Optional[str] = None,
1164
+ delete_versions: bool = True,
1165
+ ) -> bool:
1006
1166
  """
1007
1167
  Delete an item from both stores.
1008
1168
 
1009
- Returns True if item existed and was deleted.
1169
+ Args:
1170
+ id: Document identifier
1171
+ collection: Target collection
1172
+ delete_versions: If True, also delete version history
1173
+
1174
+ Returns:
1175
+ True if item existed and was deleted.
1010
1176
  """
1011
1177
  coll = self._resolve_collection(collection)
1012
- # Delete from both stores
1013
- doc_deleted = self._document_store.delete(coll, id)
1014
- chroma_deleted = self._store.delete(coll, id)
1178
+ # Delete from both stores (including versions)
1179
+ doc_deleted = self._document_store.delete(coll, id, delete_versions=delete_versions)
1180
+ chroma_deleted = self._store.delete(coll, id, delete_versions=delete_versions)
1015
1181
  return doc_deleted or chroma_deleted
1016
1182
 
1017
1183
  # -------------------------------------------------------------------------
@@ -1157,7 +1323,7 @@ class Keeper:
1157
1323
  def count(self, *, collection: Optional[str] = None) -> int:
1158
1324
  """
1159
1325
  Count items in a collection.
1160
-
1326
+
1161
1327
  Returns count from document store if available, else ChromaDB.
1162
1328
  """
1163
1329
  coll = self._resolve_collection(collection)
@@ -1165,7 +1331,36 @@ class Keeper:
1165
1331
  if doc_count > 0:
1166
1332
  return doc_count
1167
1333
  return self._store.count(coll)
1168
-
1334
+
1335
+ def list_recent(
1336
+ self,
1337
+ limit: int = 10,
1338
+ *,
1339
+ collection: Optional[str] = None,
1340
+ ) -> list[Item]:
1341
+ """
1342
+ List recent items ordered by update time.
1343
+
1344
+ Args:
1345
+ limit: Maximum number to return (default 10)
1346
+ collection: Collection to query (uses default if not specified)
1347
+
1348
+ Returns:
1349
+ List of Items, most recently updated first
1350
+ """
1351
+ coll = self._resolve_collection(collection)
1352
+ records = self._document_store.list_recent(coll, limit)
1353
+
1354
+ return [
1355
+ Item(
1356
+ id=rec.id,
1357
+ summary=rec.summary,
1358
+ tags=rec.tags,
1359
+ score=None,
1360
+ )
1361
+ for rec in records
1362
+ ]
1363
+
1169
1364
  def embedding_cache_stats(self) -> dict:
1170
1365
  """
1171
1366
  Get embedding cache statistics.
@@ -1307,6 +1502,66 @@ class Keeper:
1307
1502
  logger.warning("Failed to spawn background processor: %s", e)
1308
1503
  return False
1309
1504
 
1505
+ def reconcile(
1506
+ self,
1507
+ collection: Optional[str] = None,
1508
+ fix: bool = False,
1509
+ ) -> dict:
1510
+ """
1511
+ Check and optionally fix consistency between DocumentStore and ChromaDB.
1512
+
1513
+ Detects:
1514
+ - Documents in DocumentStore missing from ChromaDB (not searchable)
1515
+ - Documents in ChromaDB missing from DocumentStore (orphaned embeddings)
1516
+
1517
+ Args:
1518
+ collection: Collection to check (None = default collection)
1519
+ fix: If True, re-index documents missing from ChromaDB
1520
+
1521
+ Returns:
1522
+ Dict with 'missing_from_chroma', 'orphaned_in_chroma', 'fixed' counts
1523
+ """
1524
+ coll = self._resolve_collection(collection)
1525
+
1526
+ # Get IDs from both stores
1527
+ doc_ids = set(self._document_store.list_ids(coll))
1528
+ chroma_ids = set(self._store.list_ids(coll))
1529
+
1530
+ missing_from_chroma = doc_ids - chroma_ids
1531
+ orphaned_in_chroma = chroma_ids - doc_ids
1532
+
1533
+ fixed = 0
1534
+ if fix and missing_from_chroma:
1535
+ for doc_id in missing_from_chroma:
1536
+ try:
1537
+ # Re-fetch and re-index
1538
+ doc_record = self._document_store.get(coll, doc_id)
1539
+ if doc_record:
1540
+ # Fetch original content
1541
+ doc = self._document_provider.fetch(doc_id)
1542
+ embedding = self._get_embedding_provider().embed(doc.content)
1543
+
1544
+ # Write to ChromaDB
1545
+ self._store.upsert(
1546
+ collection=coll,
1547
+ id=doc_id,
1548
+ embedding=embedding,
1549
+ summary=doc_record.summary,
1550
+ tags=doc_record.tags,
1551
+ )
1552
+ fixed += 1
1553
+ logger.info("Reconciled: %s", doc_id)
1554
+ except Exception as e:
1555
+ logger.warning("Failed to reconcile %s: %s", doc_id, e)
1556
+
1557
+ return {
1558
+ "missing_from_chroma": len(missing_from_chroma),
1559
+ "orphaned_in_chroma": len(orphaned_in_chroma),
1560
+ "fixed": fixed,
1561
+ "missing_ids": list(missing_from_chroma) if missing_from_chroma else [],
1562
+ "orphaned_ids": list(orphaned_in_chroma) if orphaned_in_chroma else [],
1563
+ }
1564
+
1310
1565
  def close(self) -> None:
1311
1566
  """
1312
1567
  Close resources (embedding cache connection, pending queue, etc.).