keep-skill 0.2.0__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
keep/config.py CHANGED
@@ -21,7 +21,7 @@ except ImportError:
21
21
 
22
22
 
23
23
  CONFIG_FILENAME = "keep.toml"
24
- CONFIG_VERSION = 2 # Bumped for embedding identity tracking
24
+ CONFIG_VERSION = 3 # Bumped for document versioning support
25
25
 
26
26
 
27
27
  @dataclass
@@ -89,7 +89,7 @@ class StoreConfig:
89
89
  default_tags: dict[str, str] = field(default_factory=dict)
90
90
 
91
91
  # Maximum length for summaries (used for smart remember and validation)
92
- max_summary_length: int = 1000
92
+ max_summary_length: int = 500
93
93
 
94
94
  @property
95
95
  def config_path(self) -> Path:
keep/document_store.py CHANGED
@@ -22,6 +22,24 @@ from pathlib import Path
22
22
  from typing import Any, Optional
23
23
 
24
24
 
25
+ # Schema version for migrations
26
+ SCHEMA_VERSION = 1
27
+
28
+
29
+ @dataclass
30
+ class VersionInfo:
31
+ """
32
+ Information about a document version.
33
+
34
+ Used for version navigation and history display.
35
+ """
36
+ version: int # 1=oldest archived, increasing
37
+ summary: str
38
+ tags: dict[str, str]
39
+ created_at: str
40
+ content_hash: Optional[str] = None
41
+
42
+
25
43
  @dataclass
26
44
  class DocumentRecord:
27
45
  """
@@ -63,7 +81,12 @@ class DocumentStore:
63
81
  self._db_path.parent.mkdir(parents=True, exist_ok=True)
64
82
  self._conn = sqlite3.connect(str(self._db_path), check_same_thread=False)
65
83
  self._conn.row_factory = sqlite3.Row
66
-
84
+
85
+ # Enable WAL mode for better concurrent access across processes
86
+ self._conn.execute("PRAGMA journal_mode=WAL")
87
+ # Wait up to 5 seconds for locks instead of failing immediately
88
+ self._conn.execute("PRAGMA busy_timeout=5000")
89
+
67
90
  self._conn.execute("""
68
91
  CREATE TABLE IF NOT EXISTS documents (
69
92
  id TEXT NOT NULL,
@@ -82,20 +105,57 @@ class DocumentStore:
82
105
  columns = {row[1] for row in cursor.fetchall()}
83
106
  if "content_hash" not in columns:
84
107
  self._conn.execute("ALTER TABLE documents ADD COLUMN content_hash TEXT")
85
-
108
+
86
109
  # Index for collection queries
87
110
  self._conn.execute("""
88
111
  CREATE INDEX IF NOT EXISTS idx_documents_collection
89
112
  ON documents(collection)
90
113
  """)
91
-
114
+
92
115
  # Index for timestamp queries
93
116
  self._conn.execute("""
94
117
  CREATE INDEX IF NOT EXISTS idx_documents_updated
95
118
  ON documents(updated_at)
96
119
  """)
97
-
120
+
98
121
  self._conn.commit()
122
+
123
+ # Run schema migrations
124
+ self._migrate_schema()
125
+
126
+ def _migrate_schema(self) -> None:
127
+ """
128
+ Run schema migrations using PRAGMA user_version.
129
+
130
+ Migrations:
131
+ - Version 0 → 1: Create document_versions table
132
+ """
133
+ cursor = self._conn.execute("PRAGMA user_version")
134
+ current_version = cursor.fetchone()[0]
135
+
136
+ if current_version < 1:
137
+ # Create versions table for document history
138
+ self._conn.execute("""
139
+ CREATE TABLE IF NOT EXISTS document_versions (
140
+ id TEXT NOT NULL,
141
+ collection TEXT NOT NULL,
142
+ version INTEGER NOT NULL,
143
+ summary TEXT NOT NULL,
144
+ tags_json TEXT NOT NULL,
145
+ content_hash TEXT,
146
+ created_at TEXT NOT NULL,
147
+ PRIMARY KEY (id, collection, version)
148
+ )
149
+ """)
150
+
151
+ # Index for efficient version lookups
152
+ self._conn.execute("""
153
+ CREATE INDEX IF NOT EXISTS idx_versions_doc
154
+ ON document_versions(id, collection, version DESC)
155
+ """)
156
+
157
+ self._conn.execute(f"PRAGMA user_version = {SCHEMA_VERSION}")
158
+ self._conn.commit()
99
159
 
100
160
  def _now(self) -> str:
101
161
  """Current timestamp in ISO format."""
@@ -134,11 +194,12 @@ class DocumentStore:
134
194
  summary: str,
135
195
  tags: dict[str, str],
136
196
  content_hash: Optional[str] = None,
137
- ) -> DocumentRecord:
197
+ ) -> tuple[DocumentRecord, bool]:
138
198
  """
139
199
  Insert or update a document record.
140
200
 
141
201
  Preserves created_at on update. Updates updated_at always.
202
+ Archives the current version to history before updating.
142
203
 
143
204
  Args:
144
205
  collection: Collection name
@@ -148,15 +209,27 @@ class DocumentStore:
148
209
  content_hash: SHA256 hash of content (for change detection)
149
210
 
150
211
  Returns:
151
- The stored DocumentRecord
212
+ Tuple of (stored DocumentRecord, content_changed bool).
213
+ content_changed is True if content hash differs from previous,
214
+ False if only tags/summary changed or if new document.
152
215
  """
153
216
  now = self._now()
154
217
  tags_json = json.dumps(tags, ensure_ascii=False)
155
218
 
156
219
  with self._lock:
157
- # Check if exists to preserve created_at
220
+ # Check if exists to preserve created_at and archive
158
221
  existing = self._get_unlocked(collection, id)
159
222
  created_at = existing.created_at if existing else now
223
+ content_changed = False
224
+
225
+ if existing:
226
+ # Archive current version before updating
227
+ self._archive_current_unlocked(collection, id, existing)
228
+ # Detect content change
229
+ content_changed = (
230
+ content_hash is not None
231
+ and existing.content_hash != content_hash
232
+ )
160
233
 
161
234
  self._conn.execute("""
162
235
  INSERT OR REPLACE INTO documents
@@ -173,7 +246,51 @@ class DocumentStore:
173
246
  created_at=created_at,
174
247
  updated_at=now,
175
248
  content_hash=content_hash,
176
- )
249
+ ), content_changed
250
+
251
+ def _archive_current_unlocked(
252
+ self,
253
+ collection: str,
254
+ id: str,
255
+ current: DocumentRecord,
256
+ ) -> int:
257
+ """
258
+ Archive the current version to the versions table.
259
+
260
+ Must be called within a lock context.
261
+
262
+ Args:
263
+ collection: Collection name
264
+ id: Document identifier
265
+ current: Current document record to archive
266
+
267
+ Returns:
268
+ The version number assigned to the archived version
269
+ """
270
+ # Get the next version number
271
+ cursor = self._conn.execute("""
272
+ SELECT COALESCE(MAX(version), 0) + 1
273
+ FROM document_versions
274
+ WHERE id = ? AND collection = ?
275
+ """, (id, collection))
276
+ next_version = cursor.fetchone()[0]
277
+
278
+ # Insert the current state as a version
279
+ self._conn.execute("""
280
+ INSERT INTO document_versions
281
+ (id, collection, version, summary, tags_json, content_hash, created_at)
282
+ VALUES (?, ?, ?, ?, ?, ?, ?)
283
+ """, (
284
+ id,
285
+ collection,
286
+ next_version,
287
+ current.summary,
288
+ json.dumps(current.tags, ensure_ascii=False),
289
+ current.content_hash,
290
+ current.updated_at, # Use updated_at as the version's timestamp
291
+ ))
292
+
293
+ return next_version
177
294
 
178
295
  def update_summary(self, collection: str, id: str, summary: str) -> bool:
179
296
  """
@@ -231,13 +348,14 @@ class DocumentStore:
231
348
 
232
349
  return cursor.rowcount > 0
233
350
 
234
- def delete(self, collection: str, id: str) -> bool:
351
+ def delete(self, collection: str, id: str, delete_versions: bool = True) -> bool:
235
352
  """
236
- Delete a document record.
353
+ Delete a document record and optionally its version history.
237
354
 
238
355
  Args:
239
356
  collection: Collection name
240
357
  id: Document identifier
358
+ delete_versions: If True, also delete version history
241
359
 
242
360
  Returns:
243
361
  True if document existed and was deleted
@@ -247,6 +365,13 @@ class DocumentStore:
247
365
  DELETE FROM documents
248
366
  WHERE id = ? AND collection = ?
249
367
  """, (id, collection))
368
+
369
+ if delete_versions:
370
+ self._conn.execute("""
371
+ DELETE FROM document_versions
372
+ WHERE id = ? AND collection = ?
373
+ """, (id, collection))
374
+
250
375
  self._conn.commit()
251
376
 
252
377
  return cursor.rowcount > 0
@@ -285,7 +410,185 @@ class DocumentStore:
285
410
  updated_at=row["updated_at"],
286
411
  content_hash=row["content_hash"],
287
412
  )
288
-
413
+
414
+ def get_version(
415
+ self,
416
+ collection: str,
417
+ id: str,
418
+ offset: int = 0,
419
+ ) -> Optional[VersionInfo]:
420
+ """
421
+ Get a specific version of a document by offset.
422
+
423
+ Offset semantics:
424
+ - 0 = current version (returns None, use get() instead)
425
+ - 1 = previous version (most recent archived)
426
+ - 2 = two versions ago
427
+ - etc.
428
+
429
+ Args:
430
+ collection: Collection name
431
+ id: Document identifier
432
+ offset: Version offset (0=current, 1=previous, etc.)
433
+
434
+ Returns:
435
+ VersionInfo if found, None if offset 0 or version doesn't exist
436
+ """
437
+ if offset == 0:
438
+ # Offset 0 means current - caller should use get()
439
+ return None
440
+
441
+ # Get max version to calculate the target
442
+ cursor = self._conn.execute("""
443
+ SELECT MAX(version) FROM document_versions
444
+ WHERE id = ? AND collection = ?
445
+ """, (id, collection))
446
+ max_version = cursor.fetchone()[0]
447
+
448
+ if max_version is None:
449
+ return None # No versions archived
450
+
451
+ # offset=1 → max_version, offset=2 → max_version-1, etc.
452
+ target_version = max_version - (offset - 1)
453
+
454
+ if target_version < 1:
455
+ return None # Requested version doesn't exist
456
+
457
+ cursor = self._conn.execute("""
458
+ SELECT version, summary, tags_json, content_hash, created_at
459
+ FROM document_versions
460
+ WHERE id = ? AND collection = ? AND version = ?
461
+ """, (id, collection, target_version))
462
+
463
+ row = cursor.fetchone()
464
+ if row is None:
465
+ return None
466
+
467
+ return VersionInfo(
468
+ version=row["version"],
469
+ summary=row["summary"],
470
+ tags=json.loads(row["tags_json"]),
471
+ created_at=row["created_at"],
472
+ content_hash=row["content_hash"],
473
+ )
474
+
475
+ def list_versions(
476
+ self,
477
+ collection: str,
478
+ id: str,
479
+ limit: int = 10,
480
+ ) -> list[VersionInfo]:
481
+ """
482
+ List version history for a document.
483
+
484
+ Returns versions in reverse chronological order (newest first).
485
+
486
+ Args:
487
+ collection: Collection name
488
+ id: Document identifier
489
+ limit: Maximum versions to return
490
+
491
+ Returns:
492
+ List of VersionInfo, newest archived first
493
+ """
494
+ cursor = self._conn.execute("""
495
+ SELECT version, summary, tags_json, content_hash, created_at
496
+ FROM document_versions
497
+ WHERE id = ? AND collection = ?
498
+ ORDER BY version DESC
499
+ LIMIT ?
500
+ """, (id, collection, limit))
501
+
502
+ versions = []
503
+ for row in cursor:
504
+ versions.append(VersionInfo(
505
+ version=row["version"],
506
+ summary=row["summary"],
507
+ tags=json.loads(row["tags_json"]),
508
+ created_at=row["created_at"],
509
+ content_hash=row["content_hash"],
510
+ ))
511
+
512
+ return versions
513
+
514
+ def get_version_nav(
515
+ self,
516
+ collection: str,
517
+ id: str,
518
+ current_version: Optional[int] = None,
519
+ limit: int = 3,
520
+ ) -> dict[str, list[VersionInfo]]:
521
+ """
522
+ Get version navigation info (prev/next) for display.
523
+
524
+ Args:
525
+ collection: Collection name
526
+ id: Document identifier
527
+ current_version: The version being viewed (None = current/live version)
528
+ limit: Max previous versions to return when viewing current
529
+
530
+ Returns:
531
+ Dict with 'prev' and optionally 'next' lists of VersionInfo.
532
+ When viewing current (None): {'prev': [up to limit versions]}
533
+ When viewing old version N: {'prev': [N-1 if exists], 'next': [N+1 if exists]}
534
+ """
535
+ result: dict[str, list[VersionInfo]] = {"prev": []}
536
+
537
+ if current_version is None:
538
+ # Viewing current version: get up to `limit` previous versions
539
+ versions = self.list_versions(collection, id, limit=limit)
540
+ result["prev"] = versions
541
+ else:
542
+ # Viewing an old version: get prev (N-1) and next (N+1)
543
+ # Previous version (older)
544
+ if current_version > 1:
545
+ cursor = self._conn.execute("""
546
+ SELECT version, summary, tags_json, content_hash, created_at
547
+ FROM document_versions
548
+ WHERE id = ? AND collection = ? AND version = ?
549
+ """, (id, collection, current_version - 1))
550
+ row = cursor.fetchone()
551
+ if row:
552
+ result["prev"] = [VersionInfo(
553
+ version=row["version"],
554
+ summary=row["summary"],
555
+ tags=json.loads(row["tags_json"]),
556
+ created_at=row["created_at"],
557
+ content_hash=row["content_hash"],
558
+ )]
559
+
560
+ # Next version (newer)
561
+ cursor = self._conn.execute("""
562
+ SELECT version, summary, tags_json, content_hash, created_at
563
+ FROM document_versions
564
+ WHERE id = ? AND collection = ? AND version = ?
565
+ """, (id, collection, current_version + 1))
566
+ row = cursor.fetchone()
567
+ if row:
568
+ result["next"] = [VersionInfo(
569
+ version=row["version"],
570
+ summary=row["summary"],
571
+ tags=json.loads(row["tags_json"]),
572
+ created_at=row["created_at"],
573
+ content_hash=row["content_hash"],
574
+ )]
575
+ else:
576
+ # Check if there's a current version (meaning we're at newest archived)
577
+ if self.exists(collection, id):
578
+ # Next is "current" - indicate this with empty next
579
+ # (caller knows to check current doc)
580
+ result["next"] = []
581
+
582
+ return result
583
+
584
+ def version_count(self, collection: str, id: str) -> int:
585
+ """Count archived versions for a document."""
586
+ cursor = self._conn.execute("""
587
+ SELECT COUNT(*) FROM document_versions
588
+ WHERE id = ? AND collection = ?
589
+ """, (id, collection))
590
+ return cursor.fetchone()[0]
591
+
289
592
  def get_many(
290
593
  self,
291
594
  collection: str,
@@ -363,7 +666,43 @@ class DocumentStore:
363
666
  """, (collection,))
364
667
 
365
668
  return [row["id"] for row in cursor]
366
-
669
+
670
+ def list_recent(
671
+ self,
672
+ collection: str,
673
+ limit: int = 10,
674
+ ) -> list[DocumentRecord]:
675
+ """
676
+ List recent documents ordered by update time.
677
+
678
+ Args:
679
+ collection: Collection name
680
+ limit: Maximum number to return
681
+
682
+ Returns:
683
+ List of DocumentRecords, most recently updated first
684
+ """
685
+ cursor = self._conn.execute("""
686
+ SELECT id, collection, summary, tags_json, created_at, updated_at, content_hash
687
+ FROM documents
688
+ WHERE collection = ?
689
+ ORDER BY updated_at DESC
690
+ LIMIT ?
691
+ """, (collection, limit))
692
+
693
+ return [
694
+ DocumentRecord(
695
+ id=row["id"],
696
+ collection=row["collection"],
697
+ summary=row["summary"],
698
+ tags=json.loads(row["tags_json"]),
699
+ created_at=row["created_at"],
700
+ updated_at=row["updated_at"],
701
+ content_hash=row["content_hash"],
702
+ )
703
+ for row in cursor
704
+ ]
705
+
367
706
  def count(self, collection: str) -> int:
368
707
  """Count documents in a collection."""
369
708
  cursor = self._conn.execute("""
keep/pending_summaries.py CHANGED
@@ -45,6 +45,12 @@ class PendingSummaryQueue:
45
45
  """Initialize the SQLite database."""
46
46
  self._queue_path.parent.mkdir(parents=True, exist_ok=True)
47
47
  self._conn = sqlite3.connect(str(self._queue_path), check_same_thread=False)
48
+
49
+ # Enable WAL mode for better concurrent access across processes
50
+ self._conn.execute("PRAGMA journal_mode=WAL")
51
+ # Wait up to 5 seconds for locks instead of failing immediately
52
+ self._conn.execute("PRAGMA busy_timeout=5000")
53
+
48
54
  self._conn.execute("""
49
55
  CREATE TABLE IF NOT EXISTS pending_summaries (
50
56
  id TEXT NOT NULL,
@@ -40,6 +40,12 @@ class EmbeddingCache:
40
40
  """Initialize the SQLite database."""
41
41
  self._cache_path.parent.mkdir(parents=True, exist_ok=True)
42
42
  self._conn = sqlite3.connect(str(self._cache_path), check_same_thread=False)
43
+
44
+ # Enable WAL mode for better concurrent access across processes
45
+ self._conn.execute("PRAGMA journal_mode=WAL")
46
+ # Wait up to 5 seconds for locks instead of failing immediately
47
+ self._conn.execute("PRAGMA busy_timeout=5000")
48
+
43
49
  self._conn.execute("""
44
50
  CREATE TABLE IF NOT EXISTS embedding_cache (
45
51
  content_hash TEXT PRIMARY KEY,
keep/store.py CHANGED
@@ -124,7 +124,7 @@ class ChromaStore:
124
124
  ) -> None:
125
125
  """
126
126
  Insert or update an item in the store.
127
-
127
+
128
128
  Args:
129
129
  collection: Collection name
130
130
  id: Item identifier (URI or custom)
@@ -140,9 +140,9 @@ class ChromaStore:
140
140
  f"Embedding dimension mismatch: expected {self._embedding_dimension}, "
141
141
  f"got {len(embedding)}"
142
142
  )
143
-
143
+
144
144
  coll = self._get_collection(collection)
145
-
145
+
146
146
  # Add timestamp if not present
147
147
  now = datetime.now(timezone.utc).isoformat()
148
148
  if "_updated" not in tags:
@@ -158,36 +158,122 @@ class ChromaStore:
158
158
  tags = {**tags, "_created": now}
159
159
  else:
160
160
  tags = {**tags, "_created": now}
161
-
161
+
162
162
  # Add date portion for easier date queries
163
163
  tags = {**tags, "_updated_date": now[:10]}
164
-
164
+
165
165
  coll.upsert(
166
166
  ids=[id],
167
167
  embeddings=[embedding],
168
168
  documents=[summary],
169
169
  metadatas=[self._tags_to_metadata(tags)],
170
170
  )
171
+
172
+ def upsert_version(
173
+ self,
174
+ collection: str,
175
+ id: str,
176
+ version: int,
177
+ embedding: list[float],
178
+ summary: str,
179
+ tags: dict[str, str],
180
+ ) -> None:
181
+ """
182
+ Store an archived version with a versioned ID.
183
+
184
+ The versioned ID format is: {id}@v{version}
185
+ Metadata includes _version and _base_id for filtering/navigation.
186
+
187
+ Args:
188
+ collection: Collection name
189
+ id: Base item identifier (not versioned)
190
+ version: Version number (1=oldest archived)
191
+ embedding: Vector embedding
192
+ summary: Human-readable summary
193
+ tags: All tags from the archived version
194
+ """
195
+ if self._embedding_dimension is None:
196
+ self._embedding_dimension = len(embedding)
197
+ elif len(embedding) != self._embedding_dimension:
198
+ raise ValueError(
199
+ f"Embedding dimension mismatch: expected {self._embedding_dimension}, "
200
+ f"got {len(embedding)}"
201
+ )
202
+
203
+ coll = self._get_collection(collection)
204
+
205
+ # Versioned ID format
206
+ versioned_id = f"{id}@v{version}"
207
+
208
+ # Add version metadata
209
+ versioned_tags = dict(tags)
210
+ versioned_tags["_version"] = str(version)
211
+ versioned_tags["_base_id"] = id
212
+
213
+ coll.upsert(
214
+ ids=[versioned_id],
215
+ embeddings=[embedding],
216
+ documents=[summary],
217
+ metadatas=[self._tags_to_metadata(versioned_tags)],
218
+ )
219
+
220
+ def get_content_hash(self, collection: str, id: str) -> Optional[str]:
221
+ """
222
+ Get the content hash of an existing item.
223
+
224
+ Used to check if content changed (to skip re-embedding).
225
+
226
+ Args:
227
+ collection: Collection name
228
+ id: Item identifier
229
+
230
+ Returns:
231
+ Content hash if item exists and has one, None otherwise
232
+ """
233
+ coll = self._get_collection(collection)
234
+ result = coll.get(ids=[id], include=["metadatas"])
235
+
236
+ if not result["ids"]:
237
+ return None
238
+
239
+ metadata = result["metadatas"][0] or {}
240
+ return metadata.get("_content_hash")
171
241
 
172
- def delete(self, collection: str, id: str) -> bool:
242
+ def delete(self, collection: str, id: str, delete_versions: bool = True) -> bool:
173
243
  """
174
244
  Delete an item from the store.
175
-
245
+
176
246
  Args:
177
247
  collection: Collection name
178
248
  id: Item identifier
179
-
249
+ delete_versions: If True, also delete versioned copies ({id}@v{N})
250
+
180
251
  Returns:
181
252
  True if item existed and was deleted, False if not found
182
253
  """
183
254
  coll = self._get_collection(collection)
184
-
255
+
185
256
  # Check existence first
186
257
  existing = coll.get(ids=[id])
187
258
  if not existing["ids"]:
188
259
  return False
189
-
260
+
190
261
  coll.delete(ids=[id])
262
+
263
+ if delete_versions:
264
+ # Delete all versioned copies
265
+ # Query by _base_id metadata to find all versions
266
+ try:
267
+ versions = coll.get(
268
+ where={"_base_id": id},
269
+ include=[],
270
+ )
271
+ if versions["ids"]:
272
+ coll.delete(ids=versions["ids"])
273
+ except Exception:
274
+ # Metadata filter might fail if no versions exist
275
+ pass
276
+
191
277
  return True
192
278
 
193
279
  def update_summary(self, collection: str, id: str, summary: str) -> bool:
@@ -295,7 +381,38 @@ class ChromaStore:
295
381
  coll = self._get_collection(collection)
296
382
  result = coll.get(ids=[id], include=[])
297
383
  return bool(result["ids"])
298
-
384
+
385
+ def get_embedding(self, collection: str, id: str) -> list[float] | None:
386
+ """
387
+ Retrieve the stored embedding for a document.
388
+
389
+ Args:
390
+ collection: Collection name
391
+ id: Item identifier
392
+
393
+ Returns:
394
+ Embedding vector if found, None otherwise
395
+ """
396
+ coll = self._get_collection(collection)
397
+ result = coll.get(ids=[id], include=["embeddings"])
398
+ if not result["ids"] or result["embeddings"] is None or len(result["embeddings"]) == 0:
399
+ return None
400
+ return list(result["embeddings"][0])
401
+
402
+ def list_ids(self, collection: str) -> list[str]:
403
+ """
404
+ List all document IDs in a collection.
405
+
406
+ Args:
407
+ collection: Collection name
408
+
409
+ Returns:
410
+ List of document IDs
411
+ """
412
+ coll = self._get_collection(collection)
413
+ result = coll.get(include=[])
414
+ return result["ids"]
415
+
299
416
  def query_embedding(
300
417
  self,
301
418
  collection: str,