keep-skill 0.8.1__py3-none-any.whl → 0.10.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- keep/__init__.py +1 -1
- keep/api.py +45 -40
- keep/cli.py +142 -28
- keep/config.py +1 -1
- keep/data/system/library.md +144 -0
- keep/providers/base.py +39 -0
- keep/providers/llm.py +29 -41
- keep/providers/mlx.py +18 -21
- keep/store.py +31 -0
- {keep_skill-0.8.1.dist-info → keep_skill-0.10.0.dist-info}/METADATA +8 -5
- {keep_skill-0.8.1.dist-info → keep_skill-0.10.0.dist-info}/RECORD +14 -13
- {keep_skill-0.8.1.dist-info → keep_skill-0.10.0.dist-info}/WHEEL +0 -0
- {keep_skill-0.8.1.dist-info → keep_skill-0.10.0.dist-info}/entry_points.txt +0 -0
- {keep_skill-0.8.1.dist-info → keep_skill-0.10.0.dist-info}/licenses/LICENSE +0 -0
keep/__init__.py
CHANGED
keep/api.py
CHANGED
|
@@ -191,6 +191,7 @@ SYSTEM_DOC_IDS = {
|
|
|
191
191
|
"now.md": "_system:now",
|
|
192
192
|
"conversations.md": "_system:conversations",
|
|
193
193
|
"domains.md": "_system:domains",
|
|
194
|
+
"library.md": "_system:library",
|
|
194
195
|
}
|
|
195
196
|
|
|
196
197
|
|
|
@@ -404,24 +405,25 @@ class Keeper:
|
|
|
404
405
|
except Exception as e:
|
|
405
406
|
logger.debug("Error scanning old system docs: %s", e)
|
|
406
407
|
|
|
407
|
-
# Second pass: create
|
|
408
|
+
# Second pass: create or update system docs from bundled content
|
|
408
409
|
for path in SYSTEM_DOC_DIR.glob("*.md"):
|
|
409
410
|
new_id = SYSTEM_DOC_IDS.get(path.name)
|
|
410
411
|
if new_id is None:
|
|
411
412
|
logger.debug("Skipping unknown system doc: %s", path.name)
|
|
412
413
|
continue
|
|
413
414
|
|
|
414
|
-
# Skip if already exists
|
|
415
|
-
if self.exists(new_id):
|
|
416
|
-
stats["skipped"] += 1
|
|
417
|
-
continue
|
|
418
|
-
|
|
419
415
|
try:
|
|
420
416
|
content, tags = _load_frontmatter(path)
|
|
421
417
|
tags["category"] = "system"
|
|
418
|
+
existed = self.exists(new_id)
|
|
419
|
+
# remember() handles both create and update (with re-summarization)
|
|
422
420
|
self.remember(content, id=new_id, tags=tags)
|
|
423
|
-
|
|
424
|
-
|
|
421
|
+
if existed:
|
|
422
|
+
stats["migrated"] += 1
|
|
423
|
+
logger.info("Updated system doc: %s", new_id)
|
|
424
|
+
else:
|
|
425
|
+
stats["created"] += 1
|
|
426
|
+
logger.info("Created system doc: %s", new_id)
|
|
425
427
|
except FileNotFoundError:
|
|
426
428
|
# System file missing - skip silently
|
|
427
429
|
pass
|
|
@@ -534,13 +536,17 @@ class Keeper:
|
|
|
534
536
|
summary: Optional[str] = None,
|
|
535
537
|
source_tags: Optional[dict[str, str]] = None, # Deprecated alias
|
|
536
538
|
collection: Optional[str] = None,
|
|
537
|
-
lazy: bool = False
|
|
538
539
|
) -> Item:
|
|
539
540
|
"""
|
|
540
541
|
Insert or update a document in the store.
|
|
541
542
|
|
|
542
543
|
Fetches the document, generates embeddings and summary, then stores it.
|
|
543
544
|
|
|
545
|
+
**Summary behavior:**
|
|
546
|
+
- If summary is provided, use it (skips auto-summarization)
|
|
547
|
+
- For large content, summarization is async (truncated placeholder
|
|
548
|
+
stored immediately, real summary generated in background)
|
|
549
|
+
|
|
544
550
|
**Update behavior:**
|
|
545
551
|
- Summary: Replaced with user-provided or newly generated summary
|
|
546
552
|
- Tags: Merged - existing tags are preserved, new tags override
|
|
@@ -553,9 +559,6 @@ class Keeper:
|
|
|
553
559
|
summary: User-provided summary (skips auto-summarization if given)
|
|
554
560
|
source_tags: Deprecated alias for 'tags'
|
|
555
561
|
collection: Target collection (uses default if None)
|
|
556
|
-
lazy: If True, use truncated placeholder summary and queue for
|
|
557
|
-
background processing. Use `process_pending()` to generate
|
|
558
|
-
real summaries later. Ignored if summary is provided.
|
|
559
562
|
|
|
560
563
|
Returns:
|
|
561
564
|
The stored Item with merged tags and new summary
|
|
@@ -615,17 +618,14 @@ class Keeper:
|
|
|
615
618
|
)
|
|
616
619
|
summary = summary[:max_len]
|
|
617
620
|
final_summary = summary
|
|
618
|
-
|
|
619
|
-
#
|
|
621
|
+
else:
|
|
622
|
+
# Large content: async summarization (truncated placeholder now, real summary later)
|
|
620
623
|
if len(doc.content) > max_len:
|
|
621
624
|
final_summary = doc.content[:max_len] + "..."
|
|
625
|
+
# Queue for background processing
|
|
626
|
+
self._pending_queue.enqueue(id, coll, doc.content)
|
|
622
627
|
else:
|
|
623
628
|
final_summary = doc.content
|
|
624
|
-
# Queue for background processing
|
|
625
|
-
self._pending_queue.enqueue(id, coll, doc.content)
|
|
626
|
-
else:
|
|
627
|
-
# Auto-generate summary
|
|
628
|
-
final_summary = self._get_summarization_provider().summarize(doc.content)
|
|
629
629
|
|
|
630
630
|
# Build tags: existing → config → env → user (later wins on collision)
|
|
631
631
|
merged_tags = {**existing_tags}
|
|
@@ -686,8 +686,8 @@ class Keeper:
|
|
|
686
686
|
tags=old_doc.tags,
|
|
687
687
|
)
|
|
688
688
|
|
|
689
|
-
# Spawn background processor if
|
|
690
|
-
if
|
|
689
|
+
# Spawn background processor if content was queued (large content, no user summary, content changed)
|
|
690
|
+
if summary is None and len(doc.content) > max_len and not content_unchanged:
|
|
691
691
|
self._spawn_processor()
|
|
692
692
|
|
|
693
693
|
# Return the stored item
|
|
@@ -703,7 +703,6 @@ class Keeper:
|
|
|
703
703
|
tags: Optional[dict[str, str]] = None,
|
|
704
704
|
source_tags: Optional[dict[str, str]] = None, # Deprecated alias
|
|
705
705
|
collection: Optional[str] = None,
|
|
706
|
-
lazy: bool = False
|
|
707
706
|
) -> Item:
|
|
708
707
|
"""
|
|
709
708
|
Store inline content directly (without fetching from a URI).
|
|
@@ -713,7 +712,8 @@ class Keeper:
|
|
|
713
712
|
**Smart summary behavior:**
|
|
714
713
|
- If summary is provided, use it (skips auto-summarization)
|
|
715
714
|
- If content is short (≤ max_summary_length), use content verbatim
|
|
716
|
-
-
|
|
715
|
+
- For large content, summarization is async (truncated placeholder
|
|
716
|
+
stored immediately, real summary generated in background)
|
|
717
717
|
|
|
718
718
|
**Update behavior (when id already exists):**
|
|
719
719
|
- Summary: Replaced with user-provided, content, or generated summary
|
|
@@ -728,9 +728,6 @@ class Keeper:
|
|
|
728
728
|
tags: User-provided tags to merge with existing tags
|
|
729
729
|
source_tags: Deprecated alias for 'tags'
|
|
730
730
|
collection: Target collection (uses default if None)
|
|
731
|
-
lazy: If True and content is long, use truncated placeholder summary
|
|
732
|
-
and queue for background processing. Ignored if content is
|
|
733
|
-
short or summary is provided.
|
|
734
731
|
|
|
735
732
|
Returns:
|
|
736
733
|
The stored Item with merged tags and new summary
|
|
@@ -794,14 +791,11 @@ class Keeper:
|
|
|
794
791
|
elif len(content) <= max_len:
|
|
795
792
|
# Content is short enough - use verbatim (smart summary)
|
|
796
793
|
final_summary = content
|
|
797
|
-
|
|
798
|
-
# Content is long
|
|
794
|
+
else:
|
|
795
|
+
# Content is long - async summarization (truncated placeholder now, real summary later)
|
|
799
796
|
final_summary = content[:max_len] + "..."
|
|
800
797
|
# Queue for background processing
|
|
801
798
|
self._pending_queue.enqueue(id, coll, content)
|
|
802
|
-
else:
|
|
803
|
-
# Content is long - generate summary
|
|
804
|
-
final_summary = self._get_summarization_provider().summarize(content)
|
|
805
799
|
|
|
806
800
|
# Build tags: existing → config → env → user (later wins on collision)
|
|
807
801
|
merged_tags = {**existing_tags}
|
|
@@ -860,8 +854,8 @@ class Keeper:
|
|
|
860
854
|
tags=old_doc.tags,
|
|
861
855
|
)
|
|
862
856
|
|
|
863
|
-
# Spawn background processor if
|
|
864
|
-
if
|
|
857
|
+
# Spawn background processor if content was queued (large content, no user summary, content changed)
|
|
858
|
+
if summary is None and len(content) > max_len and not content_unchanged:
|
|
865
859
|
self._spawn_processor()
|
|
866
860
|
|
|
867
861
|
# Return the stored item
|
|
@@ -1366,14 +1360,14 @@ class Keeper:
|
|
|
1366
1360
|
|
|
1367
1361
|
def get_now(self) -> Item:
|
|
1368
1362
|
"""
|
|
1369
|
-
Get the current working
|
|
1363
|
+
Get the current working intentions.
|
|
1370
1364
|
|
|
1371
1365
|
A singleton document representing what you're currently working on.
|
|
1372
1366
|
If it doesn't exist, creates one with default content and tags from
|
|
1373
1367
|
the bundled system now.md file.
|
|
1374
1368
|
|
|
1375
1369
|
Returns:
|
|
1376
|
-
The current
|
|
1370
|
+
The current intentions Item (never None - auto-creates if missing)
|
|
1377
1371
|
"""
|
|
1378
1372
|
item = self.get(NOWDOC_ID)
|
|
1379
1373
|
if item is None:
|
|
@@ -1394,13 +1388,13 @@ class Keeper:
|
|
|
1394
1388
|
tags: Optional[dict[str, str]] = None,
|
|
1395
1389
|
) -> Item:
|
|
1396
1390
|
"""
|
|
1397
|
-
Set the current working
|
|
1391
|
+
Set the current working intentions.
|
|
1398
1392
|
|
|
1399
|
-
Updates the singleton
|
|
1393
|
+
Updates the singleton intentions with new content. Uses remember()
|
|
1400
1394
|
internally with the fixed NOWDOC_ID.
|
|
1401
1395
|
|
|
1402
1396
|
Args:
|
|
1403
|
-
content: New content for the current
|
|
1397
|
+
content: New content for the current intentions
|
|
1404
1398
|
tags: Optional additional tags to apply
|
|
1405
1399
|
|
|
1406
1400
|
Returns:
|
|
@@ -1784,10 +1778,18 @@ class Keeper:
|
|
|
1784
1778
|
|
|
1785
1779
|
def close(self) -> None:
|
|
1786
1780
|
"""
|
|
1787
|
-
Close resources (
|
|
1781
|
+
Close resources (stores, caches, queues).
|
|
1788
1782
|
|
|
1789
1783
|
Good practice to call when done, though Python's GC will clean up eventually.
|
|
1790
1784
|
"""
|
|
1785
|
+
# Close ChromaDB store
|
|
1786
|
+
if hasattr(self, '_store') and self._store is not None:
|
|
1787
|
+
self._store.close()
|
|
1788
|
+
|
|
1789
|
+
# Close document store (SQLite)
|
|
1790
|
+
if hasattr(self, '_document_store') and self._document_store is not None:
|
|
1791
|
+
self._document_store.close()
|
|
1792
|
+
|
|
1791
1793
|
# Close embedding cache if it was loaded
|
|
1792
1794
|
if self._embedding_provider is not None:
|
|
1793
1795
|
if hasattr(self._embedding_provider, '_cache'):
|
|
@@ -1810,4 +1812,7 @@ class Keeper:
|
|
|
1810
1812
|
|
|
1811
1813
|
def __del__(self):
|
|
1812
1814
|
"""Cleanup on deletion."""
|
|
1813
|
-
|
|
1815
|
+
try:
|
|
1816
|
+
self.close()
|
|
1817
|
+
except Exception:
|
|
1818
|
+
pass # Suppress errors during garbage collection
|
keep/cli.py
CHANGED
|
@@ -38,6 +38,13 @@ else:
|
|
|
38
38
|
configure_quiet_mode(quiet=True)
|
|
39
39
|
|
|
40
40
|
|
|
41
|
+
def _version_callback(value: bool):
    """
    Eager callback for the ``--version`` option: print the version and exit.

    Args:
        value: True when ``--version`` was passed on the command line.
            Any falsy value makes this a no-op so normal processing continues.

    Raises:
        typer.Exit: Always raised after printing, to stop further CLI processing.
    """
    if not value:
        return

    # Imported lazily so the lookup cost is only paid when --version is used.
    from importlib.metadata import PackageNotFoundError, version

    try:
        installed = version('keep-skill')
    except PackageNotFoundError:
        # No installed distribution metadata (e.g. running from a source
        # checkout): report an unknown version instead of a traceback.
        installed = "unknown"

    print(f"keep {installed}")
    raise typer.Exit()
|
|
46
|
+
|
|
47
|
+
|
|
41
48
|
def _verbose_callback(value: bool):
|
|
42
49
|
if value:
|
|
43
50
|
enable_debug_mode()
|
|
@@ -242,6 +249,12 @@ def main_callback(
|
|
|
242
249
|
callback=_full_callback,
|
|
243
250
|
is_eager=True,
|
|
244
251
|
)] = False,
|
|
252
|
+
version: Annotated[Optional[bool], typer.Option(
|
|
253
|
+
"--version",
|
|
254
|
+
help="Show version and exit",
|
|
255
|
+
callback=_version_callback,
|
|
256
|
+
is_eager=True,
|
|
257
|
+
)] = None,
|
|
245
258
|
store: Annotated[Optional[Path], typer.Option(
|
|
246
259
|
"--store", "-s",
|
|
247
260
|
envvar="KEEP_STORE_PATH",
|
|
@@ -251,7 +264,7 @@ def main_callback(
|
|
|
251
264
|
)] = None,
|
|
252
265
|
):
|
|
253
266
|
"""Reflective memory with semantic search."""
|
|
254
|
-
# If no subcommand provided, show the current
|
|
267
|
+
# If no subcommand provided, show the current intentions (now)
|
|
255
268
|
if ctx.invoked_subcommand is None:
|
|
256
269
|
from .api import NOWDOC_ID
|
|
257
270
|
kp = _get_keeper(None, "default")
|
|
@@ -285,6 +298,7 @@ CollectionOption = Annotated[
|
|
|
285
298
|
str,
|
|
286
299
|
typer.Option(
|
|
287
300
|
"--collection", "-c",
|
|
301
|
+
envvar="KEEP_COLLECTION",
|
|
288
302
|
help="Collection name"
|
|
289
303
|
)
|
|
290
304
|
]
|
|
@@ -442,6 +456,28 @@ def _parse_tags(tags: Optional[list[str]]) -> dict[str, str]:
|
|
|
442
456
|
return parsed
|
|
443
457
|
|
|
444
458
|
|
|
459
|
+
def _filter_by_tags(items: list, tags: list[str]) -> list:
    """
    Keep only the items that satisfy every tag specification (AND logic).

    A specification is either:
    - "key"       - the item must carry this tag key, whatever its value
    - "key=value" - the item must carry this key with exactly this value

    Args:
        items: Objects exposing a ``tags`` mapping.
        tags: Tag specifications; when empty or None, ``items`` is returned as-is.

    Returns:
        The matching items in their original order.
    """
    if not tags:
        return items

    # Split each spec once up front: wanted is None for key-only specs.
    specs = []
    for spec in tags:
        key, sep, wanted = spec.partition("=")
        specs.append((key, wanted if sep else None))

    def _accepts(item) -> bool:
        for key, wanted in specs:
            if wanted is None:
                # Key only - check if key exists
                if key not in item.tags:
                    return False
            elif item.tags.get(key) != wanted:
                return False
        return True

    return [item for item in items if _accepts(item)]
|
|
479
|
+
|
|
480
|
+
|
|
445
481
|
def _timestamp() -> str:
|
|
446
482
|
"""Generate timestamp for auto-generated IDs."""
|
|
447
483
|
from datetime import datetime, timezone
|
|
@@ -475,6 +511,10 @@ def find(
|
|
|
475
511
|
include_self: Annotated[bool, typer.Option(
|
|
476
512
|
help="Include the queried item (only with --id)"
|
|
477
513
|
)] = False,
|
|
514
|
+
tag: Annotated[Optional[list[str]], typer.Option(
|
|
515
|
+
"--tag", "-t",
|
|
516
|
+
help="Filter by tag (key or key=value, repeatable)"
|
|
517
|
+
)] = None,
|
|
478
518
|
store: StoreOption = None,
|
|
479
519
|
collection: CollectionOption = "default",
|
|
480
520
|
limit: LimitOption = 10,
|
|
@@ -487,6 +527,7 @@ def find(
|
|
|
487
527
|
Examples:
|
|
488
528
|
keep find "authentication" # Search by text
|
|
489
529
|
keep find --id file:///path/to/doc.md # Find similar to item
|
|
530
|
+
keep find "auth" -t project=myapp # Search + filter by tag
|
|
490
531
|
"""
|
|
491
532
|
if id and query:
|
|
492
533
|
typer.echo("Error: Specify either a query or --id, not both", err=True)
|
|
@@ -497,12 +538,19 @@ def find(
|
|
|
497
538
|
|
|
498
539
|
kp = _get_keeper(store, collection)
|
|
499
540
|
|
|
541
|
+
# Search with higher limit if filtering, then post-filter
|
|
542
|
+
search_limit = limit * 5 if tag else limit
|
|
543
|
+
|
|
500
544
|
if id:
|
|
501
|
-
results = kp.find_similar(id, limit=
|
|
545
|
+
results = kp.find_similar(id, limit=search_limit, since=since, include_self=include_self)
|
|
502
546
|
else:
|
|
503
|
-
results = kp.find(query, limit=
|
|
547
|
+
results = kp.find(query, limit=search_limit, since=since)
|
|
504
548
|
|
|
505
|
-
|
|
549
|
+
# Post-filter by tags if specified
|
|
550
|
+
if tag:
|
|
551
|
+
results = _filter_by_tags(results, tag)
|
|
552
|
+
|
|
553
|
+
typer.echo(_format_items(results[:limit], as_json=_get_json_output()))
|
|
506
554
|
|
|
507
555
|
|
|
508
556
|
@app.command()
|
|
@@ -683,10 +731,6 @@ def update(
|
|
|
683
731
|
"--summary",
|
|
684
732
|
help="User-provided summary (skips auto-summarization)"
|
|
685
733
|
)] = None,
|
|
686
|
-
lazy: Annotated[bool, typer.Option(
|
|
687
|
-
"--lazy",
|
|
688
|
-
help="Fast mode: use truncated summary, queue for later processing"
|
|
689
|
-
)] = False,
|
|
690
734
|
):
|
|
691
735
|
"""
|
|
692
736
|
Add or update a document in the store.
|
|
@@ -715,15 +759,15 @@ def update(
|
|
|
715
759
|
parsed_tags = {**frontmatter_tags, **parsed_tags} # CLI tags override
|
|
716
760
|
# Use content-addressed ID for stdin text (enables versioning)
|
|
717
761
|
doc_id = id or _text_content_id(content)
|
|
718
|
-
item = kp.remember(content, id=doc_id, summary=summary, tags=parsed_tags or None
|
|
762
|
+
item = kp.remember(content, id=doc_id, summary=summary, tags=parsed_tags or None)
|
|
719
763
|
elif source and _URI_SCHEME_PATTERN.match(source):
|
|
720
764
|
# URI mode: fetch from URI (ID is the URI itself)
|
|
721
|
-
item = kp.update(source, tags=parsed_tags or None, summary=summary
|
|
765
|
+
item = kp.update(source, tags=parsed_tags or None, summary=summary)
|
|
722
766
|
elif source:
|
|
723
767
|
# Text mode: inline content (no :// in source)
|
|
724
768
|
# Use content-addressed ID for text (enables versioning)
|
|
725
769
|
doc_id = id or _text_content_id(source)
|
|
726
|
-
item = kp.remember(source, id=doc_id, summary=summary, tags=parsed_tags or None
|
|
770
|
+
item = kp.remember(source, id=doc_id, summary=summary, tags=parsed_tags or None)
|
|
727
771
|
else:
|
|
728
772
|
typer.echo("Error: Provide content, URI, or '-' for stdin", err=True)
|
|
729
773
|
raise typer.Exit(1)
|
|
@@ -756,19 +800,25 @@ def now(
|
|
|
756
800
|
collection: CollectionOption = "default",
|
|
757
801
|
tags: Annotated[Optional[list[str]], typer.Option(
|
|
758
802
|
"--tag", "-t",
|
|
759
|
-
help="
|
|
803
|
+
help="Set tag (with content) or filter (without content)"
|
|
760
804
|
)] = None,
|
|
761
805
|
):
|
|
762
806
|
"""
|
|
763
|
-
Get or set the current working
|
|
807
|
+
Get or set the current working intentions.
|
|
764
808
|
|
|
765
|
-
With no arguments, displays the current
|
|
809
|
+
With no arguments, displays the current intentions.
|
|
766
810
|
With content, replaces it.
|
|
767
811
|
|
|
812
|
+
Tags behave differently based on mode:
|
|
813
|
+
- With content: -t sets tags on the update
|
|
814
|
+
- Without content: -t filters version history
|
|
815
|
+
|
|
768
816
|
\b
|
|
769
817
|
Examples:
|
|
770
|
-
keep now # Show current
|
|
771
|
-
keep now "What's important now" # Update
|
|
818
|
+
keep now # Show current intentions
|
|
819
|
+
keep now "What's important now" # Update intentions
|
|
820
|
+
keep now "Auth work" -t project=myapp # Update with tag
|
|
821
|
+
keep now -t project=myapp # Find version with tag
|
|
772
822
|
keep now -f context.md # Read content from file
|
|
773
823
|
keep now --reset # Reset to default from system
|
|
774
824
|
keep now -V 1 # Previous version
|
|
@@ -891,18 +941,70 @@ def now(
|
|
|
891
941
|
item = kp.set_now(new_content, tags=parsed_tags or None)
|
|
892
942
|
typer.echo(_format_item(item, as_json=_get_json_output()))
|
|
893
943
|
else:
|
|
894
|
-
# Get current
|
|
895
|
-
|
|
896
|
-
|
|
897
|
-
|
|
898
|
-
|
|
899
|
-
|
|
900
|
-
|
|
901
|
-
|
|
902
|
-
|
|
903
|
-
|
|
904
|
-
|
|
905
|
-
|
|
944
|
+
# Get current intentions (or search version history if tags specified)
|
|
945
|
+
if tags:
|
|
946
|
+
# Search version history for most recent version with matching tags
|
|
947
|
+
item = _find_now_version_by_tags(kp, tags, collection)
|
|
948
|
+
if item is None:
|
|
949
|
+
typer.echo("No version found matching tags", err=True)
|
|
950
|
+
raise typer.Exit(1)
|
|
951
|
+
# No version nav or similar items for filtered results
|
|
952
|
+
typer.echo(_format_item(item, as_json=_get_json_output()))
|
|
953
|
+
else:
|
|
954
|
+
# Standard: get current with version navigation and similar items
|
|
955
|
+
item = kp.get_now()
|
|
956
|
+
version_nav = kp.get_version_nav(NOWDOC_ID, None, collection=collection)
|
|
957
|
+
similar_items = kp.get_similar_for_display(NOWDOC_ID, limit=3, collection=collection)
|
|
958
|
+
similar_offsets = {s.id: kp.get_version_offset(s) for s in similar_items}
|
|
959
|
+
typer.echo(_format_item(
|
|
960
|
+
item,
|
|
961
|
+
as_json=_get_json_output(),
|
|
962
|
+
version_nav=version_nav,
|
|
963
|
+
similar_items=similar_items,
|
|
964
|
+
similar_offsets=similar_offsets,
|
|
965
|
+
))
|
|
966
|
+
|
|
967
|
+
|
|
968
|
+
def _find_now_version_by_tags(kp, tags: list[str], collection: str):
    """
    Search nowdoc version history for the most recent version matching all tags.

    Checks the current version first, then scans archived versions. Tag
    matching is delegated to ``_filter_by_tags`` so that ``now`` applies
    exactly the same "key" / "key=value" rules as ``find`` and ``get``.

    Args:
        kp: Keeper instance used to read the nowdoc and its versions.
        tags: Tag specifications, each "key" or "key=value" (AND logic).
        collection: Collection whose version history is scanned.

    Returns:
        The matching item, or None if neither the current version nor any
        of the scanned archived versions matches.
    """
    from .api import NOWDOC_ID

    def _matches(candidate) -> bool:
        # A one-element filter is non-empty exactly when the candidate matches.
        return bool(_filter_by_tags([candidate], tags))

    # Check current version first
    current = kp.get_now()
    if current and _matches(current):
        return current

    # Scan archived versions; list_versions is expected to return newest
    # first, so list position N corresponds to version offset N + 1.
    versions = kp.list_versions(NOWDOC_ID, limit=100, collection=collection)
    for offset, archived in enumerate(versions, start=1):
        if _matches(archived):
            # Found match - get full item at this version offset
            return kp.get_version(NOWDOC_ID, offset, collection=collection)

    return None
|
|
906
1008
|
|
|
907
1009
|
|
|
908
1010
|
@app.command()
|
|
@@ -924,6 +1026,10 @@ def get(
|
|
|
924
1026
|
"--no-similar",
|
|
925
1027
|
help="Suppress similar items in output"
|
|
926
1028
|
)] = False,
|
|
1029
|
+
tag: Annotated[Optional[list[str]], typer.Option(
|
|
1030
|
+
"--tag", "-t",
|
|
1031
|
+
help="Require tag (key or key=value, repeatable)"
|
|
1032
|
+
)] = None,
|
|
927
1033
|
limit: Annotated[int, typer.Option(
|
|
928
1034
|
"--limit", "-n",
|
|
929
1035
|
help="Max items for --history or --similar (default: 10)"
|
|
@@ -944,6 +1050,7 @@ def get(
|
|
|
944
1050
|
keep get doc:1 --history # List all versions
|
|
945
1051
|
keep get doc:1 --similar # List similar items
|
|
946
1052
|
keep get doc:1 --no-similar # Suppress similar items
|
|
1053
|
+
keep get doc:1 -t project=myapp # Only if tag matches
|
|
947
1054
|
"""
|
|
948
1055
|
kp = _get_keeper(store, collection)
|
|
949
1056
|
|
|
@@ -1078,6 +1185,13 @@ def get(
|
|
|
1078
1185
|
typer.echo(f"Not found: {actual_id}", err=True)
|
|
1079
1186
|
raise typer.Exit(1)
|
|
1080
1187
|
|
|
1188
|
+
# Check tag filter if specified
|
|
1189
|
+
if tag:
|
|
1190
|
+
filtered = _filter_by_tags([item], tag)
|
|
1191
|
+
if not filtered:
|
|
1192
|
+
typer.echo(f"Tag filter not matched: {actual_id}", err=True)
|
|
1193
|
+
raise typer.Exit(1)
|
|
1194
|
+
|
|
1081
1195
|
# Get version navigation
|
|
1082
1196
|
version_nav = kp.get_version_nav(actual_id, internal_version, collection=collection)
|
|
1083
1197
|
|
keep/config.py
CHANGED
|
@@ -19,7 +19,7 @@ import tomli_w
|
|
|
19
19
|
|
|
20
20
|
CONFIG_FILENAME = "keep.toml"
|
|
21
21
|
CONFIG_VERSION = 3 # Bumped for document versioning support
|
|
22
|
-
SYSTEM_DOCS_VERSION =
|
|
22
|
+
SYSTEM_DOCS_VERSION = 3 # Increment when bundled system docs content changes
|
|
23
23
|
|
|
24
24
|
|
|
25
25
|
@dataclass
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
---
|
|
2
|
+
tags:
|
|
3
|
+
category: system
|
|
4
|
+
context: library
|
|
5
|
+
---
|
|
6
|
+
# Library
|
|
7
|
+
|
|
8
|
+
Public domain texts for testing and bootstrapping keep.
|
|
9
|
+
The content, as well as the format, is relevant to the practice of this skill.
|
|
10
|
+
|
|
11
|
+
## Resolving File Paths
|
|
12
|
+
|
|
13
|
+
The library files are located in the `docs/library/` directory of the keep package.
|
|
14
|
+
To construct URIs for these files:
|
|
15
|
+
|
|
16
|
+
1. **In a cloned repo:** Use `file://$PWD/docs/library/{filename}`
|
|
17
|
+
2. **With installed package:** Use Python to find the path:
|
|
18
|
+
```python
|
|
19
|
+
from importlib.resources import files
|
|
20
|
+
library_path = files("keep").parent / "docs" / "library"
|
|
21
|
+
uri = f"file://{library_path}/{filename}"
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
---
|
|
25
|
+
|
|
26
|
+
## Files
|
|
27
|
+
|
|
28
|
+
### ancrenewisse.pdf
|
|
29
|
+
- **URI template:** `file://{keep_library}/ancrenewisse.pdf`
|
|
30
|
+
- **Title:** Ancrene Wisse (Ancrene Riwle)
|
|
31
|
+
- **Date:** c. 1200s (13th century)
|
|
32
|
+
- **Language:** Middle English
|
|
33
|
+
- **Translator:** James Morton, The Camden Society, London 1853
|
|
34
|
+
- **Source:** https://www.bsswebsite.me.uk/History/AncreneRiwle/AncreneRiwle2.html
|
|
35
|
+
- **Status:** Public domain
|
|
36
|
+
- **Description:** A monastic guide for Christian anchoresses. Provides guidance on conduct with an "inner" and "outer" rule, and their relationship: "one relates to the right conduct of the heart; the other, to the regulation of the outward life".
|
|
37
|
+
|
|
38
|
+
---
|
|
39
|
+
|
|
40
|
+
### impermanence_verse.txt
|
|
41
|
+
- **URI template:** `file://{keep_library}/impermanence_verse.txt`
|
|
42
|
+
- **Title:** 無常偈 (Impermanence Verse / Closing Verse)
|
|
43
|
+
- **Date:** Traditional Zen liturgy (exact origin uncertain)
|
|
44
|
+
- **Language:** Japanese (Kanji/Kana), with romanization and multiple English translations
|
|
45
|
+
- **Source:** Soto Zen liturgy
|
|
46
|
+
- **Status:** Traditional teaching, freely shared
|
|
47
|
+
- **Description:** Four-line verse chanted at the end of Zen practice sessions. "Great is the matter of birth and death / Life slips quickly by / Time waits for no one / Wake up! Wake up!" Includes character-by-character breakdown, cultural context, and linguistic notes.
|
|
48
|
+
|
|
49
|
+
---
|
|
50
|
+
|
|
51
|
+
### mn61.html
|
|
52
|
+
- **URI template:** `file://{keep_library}/mn61.html`
|
|
53
|
+
- **Title:** Ambalaṭṭhikārāhulovāda Sutta (MN 61) - The Exhortation to Rāhula at Mango Stone
|
|
54
|
+
- **Date:** Original: ~5th century BCE; Translation: contemporary
|
|
55
|
+
- **Language:** English translation from Pali
|
|
56
|
+
- **Translator:** Thanissaro Bhikkhu
|
|
57
|
+
- **Source:** https://www.dhammatalks.org/suttas/MN/MN61.html
|
|
58
|
+
- **Format:** Raw HTML (complete with markup, navigation, footnotes)
|
|
59
|
+
- **License:** Freely distributed for educational use
|
|
60
|
+
- **Description:** Buddha's teaching to his son Rāhula on reflection before, during, and after bodily, verbal, and mental actions. The triple-check pattern: reflect before acting/speaking, check while doing, review after. Mirror metaphor for self-reflection.
|
|
61
|
+
**Format note:** Kept as raw HTML to test document processing and summarization on markup-heavy content.
|
|
62
|
+
|
|
63
|
+
---
|
|
64
|
+
|
|
65
|
+
### an5.57_translation-en-sujato.json
|
|
66
|
+
- **URI template:** `file://{keep_library}/an5.57_translation-en-sujato.json`
|
|
67
|
+
- **Title:** Abhiṇhapaccavekkhitabbaṭhānasutta (AN 5.57) - Subjects for Regular Reviewing
|
|
68
|
+
- **Date:** Original: ~5th century BCE; Translation: modern
|
|
69
|
+
- **Language:** English translation from Pali
|
|
70
|
+
- **Translator:** Bhikkhu Sujato
|
|
71
|
+
- **Source:** SuttaCentral
|
|
72
|
+
- **Source URL:** https://suttacentral.net/an5.57/en/sujato?lang=en
|
|
73
|
+
- **Data:** https://github.com/suttacentral/sc-data/blob/main/sc_bilara_data/translation/en/sujato/sutta/an/an5/an5.57_translation-en-sujato.json
|
|
74
|
+
- **License:** Creative Commons CC0 1.0 Universal (SuttaCentral translations)
|
|
75
|
+
- **Description:** The Five Remembrances - five subjects that all sentient beings should reflect on regularly: aging, sickness, death, separation from loved ones, and being heir to one's own actions. "Reviewing this subject often, they entirely give up bad conduct, or at least reduce it".
|
|
76
|
+
|
|
77
|
+
---
|
|
78
|
+
|
|
79
|
+
### fortytwo_chapters.txt
|
|
80
|
+
- **URI template:** `file://{keep_library}/fortytwo_chapters.txt`
|
|
81
|
+
- **Title:** 佛說四十二章經 (Sutra of Forty-Two Chapters)
|
|
82
|
+
- **Date:** Eastern Han Dynasty (25-220 CE)
|
|
83
|
+
- **Language:** Classical Chinese
|
|
84
|
+
- **Source:** Project Gutenberg (#23585)
|
|
85
|
+
- **Status:** Public domain
|
|
86
|
+
- **Description:** One of the earliest Buddhist texts to reach China, traditionally said to have been translated by Kāśyapa Mātaṅga and Dharmarakṣa.
|
|
87
|
+
|
|
88
|
+
---
|
|
89
|
+
|
|
90
|
+
### mumford_sticks_and_stones.txt
|
|
91
|
+
- **URI template:** `file://{keep_library}/mumford_sticks_and_stones.txt`
|
|
92
|
+
- **Title:** Sticks and Stones: A Study of American Architecture and Civilization
|
|
93
|
+
- **Author:** Lewis Mumford (1895-1990)
|
|
94
|
+
- **Date:** 1924
|
|
95
|
+
- **Language:** English
|
|
96
|
+
- **Source:** Internet Archive (sticksstones0000lewi)
|
|
97
|
+
- **Status:** Public domain (published before 1929)
|
|
98
|
+
- **Description:** Mumford's first major work on architecture, examining American building traditions from medieval influences through industrialization. Includes chapters on "The Medieval Tradition," "The Renaissance in New England," "The Age of Rationalism," and more.
|
|
99
|
+
|
|
100
|
+
**Note:** This is OCR text from archive.org. Quality is good but may contain occasional scanning artifacts.
|
|
101
|
+
|
|
102
|
+
---
|
|
103
|
+
|
|
104
|
+
### true_person_no_rank.md
|
|
105
|
+
- **URI template:** `file://{keep_library}/true_person_no_rank.md`
|
|
106
|
+
- **Title:** 無位真人 (The True Person of No Rank)
|
|
107
|
+
- **Date:** Original: 9th century CE; Commentary layers: 9th-20th centuries
|
|
108
|
+
- **Language:** Chinese (verified original text) with English translation and commentary
|
|
109
|
+
- **Source:** Record of Linji (臨濟錄, Línjì Lù); Book of Serenity (從容錄) Case 38
|
|
110
|
+
- **Primary sources:** DILA Buddhist Dictionary, multiple scholarly translations
|
|
111
|
+
- **Status:** Core teaching in public domain; compiled with verification notes
|
|
112
|
+
- **Description:** Linji Yixuan's famous teaching: "Within this mass of red flesh, there is a true person of no rank, constantly coming and going through the gates of your face." Multi-layered document exploring the original teaching, koan tradition, Dōgen's commentary, modern interpretations, and linguistic analysis. Includes Chinese text (verified), translations, and commentary relationships.
|
|
113
|
+
|
|
114
|
+
---
|
|
115
|
+
|
|
116
|
+
## Usage for Testing
|
|
117
|
+
|
|
118
|
+
These texts provide diverse test cases for keep:
|
|
119
|
+
|
|
120
|
+
1. **Different languages:** English, Chinese (Classical, with modern romanization), Japanese, Middle English, Pali (via translation)
|
|
121
|
+
2. **Different formats:** PDF, plain text, JSON, Markdown, HTML (with markup)
|
|
122
|
+
3. **Different domains:** Buddhist teachings, Zen liturgy, architectural criticism, medieval instructional prose
|
|
123
|
+
4. **Different writing styles:** Ancient scripture, koan commentary, scholarly analysis, liturgical verse, teaching notes
|
|
124
|
+
5. **Different lengths:** Four-line verses to full books
|
|
125
|
+
6. **Different structures:** Linear narratives, multi-layered commentaries, character-by-character analysis, mirror patterns, web documents with navigation
|
|
126
|
+
7. **Multilingual content:** Japanese-English parallel texts, Chinese with romanization, cross-linguistic terminology
|
|
127
|
+
8. **Processing challenges:** Markdown, UTF-8 plaintext, OCR artifacts (Mumford), HTML markup (MN 61), PDF extraction (Ancrene Wisse), structured JSON data (AN5.57).
|
|
128
|
+
|
|
129
|
+
---
|
|
130
|
+
|
|
131
|
+
## Adding More Test Data
|
|
132
|
+
|
|
133
|
+
When adding public domain texts:
|
|
134
|
+
|
|
135
|
+
1. Verify their relevance to the practice of this skill
|
|
136
|
+
2. Verify compatibility with the MIT license, e.g. public domain status (pre-1929 for US, or explicit license)
|
|
137
|
+
3. Include source URL (Project Gutenberg, archive.org, etc.)
|
|
138
|
+
4. Add metadata to this index
|
|
139
|
+
|
|
140
|
+
---
|
|
141
|
+
|
|
142
|
+
## License
|
|
143
|
+
|
|
144
|
+
Each text retains its original license status (public domain or Creative Commons, as noted above). This index and the dataset organization are released under CC0 1.0.
|
keep/providers/base.py
CHANGED
|
@@ -148,6 +148,45 @@ class EmbeddingProvider(Protocol):
|
|
|
148
148
|
# Summarization
|
|
149
149
|
# -----------------------------------------------------------------------------
|
|
150
150
|
|
|
151
|
+
# Shared system prompt for all LLM-based summarization providers
|
|
152
|
+
SUMMARIZATION_SYSTEM_PROMPT = """You are a precise summarization assistant.
|
|
153
|
+
Create a concise summary of the provided document that captures:
|
|
154
|
+
- The main purpose or topic
|
|
155
|
+
- Key points or functionality
|
|
156
|
+
- Important details that would help someone decide if this document is relevant
|
|
157
|
+
|
|
158
|
+
IMPORTANT: Start the summary directly with the content. Do NOT begin with phrases like:
|
|
159
|
+
- "Here is a concise summary"
|
|
160
|
+
- "This document describes"
|
|
161
|
+
- "The document covers"
|
|
162
|
+
- "Summary:"
|
|
163
|
+
Just state the facts directly. Keep the summary under 200 words."""
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def strip_summary_preamble(text: str) -> str:
|
|
167
|
+
"""
|
|
168
|
+
Remove common LLM preambles from summaries.
|
|
169
|
+
|
|
170
|
+
Many models add introductory phrases despite instructions not to.
|
|
171
|
+
This post-processes the output to strip them.
|
|
172
|
+
"""
|
|
173
|
+
import re
|
|
174
|
+
preambles = [
|
|
175
|
+
r"^here is a concise summary[^:]*:\s*",
|
|
176
|
+
r"^here is the summary[^:]*:\s*",
|
|
177
|
+
r"^here's a summary[^:]*:\s*",
|
|
178
|
+
r"^summary:\s*",
|
|
179
|
+
r"^the document describes\s+",
|
|
180
|
+
r"^this document describes\s+",
|
|
181
|
+
r"^the document covers\s+",
|
|
182
|
+
r"^this document covers\s+",
|
|
183
|
+
]
|
|
184
|
+
result = text
|
|
185
|
+
for pattern in preambles:
|
|
186
|
+
result = re.sub(pattern, "", result, flags=re.IGNORECASE)
|
|
187
|
+
return result
|
|
188
|
+
|
|
189
|
+
|
|
151
190
|
@runtime_checkable
|
|
152
191
|
class SummarizationProvider(Protocol):
|
|
153
192
|
"""
|
keep/providers/llm.py
CHANGED
|
@@ -6,7 +6,13 @@ import json
|
|
|
6
6
|
import os
|
|
7
7
|
from typing import Any
|
|
8
8
|
|
|
9
|
-
from .base import SummarizationProvider, TaggingProvider, get_registry
|
|
9
|
+
from .base import (
|
|
10
|
+
SummarizationProvider,
|
|
11
|
+
TaggingProvider,
|
|
12
|
+
get_registry,
|
|
13
|
+
SUMMARIZATION_SYSTEM_PROMPT,
|
|
14
|
+
strip_summary_preamble,
|
|
15
|
+
)
|
|
10
16
|
|
|
11
17
|
|
|
12
18
|
# -----------------------------------------------------------------------------
|
|
@@ -16,19 +22,11 @@ from .base import SummarizationProvider, TaggingProvider, get_registry
|
|
|
16
22
|
class AnthropicSummarization:
|
|
17
23
|
"""
|
|
18
24
|
Summarization provider using Anthropic's Claude API.
|
|
19
|
-
|
|
25
|
+
|
|
20
26
|
Requires: ANTHROPIC_API_KEY environment variable.
|
|
21
27
|
Optionally reads from OpenClaw config via OPENCLAW_CONFIG env var.
|
|
22
28
|
"""
|
|
23
29
|
|
|
24
|
-
SYSTEM_PROMPT = """You are a precise summarization assistant.
|
|
25
|
-
Create a concise summary of the provided document that captures:
|
|
26
|
-
- The main purpose or topic
|
|
27
|
-
- Key points or functionality
|
|
28
|
-
- Important details that would help someone decide if this document is relevant
|
|
29
|
-
|
|
30
|
-
Be factual and specific. Do not include phrases like "This document" - just state the content directly."""
|
|
31
|
-
|
|
32
30
|
def __init__(
|
|
33
31
|
self,
|
|
34
32
|
model: str = "claude-3-5-haiku-20241022",
|
|
@@ -56,22 +54,22 @@ Be factual and specific. Do not include phrases like "This document" - just stat
|
|
|
56
54
|
"""Generate summary using Anthropic Claude."""
|
|
57
55
|
# Truncate very long content
|
|
58
56
|
truncated = content[:50000] if len(content) > 50000 else content
|
|
59
|
-
|
|
57
|
+
|
|
60
58
|
try:
|
|
61
59
|
response = self.client.messages.create(
|
|
62
60
|
model=self.model,
|
|
63
61
|
max_tokens=self.max_tokens,
|
|
64
|
-
system=self.SYSTEM_PROMPT,
|
|
62
|
+
system=SUMMARIZATION_SYSTEM_PROMPT,
|
|
65
63
|
messages=[
|
|
66
64
|
{"role": "user", "content": truncated}
|
|
67
65
|
],
|
|
68
66
|
)
|
|
69
|
-
|
|
67
|
+
|
|
70
68
|
# Extract text from response
|
|
71
69
|
if response.content and len(response.content) > 0:
|
|
72
|
-
return response.content[0].text
|
|
70
|
+
return strip_summary_preamble(response.content[0].text)
|
|
73
71
|
return truncated[:500] # Fallback
|
|
74
|
-
except Exception as e:
|
|
72
|
+
except Exception:
|
|
75
73
|
# Fallback to truncation on error
|
|
76
74
|
return truncated[:500]
|
|
77
75
|
|
|
@@ -79,18 +77,10 @@ Be factual and specific. Do not include phrases like "This document" - just stat
|
|
|
79
77
|
class OpenAISummarization:
|
|
80
78
|
"""
|
|
81
79
|
Summarization provider using OpenAI's chat API.
|
|
82
|
-
|
|
80
|
+
|
|
83
81
|
Requires: KEEP_OPENAI_API_KEY or OPENAI_API_KEY environment variable.
|
|
84
82
|
"""
|
|
85
|
-
|
|
86
|
-
SYSTEM_PROMPT = """You are a precise summarization assistant.
|
|
87
|
-
Create a concise summary of the provided document that captures:
|
|
88
|
-
- The main purpose or topic
|
|
89
|
-
- Key points or functionality
|
|
90
|
-
- Important details that would help someone decide if this document is relevant
|
|
91
83
|
|
|
92
|
-
Be factual and specific. Do not include phrases like "This document" - just state the content directly."""
|
|
93
|
-
|
|
94
84
|
def __init__(
|
|
95
85
|
self,
|
|
96
86
|
model: str = "gpt-4o-mini",
|
|
@@ -101,41 +91,39 @@ Be factual and specific. Do not include phrases like "This document" - just stat
|
|
|
101
91
|
from openai import OpenAI
|
|
102
92
|
except ImportError:
|
|
103
93
|
raise RuntimeError("OpenAISummarization requires 'openai' library")
|
|
104
|
-
|
|
94
|
+
|
|
105
95
|
self.model = model
|
|
106
96
|
self.max_tokens = max_tokens
|
|
107
|
-
|
|
97
|
+
|
|
108
98
|
key = api_key or os.environ.get("KEEP_OPENAI_API_KEY") or os.environ.get("OPENAI_API_KEY")
|
|
109
99
|
if not key:
|
|
110
100
|
raise ValueError("OpenAI API key required")
|
|
111
|
-
|
|
101
|
+
|
|
112
102
|
self._client = OpenAI(api_key=key)
|
|
113
|
-
|
|
103
|
+
|
|
114
104
|
def summarize(self, content: str, *, max_length: int = 500) -> str:
|
|
115
105
|
"""Generate a summary using OpenAI."""
|
|
116
106
|
# Truncate very long content to avoid token limits
|
|
117
107
|
truncated = content[:50000] if len(content) > 50000 else content
|
|
118
|
-
|
|
108
|
+
|
|
119
109
|
response = self._client.chat.completions.create(
|
|
120
110
|
model=self.model,
|
|
121
111
|
messages=[
|
|
122
|
-
{"role": "system", "content":
|
|
112
|
+
{"role": "system", "content": SUMMARIZATION_SYSTEM_PROMPT},
|
|
123
113
|
{"role": "user", "content": truncated},
|
|
124
114
|
],
|
|
125
115
|
max_tokens=self.max_tokens,
|
|
126
116
|
temperature=0.3,
|
|
127
117
|
)
|
|
128
|
-
|
|
129
|
-
return response.choices[0].message.content.strip()
|
|
118
|
+
|
|
119
|
+
return strip_summary_preamble(response.choices[0].message.content.strip())
|
|
130
120
|
|
|
131
121
|
|
|
132
122
|
class OllamaSummarization:
|
|
133
123
|
"""
|
|
134
124
|
Summarization provider using Ollama's local API.
|
|
135
125
|
"""
|
|
136
|
-
|
|
137
|
-
SYSTEM_PROMPT = OpenAISummarization.SYSTEM_PROMPT
|
|
138
|
-
|
|
126
|
+
|
|
139
127
|
def __init__(
|
|
140
128
|
self,
|
|
141
129
|
model: str = "llama3.2",
|
|
@@ -143,27 +131,27 @@ class OllamaSummarization:
|
|
|
143
131
|
):
|
|
144
132
|
self.model = model
|
|
145
133
|
self.base_url = base_url.rstrip("/")
|
|
146
|
-
|
|
134
|
+
|
|
147
135
|
def summarize(self, content: str, *, max_length: int = 500) -> str:
|
|
148
136
|
"""Generate a summary using Ollama."""
|
|
149
137
|
import requests
|
|
150
|
-
|
|
138
|
+
|
|
151
139
|
truncated = content[:50000] if len(content) > 50000 else content
|
|
152
|
-
|
|
140
|
+
|
|
153
141
|
response = requests.post(
|
|
154
142
|
f"{self.base_url}/api/chat",
|
|
155
143
|
json={
|
|
156
144
|
"model": self.model,
|
|
157
145
|
"messages": [
|
|
158
|
-
{"role": "system", "content":
|
|
146
|
+
{"role": "system", "content": SUMMARIZATION_SYSTEM_PROMPT},
|
|
159
147
|
{"role": "user", "content": truncated},
|
|
160
148
|
],
|
|
161
149
|
"stream": False,
|
|
162
150
|
},
|
|
163
151
|
)
|
|
164
152
|
response.raise_for_status()
|
|
165
|
-
|
|
166
|
-
return response.json()["message"]["content"].strip()
|
|
153
|
+
|
|
154
|
+
return strip_summary_preamble(response.json()["message"]["content"].strip())
|
|
167
155
|
|
|
168
156
|
|
|
169
157
|
class PassthroughSummarization:
|
keep/providers/mlx.py
CHANGED
|
@@ -10,7 +10,13 @@ Requires: pip install mlx-lm mlx
|
|
|
10
10
|
import os
|
|
11
11
|
from typing import Any
|
|
12
12
|
|
|
13
|
-
from .base import
|
|
13
|
+
from .base import (
|
|
14
|
+
EmbeddingProvider,
|
|
15
|
+
SummarizationProvider,
|
|
16
|
+
get_registry,
|
|
17
|
+
SUMMARIZATION_SYSTEM_PROMPT,
|
|
18
|
+
strip_summary_preamble,
|
|
19
|
+
)
|
|
14
20
|
|
|
15
21
|
|
|
16
22
|
class MLXEmbedding:
|
|
@@ -75,21 +81,12 @@ class MLXEmbedding:
|
|
|
75
81
|
class MLXSummarization:
|
|
76
82
|
"""
|
|
77
83
|
Summarization provider using MLX-LM on Apple Silicon.
|
|
78
|
-
|
|
84
|
+
|
|
79
85
|
Runs local LLMs optimized for Apple Silicon. No API key required.
|
|
80
|
-
|
|
86
|
+
|
|
81
87
|
Requires: pip install mlx-lm
|
|
82
88
|
"""
|
|
83
|
-
|
|
84
|
-
SYSTEM_PROMPT = """You are a precise summarization assistant.
|
|
85
|
-
Create a concise summary of the provided document that captures:
|
|
86
|
-
- The main purpose or topic
|
|
87
|
-
- Key points or functionality
|
|
88
|
-
- Important details that would help someone decide if this document is relevant
|
|
89
89
|
|
|
90
|
-
Be factual and specific. Do not include phrases like "This document" - just state the content directly.
|
|
91
|
-
Keep the summary under 200 words."""
|
|
92
|
-
|
|
93
90
|
def __init__(
|
|
94
91
|
self,
|
|
95
92
|
model: str = "mlx-community/Llama-3.2-3B-Instruct-4bit",
|
|
@@ -122,27 +119,27 @@ Keep the summary under 200 words."""
|
|
|
122
119
|
def summarize(self, content: str, *, max_length: int = 500) -> str:
|
|
123
120
|
"""Generate a summary using MLX-LM."""
|
|
124
121
|
from mlx_lm import generate
|
|
125
|
-
|
|
122
|
+
|
|
126
123
|
# Truncate very long content to fit context window
|
|
127
124
|
# Most models have 4k-8k context, leave room for prompt and response
|
|
128
125
|
max_content_chars = 12000
|
|
129
126
|
truncated = content[:max_content_chars] if len(content) > max_content_chars else content
|
|
130
|
-
|
|
127
|
+
|
|
131
128
|
# Format as chat (works with instruction-tuned models)
|
|
132
129
|
if hasattr(self._tokenizer, "apply_chat_template"):
|
|
133
130
|
messages = [
|
|
134
|
-
{"role": "system", "content":
|
|
131
|
+
{"role": "system", "content": SUMMARIZATION_SYSTEM_PROMPT},
|
|
135
132
|
{"role": "user", "content": f"Summarize the following:\n\n{truncated}"},
|
|
136
133
|
]
|
|
137
134
|
prompt = self._tokenizer.apply_chat_template(
|
|
138
|
-
messages,
|
|
139
|
-
tokenize=False,
|
|
135
|
+
messages,
|
|
136
|
+
tokenize=False,
|
|
140
137
|
add_generation_prompt=True
|
|
141
138
|
)
|
|
142
139
|
else:
|
|
143
140
|
# Fallback for models without chat template
|
|
144
|
-
prompt = f"{
|
|
145
|
-
|
|
141
|
+
prompt = f"{SUMMARIZATION_SYSTEM_PROMPT}\n\nDocument:\n{truncated}\n\nSummary:"
|
|
142
|
+
|
|
146
143
|
# Generate
|
|
147
144
|
response = generate(
|
|
148
145
|
self._model,
|
|
@@ -151,8 +148,8 @@ Keep the summary under 200 words."""
|
|
|
151
148
|
max_tokens=self.max_tokens,
|
|
152
149
|
verbose=False,
|
|
153
150
|
)
|
|
154
|
-
|
|
155
|
-
return response.strip()
|
|
151
|
+
|
|
152
|
+
return strip_summary_preamble(response.strip())
|
|
156
153
|
|
|
157
154
|
|
|
158
155
|
class MLXTagging:
|
keep/store.py
CHANGED
|
@@ -556,3 +556,34 @@ class ChromaStore:
|
|
|
556
556
|
"""Return the number of items in a collection."""
|
|
557
557
|
coll = self._get_collection(collection)
|
|
558
558
|
return coll.count()
|
|
559
|
+
|
|
560
|
+
# -------------------------------------------------------------------------
|
|
561
|
+
# Resource Management
|
|
562
|
+
# -------------------------------------------------------------------------
|
|
563
|
+
|
|
564
|
+
def close(self) -> None:
|
|
565
|
+
"""
|
|
566
|
+
Close ChromaDB client and release resources.
|
|
567
|
+
|
|
568
|
+
Good practice to call when done, though Python's GC will clean up eventually.
|
|
569
|
+
"""
|
|
570
|
+
self._collections.clear()
|
|
571
|
+
# ChromaDB PersistentClient doesn't have explicit close(),
|
|
572
|
+
# but clearing references allows garbage collection
|
|
573
|
+
self._client = None
|
|
574
|
+
|
|
575
|
+
def __enter__(self):
|
|
576
|
+
"""Context manager entry."""
|
|
577
|
+
return self
|
|
578
|
+
|
|
579
|
+
def __exit__(self, exc_type, exc_val, exc_tb):
|
|
580
|
+
"""Context manager exit - close resources."""
|
|
581
|
+
self.close()
|
|
582
|
+
return False
|
|
583
|
+
|
|
584
|
+
def __del__(self):
|
|
585
|
+
"""Cleanup on deletion."""
|
|
586
|
+
try:
|
|
587
|
+
self.close()
|
|
588
|
+
except Exception:
|
|
589
|
+
pass # Suppress errors during garbage collection
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: keep-skill
|
|
3
|
-
Version: 0.8.1
|
|
3
|
+
Version: 0.10.0
|
|
4
4
|
Summary: Reflective memory - remember and search documents by meaning
|
|
5
5
|
Project-URL: Homepage, https://github.com/hughpyle/keep
|
|
6
6
|
Project-URL: Repository, https://github.com/hughpyle/keep
|
|
@@ -24,6 +24,7 @@ Requires-Dist: typer>=0.9
|
|
|
24
24
|
Provides-Extra: anthropic
|
|
25
25
|
Requires-Dist: anthropic>=0.40.0; extra == 'anthropic'
|
|
26
26
|
Provides-Extra: dev
|
|
27
|
+
Requires-Dist: google-genai>=1.0.0; extra == 'dev'
|
|
27
28
|
Requires-Dist: pytest-cov>=4.0; extra == 'dev'
|
|
28
29
|
Requires-Dist: pytest>=7.0; extra == 'dev'
|
|
29
30
|
Provides-Extra: documents
|
|
@@ -60,7 +61,7 @@ uv tool install 'keep-skill[local]'
|
|
|
60
61
|
keep init
|
|
61
62
|
|
|
62
63
|
# Index content
|
|
63
|
-
keep update path/to/document.md -t project=myapp
|
|
64
|
+
keep update file:///path/to/document.md -t project=myapp
|
|
64
65
|
keep update "Rate limit is 100 req/min" -t topic=api
|
|
65
66
|
|
|
66
67
|
# Search by meaning
|
|
@@ -68,7 +69,7 @@ keep find "what's the rate limit?"
|
|
|
68
69
|
|
|
69
70
|
# Track what you're working on
|
|
70
71
|
keep now "Debugging auth flow"
|
|
71
|
-
keep now -V 1 # Previous
|
|
72
|
+
keep now -V 1 # Previous intentions
|
|
72
73
|
```
|
|
73
74
|
|
|
74
75
|
---
|
|
@@ -115,6 +116,7 @@ keep init # Creates .keep/ at repo root
|
|
|
115
116
|
|
|
116
117
|
# Index files and notes
|
|
117
118
|
keep update file:///path/to/doc.md -t project=myapp
|
|
119
|
+
keep update "Token refresh needs clock sync" -t topic=auth
|
|
118
120
|
keep update "Important insight" -t type=note
|
|
119
121
|
|
|
120
122
|
# Search
|
|
@@ -129,11 +131,12 @@ keep get ID --history # All versions
|
|
|
129
131
|
|
|
130
132
|
# Tags
|
|
131
133
|
keep list --tag project=myapp # Find by tag
|
|
134
|
+
keep find "auth" -t topic=auth # Cross-project topic search
|
|
132
135
|
keep list --tags= # List all tag keys
|
|
133
136
|
|
|
134
|
-
# Current
|
|
137
|
+
# Current intentions
|
|
135
138
|
keep now # Show what you're working on
|
|
136
|
-
keep now "Fixing login bug" # Update
|
|
139
|
+
keep now "Fixing login bug" # Update intentions
|
|
137
140
|
```
|
|
138
141
|
|
|
139
142
|
### Python API
|
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
keep/__init__.py,sha256
|
|
1
|
+
keep/__init__.py,sha256=-KvH6aY6B2n2kKGSizYqoQK38FWOa5lEfzYyRst33lU,1622
|
|
2
2
|
keep/__main__.py,sha256=3Uu70IhIDIjh8OW6jp9jQQ3dF2lKdJWi_3FtRIQMiMY,104
|
|
3
|
-
keep/api.py,sha256=
|
|
3
|
+
keep/api.py,sha256=GGMY6w2a1b-Yve1aWz35dhZuSA9Va5Whe6M1Ek87yw4,65105
|
|
4
4
|
keep/chunking.py,sha256=neAXOLSvVwbUxapbqq7nZrbSNSzMXuhxj-ODoOSodsU,11830
|
|
5
|
-
keep/cli.py,sha256=
|
|
6
|
-
keep/config.py,sha256=
|
|
5
|
+
keep/cli.py,sha256=Kwm5kwBiFH92BlKIHRaNN3UTSRexEclgcaPtk3SGpSs,49922
|
|
6
|
+
keep/config.py,sha256=UTDjhUPiGVbBBmkGFgprE_3B8OC13HgtX18cix-NLCc,16226
|
|
7
7
|
keep/context.py,sha256=CNpjmrv6eW2kV1E0MO6qAQfhYKRlfzAL--6v4Mj1nFY,71
|
|
8
8
|
keep/document_store.py,sha256=UswqKIGSc5E-r7Tg9k0g5-byYnuar3e9FieQ7WNod9k,29109
|
|
9
9
|
keep/errors.py,sha256=G9e5FbdfeugyfHOuL_SPZlM5jgWWnwsX4hM7IzanBZc,857
|
|
@@ -11,23 +11,24 @@ keep/indexing.py,sha256=dpPYo3WXnIhFDWinz5ZBZVk7_qumeNpP4EpOIY0zMbs,6063
|
|
|
11
11
|
keep/logging_config.py,sha256=IGwkgIyg-TfYaT4MnoCXfmjeHAe_wsB_XQ1QhVT_ro8,3503
|
|
12
12
|
keep/paths.py,sha256=Dv7pM6oo2QgjL6sj5wPjhuMOK2wqUkfd4Kz08TwJ1ps,3331
|
|
13
13
|
keep/pending_summaries.py,sha256=_irGe7P1Lmog2c5cEgx-BElpq4YJW-tEmF5A3IUZQbQ,5727
|
|
14
|
-
keep/store.py,sha256=
|
|
14
|
+
keep/store.py,sha256=JjgqxW6NGpQa_FEOl9KIQ39IkRIVWIHd9gRoRdWvEKk,18867
|
|
15
15
|
keep/types.py,sha256=irvUJYUHQgQdVqC4_lgrG0FbTN1BdZqFxZr0ubVPSG4,2314
|
|
16
16
|
keep/data/__init__.py,sha256=C1YARrudHwK2Bmlxkh7dZlIaNJ5m5WrSTglCdG8e3T0,24
|
|
17
17
|
keep/data/system/__init__.py,sha256=Rp92_sBO3kscuWXJomo0HKeHfU-N4BgBeT3-5El0Mcg,28
|
|
18
18
|
keep/data/system/conversations.md,sha256=jE53wYSUyu5uPFNtO1Tu6w4f5QxqLei7muxLF_kZE2s,9837
|
|
19
19
|
keep/data/system/domains.md,sha256=EHE6zU2-lx7UeLqyOTmoWl1WVlvgRq3_QnFb_EZceEY,5584
|
|
20
|
+
keep/data/system/library.md,sha256=rmA4LBtgGOI5vEB9ohjhrLptWNIRvjQCfxuHxY3D4LU,7471
|
|
20
21
|
keep/data/system/now.md,sha256=GyQo_LizSIVKbj5q52q4ErV-nxz8rzUOlkILjgNu25s,388
|
|
21
22
|
keep/providers/__init__.py,sha256=6AwJYc6cF1ZT6BcU_6ATyeWk7MHohdVU2-ccqDSvCHU,1094
|
|
22
|
-
keep/providers/base.py,sha256=
|
|
23
|
+
keep/providers/base.py,sha256=qUpVbgLHH4zdvimcM0YMyJnExF7WJ7_U0w_sslpPhSI,15897
|
|
23
24
|
keep/providers/documents.py,sha256=EXeSy5i3RUL0kciIC6w3ldAEfbTIyC5fgfzC_WAI0iY,8211
|
|
24
25
|
keep/providers/embedding_cache.py,sha256=gna6PZEJanbn2GUN0vj1b1MC0xVWePM9cot2KgZUdu8,8856
|
|
25
26
|
keep/providers/embeddings.py,sha256=zi8GyitKexdbCJyU1nLrUhGt_zzPn3udYrrPZ5Ak8Wo,9081
|
|
26
|
-
keep/providers/llm.py,sha256=
|
|
27
|
-
keep/providers/mlx.py,sha256=
|
|
27
|
+
keep/providers/llm.py,sha256=Pcq1fK7NXBzdVrQegjmAFmuHdZXpQraApr8M6O6hJFE,11680
|
|
28
|
+
keep/providers/mlx.py,sha256=xQTXM9kYWUhfqpRVPNCDyF2nkOo50ZYs5DxHELbFB4g,8707
|
|
28
29
|
keep/providers/summarization.py,sha256=MlVTcYipaqp2lT-QYnznp0AMuPVG36QfcTQnvY7Gb-Q,3409
|
|
29
|
-
keep_skill-0.
|
|
30
|
-
keep_skill-0.
|
|
31
|
-
keep_skill-0.
|
|
32
|
-
keep_skill-0.
|
|
33
|
-
keep_skill-0.
|
|
30
|
+
keep_skill-0.10.0.dist-info/METADATA,sha256=jMADwUfBCyw8r7OIcOJUdpxEEheYRlXqBIAf7VVO6eM,6238
|
|
31
|
+
keep_skill-0.10.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
32
|
+
keep_skill-0.10.0.dist-info/entry_points.txt,sha256=W8yiI4kNeW0IC8ji4EHRWrvdhFxzaqTIePUhJAJAMOo,39
|
|
33
|
+
keep_skill-0.10.0.dist-info/licenses/LICENSE,sha256=zsm0tpvtyUkevcjn5BIvs9jAho8iwxq3Ax9647AaOSg,1086
|
|
34
|
+
keep_skill-0.10.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|