keep-skill 0.9.0__py3-none-any.whl → 0.11.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- keep/__init__.py +1 -1
- keep/api.py +23 -10
- keep/cli.py +300 -32
- keep/config.py +29 -1
- keep/data/system/library.md +144 -0
- keep/providers/base.py +39 -0
- keep/providers/llm.py +29 -41
- keep/providers/mlx.py +18 -21
- keep/store.py +31 -0
- {keep_skill-0.9.0.dist-info → keep_skill-0.11.0.dist-info}/METADATA +5 -2
- {keep_skill-0.9.0.dist-info → keep_skill-0.11.0.dist-info}/RECORD +14 -13
- {keep_skill-0.9.0.dist-info → keep_skill-0.11.0.dist-info}/WHEEL +0 -0
- {keep_skill-0.9.0.dist-info → keep_skill-0.11.0.dist-info}/entry_points.txt +0 -0
- {keep_skill-0.9.0.dist-info → keep_skill-0.11.0.dist-info}/licenses/LICENSE +0 -0
keep/__init__.py
CHANGED
keep/api.py
CHANGED
|
@@ -191,6 +191,7 @@ SYSTEM_DOC_IDS = {
|
|
|
191
191
|
"now.md": "_system:now",
|
|
192
192
|
"conversations.md": "_system:conversations",
|
|
193
193
|
"domains.md": "_system:domains",
|
|
194
|
+
"library.md": "_system:library",
|
|
194
195
|
}
|
|
195
196
|
|
|
196
197
|
|
|
@@ -404,24 +405,25 @@ class Keeper:
|
|
|
404
405
|
except Exception as e:
|
|
405
406
|
logger.debug("Error scanning old system docs: %s", e)
|
|
406
407
|
|
|
407
|
-
# Second pass: create
|
|
408
|
+
# Second pass: create or update system docs from bundled content
|
|
408
409
|
for path in SYSTEM_DOC_DIR.glob("*.md"):
|
|
409
410
|
new_id = SYSTEM_DOC_IDS.get(path.name)
|
|
410
411
|
if new_id is None:
|
|
411
412
|
logger.debug("Skipping unknown system doc: %s", path.name)
|
|
412
413
|
continue
|
|
413
414
|
|
|
414
|
-
# Skip if already exists
|
|
415
|
-
if self.exists(new_id):
|
|
416
|
-
stats["skipped"] += 1
|
|
417
|
-
continue
|
|
418
|
-
|
|
419
415
|
try:
|
|
420
416
|
content, tags = _load_frontmatter(path)
|
|
421
417
|
tags["category"] = "system"
|
|
418
|
+
existed = self.exists(new_id)
|
|
419
|
+
# remember() handles both create and update (with re-summarization)
|
|
422
420
|
self.remember(content, id=new_id, tags=tags)
|
|
423
|
-
|
|
424
|
-
|
|
421
|
+
if existed:
|
|
422
|
+
stats["migrated"] += 1
|
|
423
|
+
logger.info("Updated system doc: %s", new_id)
|
|
424
|
+
else:
|
|
425
|
+
stats["created"] += 1
|
|
426
|
+
logger.info("Created system doc: %s", new_id)
|
|
425
427
|
except FileNotFoundError:
|
|
426
428
|
# System file missing - skip silently
|
|
427
429
|
pass
|
|
@@ -1776,10 +1778,18 @@ class Keeper:
|
|
|
1776
1778
|
|
|
1777
1779
|
def close(self) -> None:
|
|
1778
1780
|
"""
|
|
1779
|
-
Close resources (
|
|
1781
|
+
Close resources (stores, caches, queues).
|
|
1780
1782
|
|
|
1781
1783
|
Good practice to call when done, though Python's GC will clean up eventually.
|
|
1782
1784
|
"""
|
|
1785
|
+
# Close ChromaDB store
|
|
1786
|
+
if hasattr(self, '_store') and self._store is not None:
|
|
1787
|
+
self._store.close()
|
|
1788
|
+
|
|
1789
|
+
# Close document store (SQLite)
|
|
1790
|
+
if hasattr(self, '_document_store') and self._document_store is not None:
|
|
1791
|
+
self._document_store.close()
|
|
1792
|
+
|
|
1783
1793
|
# Close embedding cache if it was loaded
|
|
1784
1794
|
if self._embedding_provider is not None:
|
|
1785
1795
|
if hasattr(self._embedding_provider, '_cache'):
|
|
@@ -1802,4 +1812,7 @@ class Keeper:
|
|
|
1802
1812
|
|
|
1803
1813
|
def __del__(self):
|
|
1804
1814
|
"""Cleanup on deletion."""
|
|
1805
|
-
|
|
1815
|
+
try:
|
|
1816
|
+
self.close()
|
|
1817
|
+
except Exception:
|
|
1818
|
+
pass # Suppress errors during garbage collection
|
keep/cli.py
CHANGED
|
@@ -25,6 +25,7 @@ VERSION_SUFFIX_PATTERN = re.compile(r'@V\{(\d+)\}$')
|
|
|
25
25
|
_URI_SCHEME_PATTERN = re.compile(r'^[a-zA-Z][a-zA-Z0-9+.-]*://')
|
|
26
26
|
|
|
27
27
|
from .api import Keeper, _text_content_id
|
|
28
|
+
from .config import get_tool_directory
|
|
28
29
|
from .document_store import VersionInfo
|
|
29
30
|
from .types import Item
|
|
30
31
|
from .logging_config import configure_quiet_mode, enable_debug_mode
|
|
@@ -38,6 +39,13 @@ else:
|
|
|
38
39
|
configure_quiet_mode(quiet=True)
|
|
39
40
|
|
|
40
41
|
|
|
42
|
+
def _version_callback(value: bool):
|
|
43
|
+
if value:
|
|
44
|
+
from importlib.metadata import version
|
|
45
|
+
print(f"keep {version('keep-skill')}")
|
|
46
|
+
raise typer.Exit()
|
|
47
|
+
|
|
48
|
+
|
|
41
49
|
def _verbose_callback(value: bool):
|
|
42
50
|
if value:
|
|
43
51
|
enable_debug_mode()
|
|
@@ -242,6 +250,12 @@ def main_callback(
|
|
|
242
250
|
callback=_full_callback,
|
|
243
251
|
is_eager=True,
|
|
244
252
|
)] = False,
|
|
253
|
+
version: Annotated[Optional[bool], typer.Option(
|
|
254
|
+
"--version",
|
|
255
|
+
help="Show version and exit",
|
|
256
|
+
callback=_version_callback,
|
|
257
|
+
is_eager=True,
|
|
258
|
+
)] = None,
|
|
245
259
|
store: Annotated[Optional[Path], typer.Option(
|
|
246
260
|
"--store", "-s",
|
|
247
261
|
envvar="KEEP_STORE_PATH",
|
|
@@ -285,6 +299,7 @@ CollectionOption = Annotated[
|
|
|
285
299
|
str,
|
|
286
300
|
typer.Option(
|
|
287
301
|
"--collection", "-c",
|
|
302
|
+
envvar="KEEP_COLLECTION",
|
|
288
303
|
help="Collection name"
|
|
289
304
|
)
|
|
290
305
|
]
|
|
@@ -442,6 +457,28 @@ def _parse_tags(tags: Optional[list[str]]) -> dict[str, str]:
|
|
|
442
457
|
return parsed
|
|
443
458
|
|
|
444
459
|
|
|
460
|
+
def _filter_by_tags(items: list, tags: list[str]) -> list:
|
|
461
|
+
"""
|
|
462
|
+
Filter items by tag specifications (AND logic).
|
|
463
|
+
|
|
464
|
+
Each tag can be:
|
|
465
|
+
- "key" - item must have this tag key (any value)
|
|
466
|
+
- "key=value" - item must have this exact tag
|
|
467
|
+
"""
|
|
468
|
+
if not tags:
|
|
469
|
+
return items
|
|
470
|
+
|
|
471
|
+
result = items
|
|
472
|
+
for t in tags:
|
|
473
|
+
if "=" in t:
|
|
474
|
+
key, value = t.split("=", 1)
|
|
475
|
+
result = [item for item in result if item.tags.get(key) == value]
|
|
476
|
+
else:
|
|
477
|
+
# Key only - check if key exists
|
|
478
|
+
result = [item for item in result if t in item.tags]
|
|
479
|
+
return result
|
|
480
|
+
|
|
481
|
+
|
|
445
482
|
def _timestamp() -> str:
|
|
446
483
|
"""Generate timestamp for auto-generated IDs."""
|
|
447
484
|
from datetime import datetime, timezone
|
|
@@ -475,6 +512,10 @@ def find(
|
|
|
475
512
|
include_self: Annotated[bool, typer.Option(
|
|
476
513
|
help="Include the queried item (only with --id)"
|
|
477
514
|
)] = False,
|
|
515
|
+
tag: Annotated[Optional[list[str]], typer.Option(
|
|
516
|
+
"--tag", "-t",
|
|
517
|
+
help="Filter by tag (key or key=value, repeatable)"
|
|
518
|
+
)] = None,
|
|
478
519
|
store: StoreOption = None,
|
|
479
520
|
collection: CollectionOption = "default",
|
|
480
521
|
limit: LimitOption = 10,
|
|
@@ -487,6 +528,7 @@ def find(
|
|
|
487
528
|
Examples:
|
|
488
529
|
keep find "authentication" # Search by text
|
|
489
530
|
keep find --id file:///path/to/doc.md # Find similar to item
|
|
531
|
+
keep find "auth" -t project=myapp # Search + filter by tag
|
|
490
532
|
"""
|
|
491
533
|
if id and query:
|
|
492
534
|
typer.echo("Error: Specify either a query or --id, not both", err=True)
|
|
@@ -497,12 +539,19 @@ def find(
|
|
|
497
539
|
|
|
498
540
|
kp = _get_keeper(store, collection)
|
|
499
541
|
|
|
542
|
+
# Search with higher limit if filtering, then post-filter
|
|
543
|
+
search_limit = limit * 5 if tag else limit
|
|
544
|
+
|
|
500
545
|
if id:
|
|
501
|
-
results = kp.find_similar(id, limit=
|
|
546
|
+
results = kp.find_similar(id, limit=search_limit, since=since, include_self=include_self)
|
|
502
547
|
else:
|
|
503
|
-
results = kp.find(query, limit=
|
|
548
|
+
results = kp.find(query, limit=search_limit, since=since)
|
|
504
549
|
|
|
505
|
-
|
|
550
|
+
# Post-filter by tags if specified
|
|
551
|
+
if tag:
|
|
552
|
+
results = _filter_by_tags(results, tag)
|
|
553
|
+
|
|
554
|
+
typer.echo(_format_items(results[:limit], as_json=_get_json_output()))
|
|
506
555
|
|
|
507
556
|
|
|
508
557
|
@app.command()
|
|
@@ -752,7 +801,7 @@ def now(
|
|
|
752
801
|
collection: CollectionOption = "default",
|
|
753
802
|
tags: Annotated[Optional[list[str]], typer.Option(
|
|
754
803
|
"--tag", "-t",
|
|
755
|
-
help="
|
|
804
|
+
help="Set tag (with content) or filter (without content)"
|
|
756
805
|
)] = None,
|
|
757
806
|
):
|
|
758
807
|
"""
|
|
@@ -761,10 +810,17 @@ def now(
|
|
|
761
810
|
With no arguments, displays the current intentions.
|
|
762
811
|
With content, replaces it.
|
|
763
812
|
|
|
813
|
+
\b
|
|
814
|
+
Tags behave differently based on mode:
|
|
815
|
+
- With content: -t sets tags on the update
|
|
816
|
+
- Without content: -t filters version history
|
|
817
|
+
|
|
764
818
|
\b
|
|
765
819
|
Examples:
|
|
766
820
|
keep now # Show current intentions
|
|
767
821
|
keep now "What's important now" # Update intentions
|
|
822
|
+
keep now "Auth work" -t project=myapp # Update with tag
|
|
823
|
+
keep now -t project=myapp # Find version with tag
|
|
768
824
|
keep now -f context.md # Read content from file
|
|
769
825
|
keep now --reset # Reset to default from system
|
|
770
826
|
keep now -V 1 # Previous version
|
|
@@ -887,18 +943,70 @@ def now(
|
|
|
887
943
|
item = kp.set_now(new_content, tags=parsed_tags or None)
|
|
888
944
|
typer.echo(_format_item(item, as_json=_get_json_output()))
|
|
889
945
|
else:
|
|
890
|
-
# Get current intentions
|
|
891
|
-
|
|
892
|
-
|
|
893
|
-
|
|
894
|
-
|
|
895
|
-
|
|
896
|
-
|
|
897
|
-
|
|
898
|
-
|
|
899
|
-
|
|
900
|
-
|
|
901
|
-
|
|
946
|
+
# Get current intentions (or search version history if tags specified)
|
|
947
|
+
if tags:
|
|
948
|
+
# Search version history for most recent version with matching tags
|
|
949
|
+
item = _find_now_version_by_tags(kp, tags, collection)
|
|
950
|
+
if item is None:
|
|
951
|
+
typer.echo("No version found matching tags", err=True)
|
|
952
|
+
raise typer.Exit(1)
|
|
953
|
+
# No version nav or similar items for filtered results
|
|
954
|
+
typer.echo(_format_item(item, as_json=_get_json_output()))
|
|
955
|
+
else:
|
|
956
|
+
# Standard: get current with version navigation and similar items
|
|
957
|
+
item = kp.get_now()
|
|
958
|
+
version_nav = kp.get_version_nav(NOWDOC_ID, None, collection=collection)
|
|
959
|
+
similar_items = kp.get_similar_for_display(NOWDOC_ID, limit=3, collection=collection)
|
|
960
|
+
similar_offsets = {s.id: kp.get_version_offset(s) for s in similar_items}
|
|
961
|
+
typer.echo(_format_item(
|
|
962
|
+
item,
|
|
963
|
+
as_json=_get_json_output(),
|
|
964
|
+
version_nav=version_nav,
|
|
965
|
+
similar_items=similar_items,
|
|
966
|
+
similar_offsets=similar_offsets,
|
|
967
|
+
))
|
|
968
|
+
|
|
969
|
+
|
|
970
|
+
def _find_now_version_by_tags(kp, tags: list[str], collection: str):
    """
    Search nowdoc version history for the first version matching all tags.

    The current version is checked first; if it does not match, archived
    versions are scanned in the order ``kp.list_versions`` returns them
    (presumably newest first - confirm against the store). At most 100
    archived versions are inspected.

    Tag matching is delegated to ``_filter_by_tags`` so that "key" and
    "key=value" specifications mean the same thing here as elsewhere.

    Args:
        kp: Keeper instance used to read the nowdoc and its versions.
        tags: Tag specifications ("key" or "key=value"); all must match.
        collection: Collection name passed through to the version lookups.

    Returns:
        The matching item, or None when no inspected version matches.
    """
    from .api import NOWDOC_ID

    # Check current version first
    current = kp.get_now()
    if current and _filter_by_tags([current], tags):
        return current

    # Scan archived versions; offsets passed to get_version are 1-based
    # (index 0 of the listing corresponds to offset 1).
    versions = kp.list_versions(NOWDOC_ID, limit=100, collection=collection)
    for offset, archived in enumerate(versions, start=1):
        if _filter_by_tags([archived], tags):
            # Found match - get full item at this version offset
            return kp.get_version(NOWDOC_ID, offset, collection=collection)

    return None
|
|
902
1010
|
|
|
903
1011
|
|
|
904
1012
|
@app.command()
|
|
@@ -920,6 +1028,10 @@ def get(
|
|
|
920
1028
|
"--no-similar",
|
|
921
1029
|
help="Suppress similar items in output"
|
|
922
1030
|
)] = False,
|
|
1031
|
+
tag: Annotated[Optional[list[str]], typer.Option(
|
|
1032
|
+
"--tag", "-t",
|
|
1033
|
+
help="Require tag (key or key=value, repeatable)"
|
|
1034
|
+
)] = None,
|
|
923
1035
|
limit: Annotated[int, typer.Option(
|
|
924
1036
|
"--limit", "-n",
|
|
925
1037
|
help="Max items for --history or --similar (default: 10)"
|
|
@@ -940,6 +1052,7 @@ def get(
|
|
|
940
1052
|
keep get doc:1 --history # List all versions
|
|
941
1053
|
keep get doc:1 --similar # List similar items
|
|
942
1054
|
keep get doc:1 --no-similar # Suppress similar items
|
|
1055
|
+
keep get doc:1 -t project=myapp # Only if tag matches
|
|
943
1056
|
"""
|
|
944
1057
|
kp = _get_keeper(store, collection)
|
|
945
1058
|
|
|
@@ -1074,6 +1187,13 @@ def get(
|
|
|
1074
1187
|
typer.echo(f"Not found: {actual_id}", err=True)
|
|
1075
1188
|
raise typer.Exit(1)
|
|
1076
1189
|
|
|
1190
|
+
# Check tag filter if specified
|
|
1191
|
+
if tag:
|
|
1192
|
+
filtered = _filter_by_tags([item], tag)
|
|
1193
|
+
if not filtered:
|
|
1194
|
+
typer.echo(f"Tag filter not matched: {actual_id}", err=True)
|
|
1195
|
+
raise typer.Exit(1)
|
|
1196
|
+
|
|
1077
1197
|
# Get version navigation
|
|
1078
1198
|
version_nav = kp.get_version_nav(actual_id, internal_version, collection=collection)
|
|
1079
1199
|
|
|
@@ -1153,12 +1273,146 @@ def init(
|
|
|
1153
1273
|
|
|
1154
1274
|
|
|
1155
1275
|
|
|
1276
|
+
def _get_config_value(kp: Keeper, path: str):
|
|
1277
|
+
"""
|
|
1278
|
+
Get config value by dotted path.
|
|
1279
|
+
|
|
1280
|
+
Special paths (not in TOML):
|
|
1281
|
+
file - config file location
|
|
1282
|
+
tool - package directory (SKILL.md location)
|
|
1283
|
+
store - store path
|
|
1284
|
+
collections - list of collections
|
|
1285
|
+
|
|
1286
|
+
Dotted paths into config:
|
|
1287
|
+
providers - all provider config
|
|
1288
|
+
providers.embedding - embedding provider name
|
|
1289
|
+
providers.summarization - summarization provider name
|
|
1290
|
+
embedding.* - embedding config details
|
|
1291
|
+
summarization.* - summarization config details
|
|
1292
|
+
tags - default tags
|
|
1293
|
+
"""
|
|
1294
|
+
cfg = kp._config
|
|
1295
|
+
|
|
1296
|
+
# Special built-in paths (not in TOML)
|
|
1297
|
+
if path == "file":
|
|
1298
|
+
return str(cfg.config_path) if cfg else None
|
|
1299
|
+
if path == "tool":
|
|
1300
|
+
return str(get_tool_directory())
|
|
1301
|
+
if path == "store":
|
|
1302
|
+
return str(kp._store_path)
|
|
1303
|
+
if path == "collections":
|
|
1304
|
+
return kp.list_collections()
|
|
1305
|
+
|
|
1306
|
+
# Provider shortcuts
|
|
1307
|
+
if path == "providers":
|
|
1308
|
+
if cfg:
|
|
1309
|
+
return {
|
|
1310
|
+
"embedding": cfg.embedding.name,
|
|
1311
|
+
"summarization": cfg.summarization.name,
|
|
1312
|
+
"document": cfg.document.name,
|
|
1313
|
+
}
|
|
1314
|
+
return None
|
|
1315
|
+
if path == "providers.embedding":
|
|
1316
|
+
return cfg.embedding.name if cfg else None
|
|
1317
|
+
if path == "providers.summarization":
|
|
1318
|
+
return cfg.summarization.name if cfg else None
|
|
1319
|
+
if path == "providers.document":
|
|
1320
|
+
return cfg.document.name if cfg else None
|
|
1321
|
+
|
|
1322
|
+
# Tags shortcut
|
|
1323
|
+
if path == "tags":
|
|
1324
|
+
return cfg.default_tags if cfg else {}
|
|
1325
|
+
|
|
1326
|
+
# Dotted path into config attributes
|
|
1327
|
+
if not cfg:
|
|
1328
|
+
raise typer.BadParameter(f"No config loaded, cannot access: {path}")
|
|
1329
|
+
|
|
1330
|
+
parts = path.split(".")
|
|
1331
|
+
value = cfg
|
|
1332
|
+
for part in parts:
|
|
1333
|
+
if hasattr(value, part):
|
|
1334
|
+
value = getattr(value, part)
|
|
1335
|
+
elif hasattr(value, "params") and part in value.params:
|
|
1336
|
+
# Provider config params
|
|
1337
|
+
value = value.params[part]
|
|
1338
|
+
elif isinstance(value, dict) and part in value:
|
|
1339
|
+
value = value[part]
|
|
1340
|
+
else:
|
|
1341
|
+
raise typer.BadParameter(f"Unknown config path: {path}")
|
|
1342
|
+
|
|
1343
|
+
# Return name for provider objects
|
|
1344
|
+
if hasattr(value, "name") and hasattr(value, "params"):
|
|
1345
|
+
return value.name
|
|
1346
|
+
return value
|
|
1347
|
+
|
|
1348
|
+
|
|
1349
|
+
# Settings that may not be configured but are available.
# NOTE(review): nothing in the visible code reads this table; the formatter
# below spells out the same tags example by hand.
AVAILABLE_SETTINGS = {
    "tags": {
        "description": "Default tags applied to all operations",
        "example": {"project": "myproject", "topic": "mytopic"},
    },
}


def _format_config_with_defaults(kp: Keeper) -> str:
    """
    Format config output with commented defaults for unused settings.

    The result always lists ``file``, ``tool``, ``store`` and
    ``collections``. When a config is loaded, the provider names follow,
    then either the configured default tags or a commented-out example.

    Args:
        kp: Keeper whose ``_config``, ``_store_path`` and collections are
            reported.

    Returns:
        The formatted, newline-joined text.
    """
    cfg = kp._config
    config_path = cfg.config_path if cfg else None

    # Show paths. The store path is always shown, whether or not it
    # coincides with the config directory.
    lines = [
        f"file: {config_path}",
        f"tool: {get_tool_directory()}",
        f"store: {kp._store_path}",
        f"collections: {kp.list_collections()}",
    ]

    if cfg:
        lines.append("")
        lines.append("providers:")
        lines.append(f" embedding: {cfg.embedding.name}")
        lines.append(f" summarization: {cfg.summarization.name}")
        lines.append(f" document: {cfg.document.name}")

        lines.append("")
        if cfg.default_tags:
            # Show configured tags
            lines.append("tags:")
            lines.extend(f" {key}: {value}" for key, value in cfg.default_tags.items())
        else:
            # Show commented example for tags
            lines.append("# tags:")
            lines.append("# project: myproject")
            lines.append("# topic: mytopic")

    return "\n".join(lines)
|
|
1396
|
+
|
|
1397
|
+
|
|
1156
1398
|
@app.command()
|
|
1157
1399
|
def config(
|
|
1400
|
+
path: Annotated[Optional[str], typer.Argument(
|
|
1401
|
+
help="Config path to get (e.g., 'file', 'tool', 'store', 'providers.embedding')"
|
|
1402
|
+
)] = None,
|
|
1158
1403
|
store: StoreOption = None,
|
|
1159
1404
|
):
|
|
1160
1405
|
"""
|
|
1161
|
-
Show
|
|
1406
|
+
Show configuration. Optionally get a specific value by path.
|
|
1407
|
+
|
|
1408
|
+
\b
|
|
1409
|
+
Examples:
|
|
1410
|
+
keep config # Show all config
|
|
1411
|
+
keep config file # Config file location
|
|
1412
|
+
keep config tool # Package directory (SKILL.md location)
|
|
1413
|
+
keep config store # Store path
|
|
1414
|
+
keep config providers # All provider config
|
|
1415
|
+
keep config providers.embedding # Embedding provider name
|
|
1162
1416
|
"""
|
|
1163
1417
|
kp = _get_keeper(store, "default")
|
|
1164
1418
|
|
|
@@ -1166,28 +1420,42 @@ def config(
|
|
|
1166
1420
|
config_path = cfg.config_path if cfg else None
|
|
1167
1421
|
store_path = kp._store_path
|
|
1168
1422
|
|
|
1423
|
+
# If a specific path is requested, return just that value
|
|
1424
|
+
if path:
|
|
1425
|
+
try:
|
|
1426
|
+
value = _get_config_value(kp, path)
|
|
1427
|
+
except typer.BadParameter as e:
|
|
1428
|
+
typer.echo(str(e), err=True)
|
|
1429
|
+
raise typer.Exit(1)
|
|
1430
|
+
|
|
1431
|
+
if _get_json_output():
|
|
1432
|
+
typer.echo(json.dumps({path: value}, indent=2))
|
|
1433
|
+
else:
|
|
1434
|
+
# Raw output for shell scripting
|
|
1435
|
+
if isinstance(value, (list, dict)):
|
|
1436
|
+
typer.echo(json.dumps(value))
|
|
1437
|
+
else:
|
|
1438
|
+
typer.echo(value)
|
|
1439
|
+
return
|
|
1440
|
+
|
|
1441
|
+
# Full config output
|
|
1169
1442
|
if _get_json_output():
|
|
1170
1443
|
result = {
|
|
1444
|
+
"file": str(config_path) if config_path else None,
|
|
1445
|
+
"tool": str(get_tool_directory()),
|
|
1171
1446
|
"store": str(store_path),
|
|
1172
|
-
"config": str(config_path) if config_path else None,
|
|
1173
1447
|
"collections": kp.list_collections(),
|
|
1448
|
+
"providers": {
|
|
1449
|
+
"embedding": cfg.embedding.name if cfg else None,
|
|
1450
|
+
"summarization": cfg.summarization.name if cfg else None,
|
|
1451
|
+
"document": cfg.document.name if cfg else None,
|
|
1452
|
+
},
|
|
1174
1453
|
}
|
|
1175
|
-
if cfg:
|
|
1176
|
-
result["
|
|
1177
|
-
result["summarization"] = cfg.summarization.name
|
|
1454
|
+
if cfg and cfg.default_tags:
|
|
1455
|
+
result["tags"] = cfg.default_tags
|
|
1178
1456
|
typer.echo(json.dumps(result, indent=2))
|
|
1179
1457
|
else:
|
|
1180
|
-
|
|
1181
|
-
typer.echo(f"Config: {config_path}")
|
|
1182
|
-
if cfg and cfg.config_dir and cfg.config_dir.resolve() != store_path.resolve():
|
|
1183
|
-
typer.echo(f"Store: {store_path}")
|
|
1184
|
-
|
|
1185
|
-
typer.echo(f"Collections: {kp.list_collections()}")
|
|
1186
|
-
|
|
1187
|
-
if cfg:
|
|
1188
|
-
typer.echo(f"\nProviders:")
|
|
1189
|
-
typer.echo(f" Embedding: {cfg.embedding.name}")
|
|
1190
|
-
typer.echo(f" Summarization: {cfg.summarization.name}")
|
|
1458
|
+
typer.echo(_format_config_with_defaults(kp))
|
|
1191
1459
|
|
|
1192
1460
|
|
|
1193
1461
|
@app.command("process-pending")
|
keep/config.py
CHANGED
|
@@ -5,6 +5,7 @@ The configuration is stored as a TOML file in the store directory.
|
|
|
5
5
|
It specifies which providers to use and their parameters.
|
|
6
6
|
"""
|
|
7
7
|
|
|
8
|
+
import importlib.resources
|
|
8
9
|
import os
|
|
9
10
|
import platform
|
|
10
11
|
import tomllib
|
|
@@ -19,7 +20,34 @@ import tomli_w
|
|
|
19
20
|
|
|
20
21
|
CONFIG_FILENAME = "keep.toml"
|
|
21
22
|
CONFIG_VERSION = 3 # Bumped for document versioning support
|
|
22
|
-
SYSTEM_DOCS_VERSION =
|
|
23
|
+
SYSTEM_DOCS_VERSION = 3 # Increment when bundled system docs content changes
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def get_tool_directory() -> Path:
    """
    Return the directory that contains the ``keep`` package.

    SKILL.md is expected to sit in this directory, next to ``keep/``
    (installed package) or at the repository root (development checkout).
    The file's presence is not verified: the parent of the package
    directory is returned either way.

    NOTE(review): assumes the package is installed on a regular filesystem;
    for other resource containers the stringified location may not be a
    usable path - confirm if zipped installs must be supported.
    """
    # Locate the keep package, then step one level up.
    pkg_path = Path(str(importlib.resources.files("keep")))
    return pkg_path.parent
|
|
23
51
|
|
|
24
52
|
|
|
25
53
|
@dataclass
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
---
|
|
2
|
+
tags:
|
|
3
|
+
category: system
|
|
4
|
+
context: library
|
|
5
|
+
---
|
|
6
|
+
# Library
|
|
7
|
+
|
|
8
|
+
Public domain texts for testing and bootstrapping keep.
|
|
9
|
+
The content, as well as the format, is relevant to the practice of this skill.
|
|
10
|
+
|
|
11
|
+
## Resolving File Paths
|
|
12
|
+
|
|
13
|
+
The library files are located in the `docs/library/` directory of the keep package.
|
|
14
|
+
To construct URIs for these files:
|
|
15
|
+
|
|
16
|
+
1. **From shell:** Use `file://$(keep config tool)/docs/library/{filename}`
|
|
17
|
+
2. **In Python:**
|
|
18
|
+
```python
|
|
19
|
+
from importlib.resources import files
|
|
20
|
+
library_path = files("keep").parent / "docs" / "library"
|
|
21
|
+
uri = f"file://{library_path}/{filename}"
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
---
|
|
25
|
+
|
|
26
|
+
## Files
|
|
27
|
+
|
|
28
|
+
### ancrenewisse.pdf
|
|
29
|
+
- **URI template:** `file://{keep_library}/ancrenewisse.pdf`
|
|
30
|
+
- **Title:** Ancrene Wisse (Ancrene Riwle)
|
|
31
|
+
- **Date:** c. 1200s (13th century)
|
|
32
|
+
- **Language:** Middle English
|
|
33
|
+
- **Translator:** James Morton, The Camden Society, London 1853
|
|
34
|
+
- **Source:** https://www.bsswebsite.me.uk/History/AncreneRiwle/AncreneRiwle2.html
|
|
35
|
+
- **Status:** Public domain
|
|
36
|
+
- **Description:** A monastic guide for Christian anchoresses. Provides guidance on conduct with an "inner" and "outer" rule, and their relationship: "one relates to the right conduct of the heart; the other, to the regulation of the outward life".
|
|
37
|
+
|
|
38
|
+
---
|
|
39
|
+
|
|
40
|
+
### impermanence_verse.txt
|
|
41
|
+
- **URI template:** `file://{keep_library}/impermanence_verse.txt`
|
|
42
|
+
- **Title:** 無常偈 (Impermanence Verse / Closing Verse)
|
|
43
|
+
- **Date:** Traditional Zen liturgy (exact origin uncertain)
|
|
44
|
+
- **Language:** Japanese (Kanji/Kana), with romanization and multiple English translations
|
|
45
|
+
- **Source:** Soto Zen liturgy
|
|
46
|
+
- **Status:** Traditional teaching, freely shared
|
|
47
|
+
- **Description:** Four-line verse chanted at the end of Zen practice sessions. "Great is the matter of birth and death / Life slips quickly by / Time waits for no one / Wake up! Wake up!" Includes character-by-character breakdown, cultural context, and linguistic notes.
|
|
48
|
+
|
|
49
|
+
---
|
|
50
|
+
|
|
51
|
+
### mn61.html
|
|
52
|
+
- **URI template:** `file://{keep_library}/mn61.html`
|
|
53
|
+
- **Title:** Ambalaṭṭhikārāhulovāda Sutta (MN 61) - The Exhortation to Rāhula at Mango Stone
|
|
54
|
+
- **Date:** Original: ~5th century BCE; Translation: contemporary
|
|
55
|
+
- **Language:** English translation from Pali
|
|
56
|
+
- **Translator:** Thanissaro Bhikkhu
|
|
57
|
+
- **Source:** https://www.dhammatalks.org/suttas/MN/MN61.html
|
|
58
|
+
- **Format:** Raw HTML (complete with markup, navigation, footnotes)
|
|
59
|
+
- **License:** Freely distributed for educational use
|
|
60
|
+
- **Description:** Buddha's teaching to his son Rāhula on reflection before, during, and after bodily, verbal, and mental actions. The triple-check pattern: reflect before acting/speaking, check while doing, review after. Mirror metaphor for self-reflection.
|
|
61
|
+
**Format note:** Kept as raw HTML to test document processing and summarization on markup-heavy content.
|
|
62
|
+
|
|
63
|
+
---
|
|
64
|
+
|
|
65
|
+
### an5.57_translation-en-sujato.json
|
|
66
|
+
- **URI template:** `file://{keep_library}/an5.57_translation-en-sujato.json`
|
|
67
|
+
- **Title:** Abhiṇhapaccavekkhitabbaṭhānasutta (AN 5.57; also known as the Upajjhaṭṭhāna Sutta) - Subjects for Regular Reviewing
|
|
68
|
+
- **Date:** Original: ~5th century BCE; Translation: modern
|
|
69
|
+
- **Language:** English translation from Pali
|
|
70
|
+
- **Translator:** Bhikkhu Sujato
|
|
71
|
+
- **Source:** SuttaCentral
|
|
72
|
+
- **Source URL:** https://suttacentral.net/an5.57/en/sujato?lang=en
|
|
73
|
+
- **Data:** https://github.com/suttacentral/sc-data/blob/main/sc_bilara_data/translation/en/sujato/sutta/an/an5/an5.57_translation-en-sujato.json
|
|
74
|
+
- **License:** Creative Commons CC0 1.0 Universal (SuttaCentral translations)
|
|
75
|
+
- **Description:** The Five Remembrances - five subjects that all sentient beings should reflect on regularly: aging, sickness, death, separation from loved ones, and being heir to one's own actions. "Reviewing this subject often, they entirely give up bad conduct, or at least reduce it".
|
|
76
|
+
|
|
77
|
+
---
|
|
78
|
+
|
|
79
|
+
### fortytwo_chapters.txt
|
|
80
|
+
- **URI template:** `file://{keep_library}/fortytwo_chapters.txt`
|
|
81
|
+
- **Title:** 佛說四十二章經 (Sutra of Forty-Two Chapters)
|
|
82
|
+
- **Date:** Eastern Han Dynasty (25-220 CE)
|
|
83
|
+
- **Language:** Classical Chinese
|
|
84
|
+
- **Source:** Project Gutenberg (#23585)
|
|
85
|
+
- **Status:** Public domain
|
|
86
|
+
- **Description:** One of the earliest Buddhist texts to reach China, traditionally attributed to translation by Kāśyapa Mātaṅga and Dharmarakṣa.
|
|
87
|
+
|
|
88
|
+
---
|
|
89
|
+
|
|
90
|
+
### mumford_sticks_and_stones.txt
|
|
91
|
+
- **URI template:** `file://{keep_library}/mumford_sticks_and_stones.txt`
|
|
92
|
+
- **Title:** Sticks and Stones: A Study of American Architecture and Civilization
|
|
93
|
+
- **Author:** Lewis Mumford (1895-1990)
|
|
94
|
+
- **Date:** 1924
|
|
95
|
+
- **Language:** English
|
|
96
|
+
- **Source:** Internet Archive (sticksstones0000lewi)
|
|
97
|
+
- **Status:** Public domain (published before 1929)
|
|
98
|
+
- **Description:** Mumford's first major work on architecture, examining American building traditions from medieval influences through industrialization. Includes chapters on "The Medieval Tradition," "The Renaissance in New England," "The Age of Rationalism," and more.
|
|
99
|
+
|
|
100
|
+
**Note:** This is OCR text from archive.org. Quality is good but may contain occasional scanning artifacts.
|
|
101
|
+
|
|
102
|
+
---
|
|
103
|
+
|
|
104
|
+
### true_person_no_rank.md
|
|
105
|
+
- **URI template:** `file://{keep_library}/true_person_no_rank.md`
|
|
106
|
+
- **Title:** 無位真人 (The True Person of No Rank)
|
|
107
|
+
- **Date:** Original: 9th century CE; Commentary layers: 9th-20th centuries
|
|
108
|
+
- **Language:** Chinese (verified original text) with English translation and commentary
|
|
109
|
+
- **Source:** Record of Linji (臨濟錄, Línjì Lù); Book of Serenity (從容錄) Case 38
|
|
110
|
+
- **Primary sources:** DILA Buddhist Dictionary, multiple scholarly translations
|
|
111
|
+
- **Status:** Core teaching in public domain; compiled with verification notes
|
|
112
|
+
- **Description:** Linji Yixuan's famous teaching: "Within this mass of red flesh, there is a true person of no rank, constantly coming and going through the gates of your face." Multi-layered document exploring the original teaching, koan tradition, Dōgen's commentary, modern interpretations, and linguistic analysis. Includes Chinese text (verified), translations, and commentary relationships.
|
|
113
|
+
|
|
114
|
+
---
|
|
115
|
+
|
|
116
|
+
## Usage for Testing
|
|
117
|
+
|
|
118
|
+
These texts provide diverse test cases for keep:
|
|
119
|
+
|
|
120
|
+
1. **Different languages:** English, Chinese (Classical and modern romanization), Japanese, Middle English, Pali (via translation)
|
|
121
|
+
2. **Different formats:** PDF, plain text, JSON, Markdown, HTML (with markup)
|
|
122
|
+
3. **Different domains:** Buddhist teachings, Zen liturgy, architectural criticism, medieval instructional prose
|
|
123
|
+
4. **Different writing styles:** Ancient scripture, koan commentary, scholarly analysis, liturgical verse, teaching notes
|
|
124
|
+
5. **Different lengths:** Four-line verses to full books
|
|
125
|
+
6. **Different structures:** Linear narratives, multi-layered commentaries, character-by-character analysis, mirror patterns, web documents with navigation
|
|
126
|
+
7. **Multilingual content:** Japanese-English parallel texts, Chinese with romanization, cross-linguistic terminology
|
|
127
|
+
8. **Processing challenges:** Markdown, UTF-8 plaintext, OCR artifacts (Mumford), HTML markup (MN 61), PDF extraction (Ancrene Wisse), structured JSON data (AN5.57).
|
|
128
|
+
|
|
129
|
+
---
|
|
130
|
+
|
|
131
|
+
## Adding More Test Data
|
|
132
|
+
|
|
133
|
+
When adding public domain texts:
|
|
134
|
+
|
|
135
|
+
1. Verify their relevance to the practice of this skill
|
|
136
|
+
2. Verify compatibility with the MIT license, e.g. public domain status (pre-1929 for US, or explicit license)
|
|
137
|
+
3. Include source URL (Project Gutenberg, archive.org, etc.)
|
|
138
|
+
4. Add metadata to this index
|
|
139
|
+
|
|
140
|
+
---
|
|
141
|
+
|
|
142
|
+
## License
|
|
143
|
+
|
|
144
|
+
Each text retains its original license status (public domain or Creative Commons as noted above). This index and dataset organization is released under CC0 1.0.
|
keep/providers/base.py
CHANGED
|
@@ -148,6 +148,45 @@ class EmbeddingProvider(Protocol):
|
|
|
148
148
|
# Summarization
|
|
149
149
|
# -----------------------------------------------------------------------------
|
|
150
150
|
|
|
151
|
+
# Shared system prompt for all LLM-based summarization providers.
# Defined once here so every provider sends the same instructions instead of
# carrying its own copy. The "Do NOT begin with" list mirrors the lead-in
# phrases that strip_summary_preamble() removes from model output.
SUMMARIZATION_SYSTEM_PROMPT = """You are a precise summarization assistant.
Create a concise summary of the provided document that captures:
- The main purpose or topic
- Key points or functionality
- Important details that would help someone decide if this document is relevant

IMPORTANT: Start the summary directly with the content. Do NOT begin with phrases like:
- "Here is a concise summary"
- "This document describes"
- "The document covers"
- "Summary:"
Just state the facts directly. Keep the summary under 200 words."""
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def strip_summary_preamble(text: str) -> str:
    """
    Remove common LLM preambles from summaries.

    Many models add introductory phrases despite instructions not to.
    This post-processes the output to strip them.

    Each pattern is anchored to the start of the text and applied once, in
    order, so stacked lead-ins such as "Summary: This document describes ..."
    are both removed. Matching is case-insensitive and tolerates leading
    whitespace. Text without a recognised preamble is returned unchanged.

    Args:
        text: Raw summary text as returned by the model.

    Returns:
        The summary with any recognised leading preamble removed.
    """
    import re

    preambles = [
        # "Here is ...:" lead-ins. The filler between the phrase and its colon
        # is limited to one line ([^:\n]); otherwise a lead-in that has no
        # colon of its own would swallow real content up to the first colon
        # appearing anywhere later in the summary.
        r"^\s*here is a concise summary[^:\n]*:\s*",
        r"^\s*here is the summary[^:\n]*:\s*",
        r"^\s*here's a summary[^:\n]*:\s*",
        r"^\s*summary:\s*",
        # Bare "<subject> describes/covers" openers; the remainder of the
        # sentence is kept as the summary.
        r"^\s*the document describes\s+",
        r"^\s*this document describes\s+",
        r"^\s*the document covers\s+",
        r"^\s*this document covers\s+",
    ]
    result = text
    for pattern in preambles:
        # The leading \s* lets the ^ anchor match even when the caller did not
        # strip the model output first.
        result = re.sub(pattern, "", result, count=1, flags=re.IGNORECASE)
    return result
|
|
188
|
+
|
|
189
|
+
|
|
151
190
|
@runtime_checkable
|
|
152
191
|
class SummarizationProvider(Protocol):
|
|
153
192
|
"""
|
keep/providers/llm.py
CHANGED
|
@@ -6,7 +6,13 @@ import json
|
|
|
6
6
|
import os
|
|
7
7
|
from typing import Any
|
|
8
8
|
|
|
9
|
-
from .base import
|
|
9
|
+
from .base import (
|
|
10
|
+
SummarizationProvider,
|
|
11
|
+
TaggingProvider,
|
|
12
|
+
get_registry,
|
|
13
|
+
SUMMARIZATION_SYSTEM_PROMPT,
|
|
14
|
+
strip_summary_preamble,
|
|
15
|
+
)
|
|
10
16
|
|
|
11
17
|
|
|
12
18
|
# -----------------------------------------------------------------------------
|
|
@@ -16,19 +22,11 @@ from .base import SummarizationProvider, TaggingProvider, get_registry
|
|
|
16
22
|
class AnthropicSummarization:
|
|
17
23
|
"""
|
|
18
24
|
Summarization provider using Anthropic's Claude API.
|
|
19
|
-
|
|
25
|
+
|
|
20
26
|
Requires: ANTHROPIC_API_KEY environment variable.
|
|
21
27
|
Optionally reads from OpenClaw config via OPENCLAW_CONFIG env var.
|
|
22
28
|
"""
|
|
23
29
|
|
|
24
|
-
SYSTEM_PROMPT = """You are a precise summarization assistant.
|
|
25
|
-
Create a concise summary of the provided document that captures:
|
|
26
|
-
- The main purpose or topic
|
|
27
|
-
- Key points or functionality
|
|
28
|
-
- Important details that would help someone decide if this document is relevant
|
|
29
|
-
|
|
30
|
-
Be factual and specific. Do not include phrases like "This document" - just state the content directly."""
|
|
31
|
-
|
|
32
30
|
def __init__(
|
|
33
31
|
self,
|
|
34
32
|
model: str = "claude-3-5-haiku-20241022",
|
|
@@ -56,22 +54,22 @@ Be factual and specific. Do not include phrases like "This document" - just stat
|
|
|
56
54
|
"""Generate summary using Anthropic Claude."""
|
|
57
55
|
# Truncate very long content
|
|
58
56
|
truncated = content[:50000] if len(content) > 50000 else content
|
|
59
|
-
|
|
57
|
+
|
|
60
58
|
try:
|
|
61
59
|
response = self.client.messages.create(
|
|
62
60
|
model=self.model,
|
|
63
61
|
max_tokens=self.max_tokens,
|
|
64
|
-
system=
|
|
62
|
+
system=SUMMARIZATION_SYSTEM_PROMPT,
|
|
65
63
|
messages=[
|
|
66
64
|
{"role": "user", "content": truncated}
|
|
67
65
|
],
|
|
68
66
|
)
|
|
69
|
-
|
|
67
|
+
|
|
70
68
|
# Extract text from response
|
|
71
69
|
if response.content and len(response.content) > 0:
|
|
72
|
-
return response.content[0].text
|
|
70
|
+
return strip_summary_preamble(response.content[0].text)
|
|
73
71
|
return truncated[:500] # Fallback
|
|
74
|
-
except Exception
|
|
72
|
+
except Exception:
|
|
75
73
|
# Fallback to truncation on error
|
|
76
74
|
return truncated[:500]
|
|
77
75
|
|
|
@@ -79,18 +77,10 @@ Be factual and specific. Do not include phrases like "This document" - just stat
|
|
|
79
77
|
class OpenAISummarization:
|
|
80
78
|
"""
|
|
81
79
|
Summarization provider using OpenAI's chat API.
|
|
82
|
-
|
|
80
|
+
|
|
83
81
|
Requires: KEEP_OPENAI_API_KEY or OPENAI_API_KEY environment variable.
|
|
84
82
|
"""
|
|
85
|
-
|
|
86
|
-
SYSTEM_PROMPT = """You are a precise summarization assistant.
|
|
87
|
-
Create a concise summary of the provided document that captures:
|
|
88
|
-
- The main purpose or topic
|
|
89
|
-
- Key points or functionality
|
|
90
|
-
- Important details that would help someone decide if this document is relevant
|
|
91
83
|
|
|
92
|
-
Be factual and specific. Do not include phrases like "This document" - just state the content directly."""
|
|
93
|
-
|
|
94
84
|
def __init__(
|
|
95
85
|
self,
|
|
96
86
|
model: str = "gpt-4o-mini",
|
|
@@ -101,41 +91,39 @@ Be factual and specific. Do not include phrases like "This document" - just stat
|
|
|
101
91
|
from openai import OpenAI
|
|
102
92
|
except ImportError:
|
|
103
93
|
raise RuntimeError("OpenAISummarization requires 'openai' library")
|
|
104
|
-
|
|
94
|
+
|
|
105
95
|
self.model = model
|
|
106
96
|
self.max_tokens = max_tokens
|
|
107
|
-
|
|
97
|
+
|
|
108
98
|
key = api_key or os.environ.get("KEEP_OPENAI_API_KEY") or os.environ.get("OPENAI_API_KEY")
|
|
109
99
|
if not key:
|
|
110
100
|
raise ValueError("OpenAI API key required")
|
|
111
|
-
|
|
101
|
+
|
|
112
102
|
self._client = OpenAI(api_key=key)
|
|
113
|
-
|
|
103
|
+
|
|
114
104
|
def summarize(self, content: str, *, max_length: int = 500) -> str:
|
|
115
105
|
"""Generate a summary using OpenAI."""
|
|
116
106
|
# Truncate very long content to avoid token limits
|
|
117
107
|
truncated = content[:50000] if len(content) > 50000 else content
|
|
118
|
-
|
|
108
|
+
|
|
119
109
|
response = self._client.chat.completions.create(
|
|
120
110
|
model=self.model,
|
|
121
111
|
messages=[
|
|
122
|
-
{"role": "system", "content":
|
|
112
|
+
{"role": "system", "content": SUMMARIZATION_SYSTEM_PROMPT},
|
|
123
113
|
{"role": "user", "content": truncated},
|
|
124
114
|
],
|
|
125
115
|
max_tokens=self.max_tokens,
|
|
126
116
|
temperature=0.3,
|
|
127
117
|
)
|
|
128
|
-
|
|
129
|
-
return response.choices[0].message.content.strip()
|
|
118
|
+
|
|
119
|
+
return strip_summary_preamble(response.choices[0].message.content.strip())
|
|
130
120
|
|
|
131
121
|
|
|
132
122
|
class OllamaSummarization:
|
|
133
123
|
"""
|
|
134
124
|
Summarization provider using Ollama's local API.
|
|
135
125
|
"""
|
|
136
|
-
|
|
137
|
-
SYSTEM_PROMPT = OpenAISummarization.SYSTEM_PROMPT
|
|
138
|
-
|
|
126
|
+
|
|
139
127
|
def __init__(
|
|
140
128
|
self,
|
|
141
129
|
model: str = "llama3.2",
|
|
@@ -143,27 +131,27 @@ class OllamaSummarization:
|
|
|
143
131
|
):
|
|
144
132
|
self.model = model
|
|
145
133
|
self.base_url = base_url.rstrip("/")
|
|
146
|
-
|
|
134
|
+
|
|
147
135
|
def summarize(self, content: str, *, max_length: int = 500) -> str:
|
|
148
136
|
"""Generate a summary using Ollama."""
|
|
149
137
|
import requests
|
|
150
|
-
|
|
138
|
+
|
|
151
139
|
truncated = content[:50000] if len(content) > 50000 else content
|
|
152
|
-
|
|
140
|
+
|
|
153
141
|
response = requests.post(
|
|
154
142
|
f"{self.base_url}/api/chat",
|
|
155
143
|
json={
|
|
156
144
|
"model": self.model,
|
|
157
145
|
"messages": [
|
|
158
|
-
{"role": "system", "content":
|
|
146
|
+
{"role": "system", "content": SUMMARIZATION_SYSTEM_PROMPT},
|
|
159
147
|
{"role": "user", "content": truncated},
|
|
160
148
|
],
|
|
161
149
|
"stream": False,
|
|
162
150
|
},
|
|
163
151
|
)
|
|
164
152
|
response.raise_for_status()
|
|
165
|
-
|
|
166
|
-
return response.json()["message"]["content"].strip()
|
|
153
|
+
|
|
154
|
+
return strip_summary_preamble(response.json()["message"]["content"].strip())
|
|
167
155
|
|
|
168
156
|
|
|
169
157
|
class PassthroughSummarization:
|
keep/providers/mlx.py
CHANGED
|
@@ -10,7 +10,13 @@ Requires: pip install mlx-lm mlx
|
|
|
10
10
|
import os
|
|
11
11
|
from typing import Any
|
|
12
12
|
|
|
13
|
-
from .base import
|
|
13
|
+
from .base import (
|
|
14
|
+
EmbeddingProvider,
|
|
15
|
+
SummarizationProvider,
|
|
16
|
+
get_registry,
|
|
17
|
+
SUMMARIZATION_SYSTEM_PROMPT,
|
|
18
|
+
strip_summary_preamble,
|
|
19
|
+
)
|
|
14
20
|
|
|
15
21
|
|
|
16
22
|
class MLXEmbedding:
|
|
@@ -75,21 +81,12 @@ class MLXEmbedding:
|
|
|
75
81
|
class MLXSummarization:
|
|
76
82
|
"""
|
|
77
83
|
Summarization provider using MLX-LM on Apple Silicon.
|
|
78
|
-
|
|
84
|
+
|
|
79
85
|
Runs local LLMs optimized for Apple Silicon. No API key required.
|
|
80
|
-
|
|
86
|
+
|
|
81
87
|
Requires: pip install mlx-lm
|
|
82
88
|
"""
|
|
83
|
-
|
|
84
|
-
SYSTEM_PROMPT = """You are a precise summarization assistant.
|
|
85
|
-
Create a concise summary of the provided document that captures:
|
|
86
|
-
- The main purpose or topic
|
|
87
|
-
- Key points or functionality
|
|
88
|
-
- Important details that would help someone decide if this document is relevant
|
|
89
89
|
|
|
90
|
-
Be factual and specific. Do not include phrases like "This document" - just state the content directly.
|
|
91
|
-
Keep the summary under 200 words."""
|
|
92
|
-
|
|
93
90
|
def __init__(
|
|
94
91
|
self,
|
|
95
92
|
model: str = "mlx-community/Llama-3.2-3B-Instruct-4bit",
|
|
@@ -122,27 +119,27 @@ Keep the summary under 200 words."""
|
|
|
122
119
|
def summarize(self, content: str, *, max_length: int = 500) -> str:
|
|
123
120
|
"""Generate a summary using MLX-LM."""
|
|
124
121
|
from mlx_lm import generate
|
|
125
|
-
|
|
122
|
+
|
|
126
123
|
# Truncate very long content to fit context window
|
|
127
124
|
# Most models have 4k-8k context, leave room for prompt and response
|
|
128
125
|
max_content_chars = 12000
|
|
129
126
|
truncated = content[:max_content_chars] if len(content) > max_content_chars else content
|
|
130
|
-
|
|
127
|
+
|
|
131
128
|
# Format as chat (works with instruction-tuned models)
|
|
132
129
|
if hasattr(self._tokenizer, "apply_chat_template"):
|
|
133
130
|
messages = [
|
|
134
|
-
{"role": "system", "content":
|
|
131
|
+
{"role": "system", "content": SUMMARIZATION_SYSTEM_PROMPT},
|
|
135
132
|
{"role": "user", "content": f"Summarize the following:\n\n{truncated}"},
|
|
136
133
|
]
|
|
137
134
|
prompt = self._tokenizer.apply_chat_template(
|
|
138
|
-
messages,
|
|
139
|
-
tokenize=False,
|
|
135
|
+
messages,
|
|
136
|
+
tokenize=False,
|
|
140
137
|
add_generation_prompt=True
|
|
141
138
|
)
|
|
142
139
|
else:
|
|
143
140
|
# Fallback for models without chat template
|
|
144
|
-
prompt = f"{
|
|
145
|
-
|
|
141
|
+
prompt = f"{SUMMARIZATION_SYSTEM_PROMPT}\n\nDocument:\n{truncated}\n\nSummary:"
|
|
142
|
+
|
|
146
143
|
# Generate
|
|
147
144
|
response = generate(
|
|
148
145
|
self._model,
|
|
@@ -151,8 +148,8 @@ Keep the summary under 200 words."""
|
|
|
151
148
|
max_tokens=self.max_tokens,
|
|
152
149
|
verbose=False,
|
|
153
150
|
)
|
|
154
|
-
|
|
155
|
-
return response.strip()
|
|
151
|
+
|
|
152
|
+
return strip_summary_preamble(response.strip())
|
|
156
153
|
|
|
157
154
|
|
|
158
155
|
class MLXTagging:
|
keep/store.py
CHANGED
|
@@ -556,3 +556,34 @@ class ChromaStore:
|
|
|
556
556
|
"""Return the number of items in a collection."""
|
|
557
557
|
coll = self._get_collection(collection)
|
|
558
558
|
return coll.count()
|
|
559
|
+
|
|
560
|
+
# -------------------------------------------------------------------------
|
|
561
|
+
# Resource Management
|
|
562
|
+
# -------------------------------------------------------------------------
|
|
563
|
+
|
|
564
|
+
def close(self) -> None:
    """
    Close ChromaDB client and release resources.

    Good practice to call when done, though Python's GC will clean up eventually.

    Empties ``self._collections`` and sets ``self._client`` to ``None``.
    NOTE(review): other methods presumably need a live client, so the
    instance should be treated as unusable afterwards - confirm.
    """
    self._collections.clear()
    # ChromaDB PersistentClient doesn't have explicit close(),
    # but clearing references allows garbage collection
    self._client = None
|
|
574
|
+
|
|
575
|
+
def __enter__(self) -> "ChromaStore":
    """Context manager entry: return this store as the ``with`` target."""
    return self
|
|
578
|
+
|
|
579
|
+
def __exit__(self, exc_type, exc_val, exc_tb) -> bool:
    """
    Context manager exit - close resources.

    Always calls ``close()``, whether or not the ``with`` body raised.
    Returns ``False`` so an exception from the body is not suppressed.
    """
    self.close()
    return False
|
|
583
|
+
|
|
584
|
+
def __del__(self) -> None:
    """
    Cleanup on deletion.

    Best-effort call to ``close()``; any ``Exception`` it raises is
    deliberately discarded rather than propagated from the finalizer.
    """
    try:
        self.close()
    except Exception:
        pass  # Suppress errors during garbage collection
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: keep-skill
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.11.0
|
|
4
4
|
Summary: Reflective memory - remember and search documents by meaning
|
|
5
5
|
Project-URL: Homepage, https://github.com/hughpyle/keep
|
|
6
6
|
Project-URL: Repository, https://github.com/hughpyle/keep
|
|
@@ -24,6 +24,7 @@ Requires-Dist: typer>=0.9
|
|
|
24
24
|
Provides-Extra: anthropic
|
|
25
25
|
Requires-Dist: anthropic>=0.40.0; extra == 'anthropic'
|
|
26
26
|
Provides-Extra: dev
|
|
27
|
+
Requires-Dist: google-genai>=1.0.0; extra == 'dev'
|
|
27
28
|
Requires-Dist: pytest-cov>=4.0; extra == 'dev'
|
|
28
29
|
Requires-Dist: pytest>=7.0; extra == 'dev'
|
|
29
30
|
Provides-Extra: documents
|
|
@@ -60,7 +61,7 @@ uv tool install 'keep-skill[local]'
|
|
|
60
61
|
keep init
|
|
61
62
|
|
|
62
63
|
# Index content
|
|
63
|
-
keep update path/to/document.md -t project=myapp
|
|
64
|
+
keep update file:///path/to/document.md -t project=myapp
|
|
64
65
|
keep update "Rate limit is 100 req/min" -t topic=api
|
|
65
66
|
|
|
66
67
|
# Search by meaning
|
|
@@ -115,6 +116,7 @@ keep init # Creates .keep/ at repo root
|
|
|
115
116
|
|
|
116
117
|
# Index files and notes
|
|
117
118
|
keep update file:///path/to/doc.md -t project=myapp
|
|
119
|
+
keep update "Token refresh needs clock sync" -t topic=auth
|
|
118
120
|
keep update "Important insight" -t type=note
|
|
119
121
|
|
|
120
122
|
# Search
|
|
@@ -129,6 +131,7 @@ keep get ID --history # All versions
|
|
|
129
131
|
|
|
130
132
|
# Tags
|
|
131
133
|
keep list --tag project=myapp # Find by tag
|
|
134
|
+
keep find "auth" -t topic=auth # Cross-project topic search
|
|
132
135
|
keep list --tags= # List all tag keys
|
|
133
136
|
|
|
134
137
|
# Current intentions
|
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
keep/__init__.py,sha256=
|
|
1
|
+
keep/__init__.py,sha256=Z9SmBjE91Ip02X-w1qAYEFwsHlzGkvqGcm5BN7qhw0A,1622
|
|
2
2
|
keep/__main__.py,sha256=3Uu70IhIDIjh8OW6jp9jQQ3dF2lKdJWi_3FtRIQMiMY,104
|
|
3
|
-
keep/api.py,sha256=
|
|
3
|
+
keep/api.py,sha256=GGMY6w2a1b-Yve1aWz35dhZuSA9Va5Whe6M1Ek87yw4,65105
|
|
4
4
|
keep/chunking.py,sha256=neAXOLSvVwbUxapbqq7nZrbSNSzMXuhxj-ODoOSodsU,11830
|
|
5
|
-
keep/cli.py,sha256=
|
|
6
|
-
keep/config.py,sha256=
|
|
5
|
+
keep/cli.py,sha256=82gXDpunX5LpzfKdrRidpPirnurFDZ6pPVBWdXAhwPc,54836
|
|
6
|
+
keep/config.py,sha256=YJ4IN85Y8HqrOxV2qTkGCMOuBcGNLDf0V2FK4KsZD-4,17079
|
|
7
7
|
keep/context.py,sha256=CNpjmrv6eW2kV1E0MO6qAQfhYKRlfzAL--6v4Mj1nFY,71
|
|
8
8
|
keep/document_store.py,sha256=UswqKIGSc5E-r7Tg9k0g5-byYnuar3e9FieQ7WNod9k,29109
|
|
9
9
|
keep/errors.py,sha256=G9e5FbdfeugyfHOuL_SPZlM5jgWWnwsX4hM7IzanBZc,857
|
|
@@ -11,23 +11,24 @@ keep/indexing.py,sha256=dpPYo3WXnIhFDWinz5ZBZVk7_qumeNpP4EpOIY0zMbs,6063
|
|
|
11
11
|
keep/logging_config.py,sha256=IGwkgIyg-TfYaT4MnoCXfmjeHAe_wsB_XQ1QhVT_ro8,3503
|
|
12
12
|
keep/paths.py,sha256=Dv7pM6oo2QgjL6sj5wPjhuMOK2wqUkfd4Kz08TwJ1ps,3331
|
|
13
13
|
keep/pending_summaries.py,sha256=_irGe7P1Lmog2c5cEgx-BElpq4YJW-tEmF5A3IUZQbQ,5727
|
|
14
|
-
keep/store.py,sha256=
|
|
14
|
+
keep/store.py,sha256=JjgqxW6NGpQa_FEOl9KIQ39IkRIVWIHd9gRoRdWvEKk,18867
|
|
15
15
|
keep/types.py,sha256=irvUJYUHQgQdVqC4_lgrG0FbTN1BdZqFxZr0ubVPSG4,2314
|
|
16
16
|
keep/data/__init__.py,sha256=C1YARrudHwK2Bmlxkh7dZlIaNJ5m5WrSTglCdG8e3T0,24
|
|
17
17
|
keep/data/system/__init__.py,sha256=Rp92_sBO3kscuWXJomo0HKeHfU-N4BgBeT3-5El0Mcg,28
|
|
18
18
|
keep/data/system/conversations.md,sha256=jE53wYSUyu5uPFNtO1Tu6w4f5QxqLei7muxLF_kZE2s,9837
|
|
19
19
|
keep/data/system/domains.md,sha256=EHE6zU2-lx7UeLqyOTmoWl1WVlvgRq3_QnFb_EZceEY,5584
|
|
20
|
+
keep/data/system/library.md,sha256=KFDRN7YCPwxttghcb-ts6je9fd2Mlysk_5H6vrwmgX0,7438
|
|
20
21
|
keep/data/system/now.md,sha256=GyQo_LizSIVKbj5q52q4ErV-nxz8rzUOlkILjgNu25s,388
|
|
21
22
|
keep/providers/__init__.py,sha256=6AwJYc6cF1ZT6BcU_6ATyeWk7MHohdVU2-ccqDSvCHU,1094
|
|
22
|
-
keep/providers/base.py,sha256=
|
|
23
|
+
keep/providers/base.py,sha256=qUpVbgLHH4zdvimcM0YMyJnExF7WJ7_U0w_sslpPhSI,15897
|
|
23
24
|
keep/providers/documents.py,sha256=EXeSy5i3RUL0kciIC6w3ldAEfbTIyC5fgfzC_WAI0iY,8211
|
|
24
25
|
keep/providers/embedding_cache.py,sha256=gna6PZEJanbn2GUN0vj1b1MC0xVWePM9cot2KgZUdu8,8856
|
|
25
26
|
keep/providers/embeddings.py,sha256=zi8GyitKexdbCJyU1nLrUhGt_zzPn3udYrrPZ5Ak8Wo,9081
|
|
26
|
-
keep/providers/llm.py,sha256=
|
|
27
|
-
keep/providers/mlx.py,sha256=
|
|
27
|
+
keep/providers/llm.py,sha256=Pcq1fK7NXBzdVrQegjmAFmuHdZXpQraApr8M6O6hJFE,11680
|
|
28
|
+
keep/providers/mlx.py,sha256=xQTXM9kYWUhfqpRVPNCDyF2nkOo50ZYs5DxHELbFB4g,8707
|
|
28
29
|
keep/providers/summarization.py,sha256=MlVTcYipaqp2lT-QYnznp0AMuPVG36QfcTQnvY7Gb-Q,3409
|
|
29
|
-
keep_skill-0.
|
|
30
|
-
keep_skill-0.
|
|
31
|
-
keep_skill-0.
|
|
32
|
-
keep_skill-0.
|
|
33
|
-
keep_skill-0.
|
|
30
|
+
keep_skill-0.11.0.dist-info/METADATA,sha256=AWE9565tLR2SJH-sPkdALnoGZVAj9o5OzZMY18Ux-TA,6238
|
|
31
|
+
keep_skill-0.11.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
32
|
+
keep_skill-0.11.0.dist-info/entry_points.txt,sha256=W8yiI4kNeW0IC8ji4EHRWrvdhFxzaqTIePUhJAJAMOo,39
|
|
33
|
+
keep_skill-0.11.0.dist-info/licenses/LICENSE,sha256=zsm0tpvtyUkevcjn5BIvs9jAho8iwxq3Ax9647AaOSg,1086
|
|
34
|
+
keep_skill-0.11.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|