keep-skill 0.1.0-py3-none-any.whl → 0.3.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
keep/config.py CHANGED
@@ -11,7 +11,7 @@ import tomllib
  from dataclasses import dataclass, field
  from datetime import datetime, timezone
  from pathlib import Path
- from typing import Any
+ from typing import Any, Optional

  # tomli_w for writing TOML (tomllib is read-only)
  try:
@@ -21,7 +21,7 @@ except ImportError:


  CONFIG_FILENAME = "keep.toml"
- CONFIG_VERSION = 1
+ CONFIG_VERSION = 3  # Bumped for document versioning support


  @dataclass
@@ -31,23 +31,72 @@ class ProviderConfig:
      params: dict[str, Any] = field(default_factory=dict)


+ @dataclass
+ class EmbeddingIdentity:
+     """
+     Identity of an embedding model for compatibility checking.
+
+     Two embeddings are compatible only if they have the same identity.
+     Different models, even with the same dimension, produce incompatible vectors.
+     """
+     provider: str   # e.g., "sentence-transformers", "openai"
+     model: str      # e.g., "all-MiniLM-L6-v2", "text-embedding-3-small"
+     dimension: int  # e.g., 384, 1536
+
+     @property
+     def key(self) -> str:
+         """
+         Short key for collection naming.
+
+         Format: {provider}_{model_slug}
+         e.g., "st_MiniLM_L6_v2", "openai_3_small"
+         """
+         # Simplify model name for use in collection names
+         model_slug = self.model.replace("-", "_").replace(".", "_")
+         # Remove common prefixes
+         for prefix in ["all_", "text_embedding_"]:
+             if model_slug.lower().startswith(prefix):
+                 model_slug = model_slug[len(prefix):]
+         # Shorten provider names
+         provider_short = {
+             "sentence-transformers": "st",
+             "openai": "openai",
+             "gemini": "gemini",
+             "ollama": "ollama",
+         }.get(self.provider, self.provider[:6])
+
+         return f"{provider_short}_{model_slug}"
+
+
  @dataclass
  class StoreConfig:
      """Complete store configuration."""
-     path: Path
+     path: Path  # Store path (where data lives)
+     config_dir: Optional[Path] = None  # Where config was loaded from (may differ from path)
+     store_path: Optional[str] = None  # Explicit store.path from config file (raw string)
      version: int = CONFIG_VERSION
      created: str = field(default_factory=lambda: datetime.now(timezone.utc).isoformat())
-
+
      # Provider configurations
      embedding: ProviderConfig = field(default_factory=lambda: ProviderConfig("sentence-transformers"))
      summarization: ProviderConfig = field(default_factory=lambda: ProviderConfig("truncate"))
      document: ProviderConfig = field(default_factory=lambda: ProviderConfig("composite"))
-
+
+     # Embedding identity (set after first use, used for validation)
+     embedding_identity: Optional[EmbeddingIdentity] = None
+
+     # Default tags applied to all update/remember operations
+     default_tags: dict[str, str] = field(default_factory=dict)
+
+     # Maximum length for summaries (used for smart remember and validation)
+     max_summary_length: int = 500
+
      @property
      def config_path(self) -> Path:
          """Path to the TOML config file."""
-         return self.path / CONFIG_FILENAME
-
+         config_location = self.config_dir if self.config_dir else self.path
+         return config_location / CONFIG_FILENAME
+
      def exists(self) -> bool:
          """Check if config file exists."""
          return self.config_path.exists()
@@ -182,9 +231,13 @@ def detect_default_providers() -> dict[str, ProviderConfig]:
              params["model"] = ms_model
          embedding_provider = ProviderConfig("gemini", params)

-     # Fall back to sentence-transformers (local, always works)
+     # Fall back to local embedding (prefer MPS-accelerated on Apple Silicon)
      if embedding_provider is None:
-         embedding_provider = ProviderConfig("sentence-transformers")
+         if is_apple_silicon:
+             # Use sentence-transformers with MPS acceleration (no auth required)
+             embedding_provider = ProviderConfig("mlx", {"model": "all-MiniLM-L6-v2"})
+         else:
+             embedding_provider = ProviderConfig("sentence-transformers")

      providers["embedding"] = embedding_provider

@@ -225,99 +278,177 @@ def detect_default_providers() -> dict[str, ProviderConfig]:
      return providers


- def create_default_config(store_path: Path) -> StoreConfig:
-     """Create a new config with auto-detected defaults."""
+ def create_default_config(config_dir: Path, store_path: Optional[Path] = None) -> StoreConfig:
+     """
+     Create a new config with auto-detected defaults.
+
+     Args:
+         config_dir: Directory where keep.toml will be saved
+         store_path: Optional explicit store location (if different from config_dir)
+     """
      providers = detect_default_providers()
-
+
+     # If store_path is provided and different from config_dir, record it
+     store_path_str = None
+     actual_store = config_dir
+     if store_path and store_path.resolve() != config_dir.resolve():
+         store_path_str = str(store_path)
+         actual_store = store_path
+
      return StoreConfig(
-         path=store_path,
+         path=actual_store,
+         config_dir=config_dir,
+         store_path=store_path_str,
          embedding=providers["embedding"],
          summarization=providers["summarization"],
          document=providers["document"],
      )


- def load_config(store_path: Path) -> StoreConfig:
+ def load_config(config_dir: Path) -> StoreConfig:
      """
-     Load configuration from a store directory.
-
+     Load configuration from a config directory.
+
+     The config_dir is where keep.toml lives. The actual store location
+     may be different if store.path is set in the config.
+
+     Args:
+         config_dir: Directory containing keep.toml
+
      Raises:
          FileNotFoundError: If config doesn't exist
          ValueError: If config is invalid
      """
-     config_path = store_path / CONFIG_FILENAME
-
+     config_path = config_dir / CONFIG_FILENAME
+
      if not config_path.exists():
          raise FileNotFoundError(f"Config not found: {config_path}")
-
+
      with open(config_path, "rb") as f:
          data = tomllib.load(f)
-
+
      # Validate version
      version = data.get("store", {}).get("version", 1)
      if version > CONFIG_VERSION:
          raise ValueError(f"Config version {version} is newer than supported ({CONFIG_VERSION})")
-
+
+     # Parse store.path - explicit store location
+     store_path_str = data.get("store", {}).get("path")
+     if store_path_str:
+         actual_store = Path(store_path_str).expanduser().resolve()
+     else:
+         actual_store = config_dir  # Backwards compat: store is at config location
+
      # Parse provider configs
      def parse_provider(section: dict) -> ProviderConfig:
          return ProviderConfig(
              name=section.get("name", ""),
              params={k: v for k, v in section.items() if k != "name"},
          )
-
+
+     # Parse default tags (filter out system tags)
+     raw_tags = data.get("tags", {})
+     default_tags = {k: str(v) for k, v in raw_tags.items()
+                     if not k.startswith("_")}
+
+     # Parse max_summary_length (default 500)
+     max_summary_length = data.get("store", {}).get("max_summary_length", 500)
+
      return StoreConfig(
-         path=store_path,
+         path=actual_store,
+         config_dir=config_dir,
+         store_path=store_path_str,
          version=version,
          created=data.get("store", {}).get("created", ""),
          embedding=parse_provider(data.get("embedding", {"name": "sentence-transformers"})),
          summarization=parse_provider(data.get("summarization", {"name": "truncate"})),
          document=parse_provider(data.get("document", {"name": "composite"})),
+         embedding_identity=parse_embedding_identity(data.get("embedding_identity")),
+         default_tags=default_tags,
+         max_summary_length=max_summary_length,
      )


+ def parse_embedding_identity(data: dict | None) -> EmbeddingIdentity | None:
+     """Parse embedding identity from config data."""
+     if data is None:
+         return None
+     provider = data.get("provider")
+     model = data.get("model")
+     dimension = data.get("dimension")
+     if provider and model and dimension:
+         return EmbeddingIdentity(provider=provider, model=model, dimension=dimension)
+     return None
+
+
  def save_config(config: StoreConfig) -> None:
      """
-     Save configuration to the store directory.
-
+     Save configuration to the config directory.
+
      Creates the directory if it doesn't exist.
      """
      if tomli_w is None:
          raise RuntimeError("tomli_w is required to save config. Install with: pip install tomli-w")
-
-     # Ensure directory exists
-     config.path.mkdir(parents=True, exist_ok=True)
-
+
+     # Ensure config directory exists
+     config_location = config.config_dir if config.config_dir else config.path
+     config_location.mkdir(parents=True, exist_ok=True)
+
      # Build TOML structure
      def provider_to_dict(p: ProviderConfig) -> dict:
          d = {"name": p.name}
          d.update(p.params)
          return d
-
+
+     store_section: dict[str, Any] = {
+         "version": config.version,
+         "created": config.created,
+     }
+     # Only write store.path if explicitly set (not default)
+     if config.store_path:
+         store_section["path"] = config.store_path
+     # Only write max_summary_length if not default
+     if config.max_summary_length != 500:
+         store_section["max_summary_length"] = config.max_summary_length
+
      data = {
-         "store": {
-             "version": config.version,
-             "created": config.created,
-         },
+         "store": store_section,
          "embedding": provider_to_dict(config.embedding),
          "summarization": provider_to_dict(config.summarization),
          "document": provider_to_dict(config.document),
      }
-
+
+     # Add embedding identity if set
+     if config.embedding_identity:
+         data["embedding_identity"] = {
+             "provider": config.embedding_identity.provider,
+             "model": config.embedding_identity.model,
+             "dimension": config.embedding_identity.dimension,
+         }
+
+     # Add default tags if set
+     if config.default_tags:
+         data["tags"] = config.default_tags
+
      with open(config.config_path, "wb") as f:
          tomli_w.dump(data, f)


- def load_or_create_config(store_path: Path) -> StoreConfig:
+ def load_or_create_config(config_dir: Path, store_path: Optional[Path] = None) -> StoreConfig:
      """
      Load existing config or create a new one with defaults.
-
+
      This is the main entry point for config management.
+
+     Args:
+         config_dir: Directory containing (or to contain) keep.toml
+         store_path: Optional explicit store location (for new configs only)
      """
-     config_path = store_path / CONFIG_FILENAME
-
+     config_path = config_dir / CONFIG_FILENAME
+
      if config_path.exists():
-         return load_config(store_path)
+         return load_config(config_dir)
      else:
-         config = create_default_config(store_path)
+         config = create_default_config(config_dir, store_path)
          save_config(config)
          return config
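
The new config surface can be exercised as below. This is an illustration only, not code from the package: it assumes keep-skill 0.3.0 is installed, that keep.config exposes EmbeddingIdentity and load_or_create_config exactly as added above, and that tomli-w is available; the .keep and ~/keep-data paths are hypothetical.

    from pathlib import Path

    from keep.config import EmbeddingIdentity, load_or_create_config

    # Key slugging: "all-MiniLM-L6-v2" -> "MiniLM_L6_v2", provider shortened to "st"
    ident = EmbeddingIdentity(provider="sentence-transformers",
                              model="all-MiniLM-L6-v2", dimension=384)
    print(ident.key)  # st_MiniLM_L6_v2

    # keep.toml now lives in config_dir while the data can live elsewhere;
    # on first run this writes .keep/keep.toml with store.path recorded.
    config = load_or_create_config(Path(".keep"),
                                   store_path=Path("~/keep-data").expanduser())
    print(config.config_path)  # .keep/keep.toml
    print(config.path)         # the expanded ~/keep-data location

Note that writing the config requires tomli-w; without it the call above raises the RuntimeError shown in save_config().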
keep/context.py CHANGED
@@ -1,127 +1,3 @@
  """
- Working context and top-of-mind retrieval.
-
- This module provides hierarchical context management for efficient
- "what are we working on?" queries with O(log(log(N))) retrieval.
+ Context module - placeholder for future routing functionality.
  """
-
- from dataclasses import dataclass, field
- from datetime import datetime, timezone
- from typing import Any, Optional
-
-
- @dataclass
- class WorkingContext:
-     """
-     The current working context — a high-level summary of active work.
-
-     This is the "Level 3" summary that any agent can read to instantly
-     understand what's being worked on.
-
-     Attributes:
-         summary: Natural language description of current focus
-         active_items: IDs of items currently being worked with
-         topics: Active topic/domain tags
-         updated: When context was last updated
-         session_id: Current session identifier
-         metadata: Additional context-specific data (arbitrary structure)
-     """
-     summary: str
-     active_items: list[str] = field(default_factory=list)
-     topics: list[str] = field(default_factory=list)
-     updated: str = field(default_factory=lambda: datetime.now(timezone.utc).isoformat())
-     session_id: Optional[str] = None
-     metadata: dict[str, Any] = field(default_factory=dict)
-
-
- @dataclass
- class TopicSummary:
-     """
-     A summary of items within a topic cluster (Level 2).
-
-     Topics aggregate related items and provide a mid-level
-     overview without retrieving all underlying items.
-
-     Attributes:
-         topic: Topic identifier (tag value)
-         summary: Generated summary of topic contents
-         item_count: Number of items in this topic
-         key_items: IDs of the most important items in the topic
-         subtopics: Child topics if hierarchical
-         updated: When topic summary was last regenerated
-     """
-     topic: str
-     summary: str
-     item_count: int
-     key_items: list[str] = field(default_factory=list)
-     subtopics: list[str] = field(default_factory=list)
-     updated: str = field(default_factory=lambda: datetime.now(timezone.utc).isoformat())
-
-
- @dataclass
- class RoutingContext:
-     """
-     Describes how items are routed between private and shared stores.
-
-     This document lives at a well-known location in the shared store.
-     The facade reads it to make routing decisions. The private store
-     is physically separate and invisible from the shared store.
-
-     Attributes:
-         summary: Natural language description of the privacy model
-         private_patterns: Tag patterns that route to private store (each pattern is dict[str, str])
-         private_store_path: Location of the private store (if local)
-         updated: When routing was last modified
-         metadata: Additional routing configuration
-     """
-     summary: str = "Items tagged for private/draft visibility route to a separate store."
-     private_patterns: list[dict[str, str]] = field(default_factory=lambda: [
-         {"_visibility": "draft"},
-         {"_visibility": "private"},
-         {"_for": "self"},
-     ])
-     private_store_path: Optional[str] = None  # Resolved at init; None = default location
-     updated: str = field(default_factory=lambda: datetime.now(timezone.utc).isoformat())
-     metadata: dict[str, Any] = field(default_factory=dict)
-
-
- # Well-known item ID for the routing context document
- ROUTING_CONTEXT_ID = "_system:routing"
-
-
- # Reserved system tags for context management (stored with items)
- CONTEXT_TAGS = {
-     "_session": "Session that last touched this item",
-     "_topic": "Primary topic classification",
-     "_level": "Hierarchy level (0=source, 1=cluster, 2=topic, 3=context)",
-     "_summarizes": "IDs of items this item summarizes (for hierarchy)",
- }
-
- # Relevance scoring is computed at query time, NOT stored.
- # This preserves agility between broad exploration and focused work.
- # Score factors:
- # - semantic similarity to query/hint
- # - recency (time decay)
- # - topic overlap with current WorkingContext.topics
- # - session affinity (same session = boost)
- # The weighting of these factors can vary by retrieval mode.
-
-
- def generate_session_id() -> str:
-     """Generate a unique session identifier."""
-     import uuid
-     date = datetime.now(timezone.utc).strftime("%Y-%m-%d")
-     short_uuid = uuid.uuid4().hex[:8]
-     return f"{date}:{short_uuid}"
-
-
- def matches_private_pattern(tags: dict[str, str], patterns: list[dict[str, str]]) -> bool:
-     """
-     Check if an item's tags match any private routing pattern.
-
-     A pattern matches if ALL its key-value pairs are present in tags.
-     """
-     for pattern in patterns:
-         if all(tags.get(k) == v for k, v in pattern.items()):
-             return True
-     return False
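
For reference, the private-routing check removed above worked as follows. This worked example restates the deleted matches_private_pattern() helper and the default patterns from the removed RoutingContext purely for illustration; the tag dictionaries are hypothetical and none of this ships in 0.3.0.

    # Restatement of the removed helper: a pattern matches if ALL of its
    # key-value pairs are present in the item's tags.
    def matches_private_pattern(tags: dict[str, str], patterns: list[dict[str, str]]) -> bool:
        for pattern in patterns:
            if all(tags.get(k) == v for k, v in pattern.items()):
                return True
        return False

    # Default patterns from the removed RoutingContext dataclass
    default_patterns = [{"_visibility": "draft"}, {"_visibility": "private"}, {"_for": "self"}]

    print(matches_private_pattern({"_visibility": "draft", "topic": "notes"}, default_patterns))  # True
    print(matches_private_pattern({"_visibility": "public"}, default_patterns))                   # False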