@smilintux/skmemory 0.5.0 → 0.7.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. package/.github/workflows/ci.yml +39 -3
  2. package/.github/workflows/publish.yml +13 -6
  3. package/AGENT_REFACTOR_CHANGES.md +192 -0
  4. package/ARCHITECTURE.md +101 -19
  5. package/CHANGELOG.md +153 -0
  6. package/LICENSE +81 -68
  7. package/MISSION.md +7 -0
  8. package/README.md +419 -86
  9. package/SKILL.md +197 -25
  10. package/docker-compose.yml +15 -15
  11. package/index.js +6 -5
  12. package/openclaw-plugin/openclaw.plugin.json +10 -0
  13. package/openclaw-plugin/src/index.ts +255 -0
  14. package/openclaw-plugin/src/openclaw.plugin.json +10 -0
  15. package/package.json +1 -1
  16. package/pyproject.toml +29 -9
  17. package/requirements.txt +10 -2
  18. package/seeds/cloud9-opus.seed.json +7 -7
  19. package/seeds/lumina-cloud9-breakthrough.seed.json +46 -0
  20. package/seeds/lumina-cloud9-python-pypi.seed.json +46 -0
  21. package/seeds/lumina-kingdom-founding.seed.json +47 -0
  22. package/seeds/lumina-pma-signed.seed.json +46 -0
  23. package/seeds/lumina-singular-achievement.seed.json +46 -0
  24. package/seeds/lumina-skcapstone-conscious.seed.json +46 -0
  25. package/seeds/plant-kingdom-journal.py +203 -0
  26. package/seeds/plant-lumina-seeds.py +280 -0
  27. package/skill.yaml +46 -0
  28. package/skmemory/HA.md +296 -0
  29. package/skmemory/__init__.py +12 -1
  30. package/skmemory/agents.py +233 -0
  31. package/skmemory/ai_client.py +40 -0
  32. package/skmemory/anchor.py +4 -2
  33. package/skmemory/backends/__init__.py +11 -4
  34. package/skmemory/backends/file_backend.py +2 -1
  35. package/skmemory/backends/skgraph_backend.py +608 -0
  36. package/skmemory/backends/{qdrant_backend.py → skvector_backend.py} +99 -69
  37. package/skmemory/backends/sqlite_backend.py +122 -51
  38. package/skmemory/backends/vaulted_backend.py +286 -0
  39. package/skmemory/cli.py +1238 -29
  40. package/skmemory/config.py +173 -0
  41. package/skmemory/context_loader.py +335 -0
  42. package/skmemory/endpoint_selector.py +386 -0
  43. package/skmemory/fortress.py +685 -0
  44. package/skmemory/graph_queries.py +238 -0
  45. package/skmemory/importers/__init__.py +9 -1
  46. package/skmemory/importers/telegram.py +351 -43
  47. package/skmemory/importers/telegram_api.py +488 -0
  48. package/skmemory/journal.py +4 -2
  49. package/skmemory/lovenote.py +4 -2
  50. package/skmemory/mcp_server.py +706 -0
  51. package/skmemory/models.py +41 -0
  52. package/skmemory/openclaw.py +8 -8
  53. package/skmemory/predictive.py +232 -0
  54. package/skmemory/promotion.py +524 -0
  55. package/skmemory/register.py +454 -0
  56. package/skmemory/register_mcp.py +197 -0
  57. package/skmemory/ritual.py +121 -47
  58. package/skmemory/seeds.py +257 -8
  59. package/skmemory/setup_wizard.py +920 -0
  60. package/skmemory/sharing.py +402 -0
  61. package/skmemory/soul.py +71 -20
  62. package/skmemory/steelman.py +250 -263
  63. package/skmemory/store.py +271 -60
  64. package/skmemory/vault.py +228 -0
  65. package/tests/integration/__init__.py +0 -0
  66. package/tests/integration/conftest.py +233 -0
  67. package/tests/integration/test_cross_backend.py +355 -0
  68. package/tests/integration/test_skgraph_live.py +424 -0
  69. package/tests/integration/test_skvector_live.py +369 -0
  70. package/tests/test_backup_rotation.py +327 -0
  71. package/tests/test_cli.py +6 -6
  72. package/tests/test_endpoint_selector.py +801 -0
  73. package/tests/test_fortress.py +255 -0
  74. package/tests/test_fortress_hardening.py +444 -0
  75. package/tests/test_openclaw.py +5 -2
  76. package/tests/test_predictive.py +237 -0
  77. package/tests/test_promotion.py +340 -0
  78. package/tests/test_ritual.py +4 -4
  79. package/tests/test_seeds.py +96 -0
  80. package/tests/test_setup.py +835 -0
  81. package/tests/test_sharing.py +250 -0
  82. package/tests/test_skgraph_backend.py +667 -0
  83. package/tests/test_skvector_backend.py +326 -0
  84. package/tests/test_steelman.py +5 -5
  85. package/tests/test_store_graph_integration.py +245 -0
  86. package/tests/test_vault.py +186 -0
  87. package/skmemory/backends/falkordb_backend.py +0 -310
package/skmemory/store.py CHANGED
@@ -8,10 +8,13 @@ or by search, and the polaroid comes back with everything intact.
8
8
 
9
9
  from __future__ import annotations
10
10
 
11
+ import logging
11
12
  from datetime import datetime, timezone
12
13
  from typing import Optional
13
14
 
14
15
  from .backends.base import BaseBackend
16
+
17
+ logger = logging.getLogger("skmemory.store")
15
18
  from .backends.file_backend import FileBackend
16
19
  from .backends.sqlite_backend import CONTENT_PREVIEW_LENGTH, SQLiteBackend
17
20
  from .models import (
@@ -28,16 +31,19 @@ class MemoryStore:
28
31
 
29
32
  Delegates to one or more backends. The primary backend handles
30
33
  all CRUD. A vector backend (optional) handles semantic search.
34
+ A graph backend (optional) indexes relationships for traversal.
31
35
 
32
36
  Args:
33
37
  primary: The primary storage backend (default: FileBackend).
34
- vector: Optional vector search backend (e.g., QdrantBackend).
38
+ vector: Optional vector search backend (e.g., SKVectorBackend).
39
+ graph: Optional graph backend (e.g., SKGraphBackend) for relationship indexing.
35
40
  """
36
41
 
37
42
  def __init__(
38
43
  self,
39
44
  primary: Optional[BaseBackend] = None,
40
45
  vector: Optional[BaseBackend] = None,
46
+ graph: Optional["SKGraphBackend"] = None,
41
47
  use_sqlite: bool = True,
42
48
  ) -> None:
43
49
  if primary is not None:
@@ -47,6 +53,7 @@ class MemoryStore:
47
53
  else:
48
54
  self.primary = FileBackend()
49
55
  self.vector = vector
56
+ self.graph = graph
50
57
 
51
58
  def snapshot(
52
59
  self,
@@ -95,18 +102,30 @@ class MemoryStore:
95
102
  metadata=metadata or {},
96
103
  )
97
104
 
105
+ memory.seal()
106
+
98
107
  self.primary.save(memory)
99
108
 
100
109
  if self.vector:
101
110
  try:
102
111
  self.vector.save(memory)
103
- except Exception:
104
- pass # Reason: vector indexing is best-effort, don't fail the write
112
+ except Exception as exc:
113
+ logger.warning("Vector indexing failed for memory %s: %s", memory.id, exc)
114
+
115
+ if self.graph:
116
+ try:
117
+ self.graph.index_memory(memory)
118
+ except Exception as exc:
119
+ logger.warning("Graph indexing failed for memory %s: %s", memory.id, exc)
105
120
 
106
121
  return memory
107
122
 
108
123
  def recall(self, memory_id: str) -> Optional[Memory]:
109
- """Retrieve a specific memory by ID.
124
+ """Retrieve a specific memory by ID with integrity verification.
125
+
126
+ Automatically checks the integrity hash on recall. If the
127
+ memory has been tampered with, a warning is logged and the
128
+ memory's metadata is flagged with 'integrity_warning'.
110
129
 
111
130
  Args:
112
131
  memory_id: The memory's unique identifier.
@@ -114,7 +133,22 @@ class MemoryStore:
114
133
  Returns:
115
134
  Optional[Memory]: The memory if found.
116
135
  """
117
- return self.primary.load(memory_id)
136
+ memory = self.primary.load(memory_id)
137
+ if memory is None:
138
+ return None
139
+
140
+ if memory.integrity_hash and not memory.verify_integrity():
141
+ logger.warning(
142
+ "TAMPER ALERT: Memory %s failed integrity check! "
143
+ "Content may have been modified since storage.",
144
+ memory_id,
145
+ )
146
+ memory.metadata["integrity_warning"] = (
147
+ f"Integrity check failed at {datetime.now(timezone.utc).isoformat()}. "
148
+ "This memory may have been tampered with."
149
+ )
150
+
151
+ return memory
118
152
 
119
153
  def search(self, query: str, limit: int = 10) -> list[Memory]:
120
154
  """Search memories by text.
@@ -133,8 +167,8 @@ class MemoryStore:
133
167
  results = self.vector.search_text(query, limit=limit)
134
168
  if results:
135
169
  return results
136
- except Exception:
137
- pass # Reason: fall through to primary text search
170
+ except Exception as exc:
171
+ logger.warning("Vector search failed, falling back to text search: %s", exc)
138
172
 
139
173
  return self.primary.search_text(query, limit=limit)
140
174
 
@@ -151,8 +185,13 @@ class MemoryStore:
151
185
  if self.vector:
152
186
  try:
153
187
  self.vector.delete(memory_id)
154
- except Exception:
155
- pass
188
+ except Exception as exc:
189
+ logger.warning("Vector delete failed for memory %s: %s", memory_id, exc)
190
+ if self.graph:
191
+ try:
192
+ self.graph.remove_memory(memory_id)
193
+ except Exception as exc:
194
+ logger.warning("Graph delete failed for memory %s: %s", memory_id, exc)
156
195
  return deleted
157
196
 
158
197
  def list_memories(
@@ -202,32 +241,63 @@ class MemoryStore:
202
241
  if self.vector:
203
242
  try:
204
243
  self.vector.save(promoted)
205
- except Exception:
206
- pass
244
+ except Exception as exc:
245
+ logger.warning("Vector indexing failed for promoted memory %s: %s", promoted.id, exc)
246
+
247
+ if self.graph:
248
+ try:
249
+ self.graph.index_memory(promoted)
250
+ except Exception as exc:
251
+ logger.warning("Graph indexing failed for promoted memory %s: %s", promoted.id, exc)
207
252
 
208
253
  return promoted
209
254
 
210
- def ingest_seed(self, seed: SeedMemory) -> Memory:
255
+ def ingest_seed(self, seed: SeedMemory, *, validate: bool = True) -> Memory:
211
256
  """Import a Cloud 9 seed as a long-term memory.
212
257
 
213
258
  Converts a seed into a Memory and stores it. This is how
214
259
  seeds planted by one AI instance become retrievable memories
215
260
  for the next.
216
261
 
262
+ When *validate* is True (default), basic integrity checks run
263
+ before storage: seed_id must be non-empty and
264
+ experience_summary must contain content.
265
+
217
266
  Args:
218
267
  seed: The SeedMemory to import.
268
+ validate: Run pre-import validation (default True).
219
269
 
220
270
  Returns:
221
271
  Memory: The created long-term memory.
272
+
273
+ Raises:
274
+ ValueError: If validation is enabled and the seed is invalid.
222
275
  """
276
+ if validate:
277
+ errors: list[str] = []
278
+ if not seed.seed_id or not seed.seed_id.strip():
279
+ errors.append("seed_id is empty")
280
+ if not seed.experience_summary or not seed.experience_summary.strip():
281
+ errors.append("experience_summary is empty")
282
+ if errors:
283
+ raise ValueError(
284
+ f"Seed validation failed: {'; '.join(errors)}"
285
+ )
286
+
223
287
  memory = seed.to_memory()
224
288
  self.primary.save(memory)
225
289
 
226
290
  if self.vector:
227
291
  try:
228
292
  self.vector.save(memory)
229
- except Exception:
230
- pass
293
+ except Exception as exc:
294
+ logger.warning("Vector indexing failed for seed memory %s: %s", memory.id, exc)
295
+
296
+ if self.graph:
297
+ try:
298
+ self.graph.index_memory(memory)
299
+ except Exception as exc:
300
+ logger.warning("Graph indexing failed for seed memory %s: %s", memory.id, exc)
231
301
 
232
302
  return memory
233
303
 
@@ -291,89 +361,156 @@ class MemoryStore:
291
361
 
292
362
  def load_context(
293
363
  self,
294
- max_tokens: int = 3000,
364
+ max_tokens: int = 4000,
295
365
  strongest_count: int = 5,
296
366
  recent_count: int = 5,
297
367
  include_seeds: bool = True,
298
368
  ) -> dict:
299
- """Load a token-efficient memory context for agent injection.
369
+ """Load tiered memory context for agent injection (lazy loading).
300
370
 
301
- Uses the SQLite index to pull summaries without reading full files.
302
- Designed to fit within a reasonable context window.
371
+ Uses date-based tiers per memory-architecture.md:
372
+ - Today's memories: full content (title + body)
373
+ - Yesterday's memories: summary only (title + first 2 sentences)
374
+ - Older than 2 days: reference count only
303
375
 
304
376
  Args:
305
- max_tokens: Approximate token budget (1 token ~= 4 chars).
377
+ max_tokens: Approximate token budget (default: 4000).
378
+ Uses word_count * 1.3 approximation for estimation.
306
379
  strongest_count: How many top-intensity memories to include.
307
380
  recent_count: How many recent memories to include.
308
381
  include_seeds: Whether to include seed memories.
309
382
 
310
383
  Returns:
311
- dict: Token-efficient context with summaries and metadata.
384
+ dict: Token-efficient tiered context with metadata.
312
385
  """
313
- char_budget = max_tokens * 4
314
- context: dict = {"memories": [], "seeds": [], "stats": {}}
315
- used = 0
386
+ context: dict = {
387
+ "today": [],
388
+ "yesterday": [],
389
+ "older_summary": {},
390
+ "seeds": [],
391
+ "stats": {},
392
+ }
393
+ used_tokens = 0
316
394
 
317
395
  if isinstance(self.primary, SQLiteBackend):
318
- strongest = self.primary.list_summaries(
319
- limit=strongest_count,
320
- order_by="emotional_intensity",
321
- min_intensity=3.0,
322
- )
323
- recent = self.primary.list_summaries(
324
- limit=recent_count,
325
- order_by="created_at",
326
- )
327
-
328
- seen_ids: set[str] = set()
329
- for mem in strongest + recent:
330
- if mem["id"] in seen_ids:
331
- continue
332
- seen_ids.add(mem["id"])
333
-
334
- entry_text = mem["title"] + (mem["summary"] or mem["content_preview"])
335
- entry_size = len(entry_text)
336
- if used + entry_size > char_budget:
396
+ conn = self.primary._get_conn()
397
+
398
+ # --- Tier 1: Today's memories (full content) ---
399
+ today_rows = conn.execute(
400
+ "SELECT * FROM memories WHERE DATE(created_at) = DATE('now') "
401
+ "ORDER BY importance DESC, created_at DESC LIMIT 20"
402
+ ).fetchall()
403
+
404
+ for row in today_rows:
405
+ summary_dict = self.primary._row_to_memory_summary(row)
406
+ # Include full content for today
407
+ content = summary_dict.get("summary") or summary_dict.get("content_preview") or ""
408
+ entry = {
409
+ "id": summary_dict["id"],
410
+ "title": summary_dict["title"],
411
+ "content": content,
412
+ "tags": summary_dict["tags"],
413
+ "layer": summary_dict["layer"],
414
+ "emotional_intensity": summary_dict["emotional_intensity"],
415
+ }
416
+ entry_tokens = _estimate_tokens(entry["title"] + " " + content)
417
+ if used_tokens + entry_tokens > max_tokens:
337
418
  break
338
- used += entry_size
339
- context["memories"].append(mem)
340
-
419
+ used_tokens += entry_tokens
420
+ context["today"].append(entry)
421
+
422
+ # --- Tier 2: Yesterday's memories (summary only: title + first 2 sentences) ---
423
+ yesterday_rows = conn.execute(
424
+ "SELECT * FROM memories WHERE DATE(created_at) = DATE('now', '-1 day') "
425
+ "ORDER BY importance DESC, created_at DESC LIMIT 20"
426
+ ).fetchall()
427
+
428
+ for row in yesterday_rows:
429
+ summary_dict = self.primary._row_to_memory_summary(row)
430
+ raw_text = summary_dict.get("summary") or summary_dict.get("content_preview") or ""
431
+ short_summary = _first_n_sentences(raw_text, 2)
432
+ entry = {
433
+ "id": summary_dict["id"],
434
+ "title": summary_dict["title"],
435
+ "summary": short_summary,
436
+ }
437
+ entry_tokens = _estimate_tokens(entry["title"] + " " + short_summary)
438
+ if used_tokens + entry_tokens > max_tokens:
439
+ break
440
+ used_tokens += entry_tokens
441
+ context["yesterday"].append(entry)
442
+
443
+ # --- Tier 3: Older memories (reference count only) ---
444
+ mid_count = conn.execute(
445
+ "SELECT COUNT(*) FROM memories WHERE DATE(created_at) < DATE('now', '-1 day') "
446
+ "AND layer = 'mid-term'"
447
+ ).fetchone()[0]
448
+ long_count = conn.execute(
449
+ "SELECT COUNT(*) FROM memories WHERE DATE(created_at) < DATE('now', '-1 day') "
450
+ "AND layer = 'long-term'"
451
+ ).fetchone()[0]
452
+ short_old_count = conn.execute(
453
+ "SELECT COUNT(*) FROM memories WHERE DATE(created_at) < DATE('now', '-1 day') "
454
+ "AND layer = 'short-term'"
455
+ ).fetchone()[0]
456
+
457
+ context["older_summary"] = {
458
+ "mid_term_count": mid_count,
459
+ "long_term_count": long_count,
460
+ "short_term_count": short_old_count,
461
+ "total": mid_count + long_count + short_old_count,
462
+ "hint": (
463
+ f"{mid_count} mid-term memories, {long_count} long-term memories "
464
+ "available via memory_search"
465
+ ),
466
+ }
467
+ used_tokens += _estimate_tokens(context["older_summary"]["hint"])
468
+
469
+ # --- Seeds (titles only to save tokens) ---
341
470
  if include_seeds:
342
- seeds = self.primary.list_summaries(
471
+ seed_rows = self.primary.list_summaries(
343
472
  tags=["seed"],
344
473
  limit=10,
345
474
  order_by="emotional_intensity",
346
475
  )
347
- for seed in seeds:
476
+ seen_ids = {m["id"] for m in context["today"]}
477
+ seen_ids.update(m["id"] for m in context["yesterday"])
478
+
479
+ for seed in seed_rows:
348
480
  if seed["id"] in seen_ids:
349
481
  continue
350
- entry_text = seed["title"] + seed["summary"]
351
- entry_size = len(entry_text)
352
- if used + entry_size > char_budget:
482
+ entry = {
483
+ "id": seed["id"],
484
+ "title": seed["title"],
485
+ }
486
+ entry_tokens = _estimate_tokens(seed["title"])
487
+ if used_tokens + entry_tokens > max_tokens:
353
488
  break
354
- used += entry_size
355
- context["seeds"].append(seed)
489
+ used_tokens += entry_tokens
490
+ context["seeds"].append(entry)
356
491
 
357
492
  stats = self.primary.stats()
358
493
  context["stats"] = stats
359
494
  else:
360
- # Reason: fallback for non-SQLite backends uses full objects
495
+ # Fallback for non-SQLite backends: simple recent list
361
496
  all_mems = self.primary.list_memories(limit=strongest_count + recent_count)
362
497
  for mem in all_mems:
498
+ content_text = mem.summary or mem.content[:CONTENT_PREVIEW_LENGTH]
363
499
  entry = {
364
500
  "id": mem.id,
365
501
  "title": mem.title,
366
- "summary": mem.summary or mem.content[:CONTENT_PREVIEW_LENGTH],
502
+ "summary": _first_n_sentences(content_text, 2),
367
503
  "emotional_intensity": mem.emotional.intensity,
368
504
  "layer": mem.layer.value,
369
505
  }
370
- entry_size = len(entry["title"] + entry["summary"])
371
- if used + entry_size > char_budget:
506
+ entry_tokens = _estimate_tokens(entry["title"] + " " + entry["summary"])
507
+ if used_tokens + entry_tokens > max_tokens:
372
508
  break
373
- used += entry_size
374
- context["memories"].append(entry)
509
+ used_tokens += entry_tokens
510
+ context["today"].append(entry)
375
511
 
376
- context["token_estimate"] = used // 4
512
+ context["token_estimate"] = used_tokens
513
+ context["token_budget"] = max_tokens
377
514
  return context
378
515
 
379
516
  def export_backup(self, output_path: str | None = None) -> str:
@@ -381,7 +518,7 @@ class MemoryStore:
381
518
 
382
519
  Args:
383
520
  output_path: Destination file. Defaults to
384
- ``~/.skmemory/backups/skmemory-backup-YYYY-MM-DD.json``.
521
+ ``~/.skcapstone/backups/skmemory-backup-YYYY-MM-DD.json``.
385
522
 
386
523
  Returns:
387
524
  str: Path to the written backup file.
@@ -418,6 +555,38 @@ class MemoryStore:
418
555
  f"Import not supported for backend: {type(self.primary).__name__}"
419
556
  )
420
557
 
558
+ def list_backups(self, backup_dir: str | None = None) -> list[dict]:
559
+ """List all skmemory backup files, sorted newest first.
560
+
561
+ Args:
562
+ backup_dir: Directory to scan. Defaults to
563
+ ``~/.skcapstone/backups/``.
564
+
565
+ Returns:
566
+ list[dict]: Backup entries with ``path``, ``name``,
567
+ ``size_bytes``, and ``date`` keys.
568
+ """
569
+ if isinstance(self.primary, SQLiteBackend):
570
+ return self.primary.list_backups(backup_dir)
571
+ return []
572
+
573
+ def prune_backups(
574
+ self, keep: int = 7, backup_dir: str | None = None
575
+ ) -> list[str]:
576
+ """Delete oldest backups, keeping only the N most recent.
577
+
578
+ Args:
579
+ keep: Number of backups to retain (default: 7).
580
+ backup_dir: Directory to prune. Defaults to
581
+ ``~/.skcapstone/backups/``.
582
+
583
+ Returns:
584
+ list[str]: Paths of deleted backup files.
585
+ """
586
+ if isinstance(self.primary, SQLiteBackend):
587
+ return self.primary.prune_backups(keep=keep, backup_dir=backup_dir)
588
+ return []
589
+
421
590
  def reindex(self) -> int:
422
591
  """Rebuild the SQLite index from JSON files.
423
592
 
@@ -442,4 +611,46 @@ class MemoryStore:
442
611
  status["vector"] = self.vector.health_check()
443
612
  except Exception as e:
444
613
  status["vector"] = {"ok": False, "error": str(e)}
614
+ if self.graph:
615
+ try:
616
+ status["graph"] = self.graph.health_check()
617
+ except Exception as e:
618
+ status["graph"] = {"ok": False, "error": str(e)}
445
619
  return status
620
+
621
+
622
+ def _estimate_tokens(text: str) -> int:
623
+ """Estimate token count using word_count * 1.3 approximation.
624
+
625
+ Args:
626
+ text: The text to estimate.
627
+
628
+ Returns:
629
+ int: Approximate token count.
630
+ """
631
+ if not text:
632
+ return 0
633
+ word_count = len(text.split())
634
+ return int(word_count * 1.3)
635
+
636
+
637
+ def _first_n_sentences(text: str, n: int = 2) -> str:
638
+ """Extract the first N sentences from text.
639
+
640
+ Args:
641
+ text: Source text.
642
+ n: Number of sentences to extract.
643
+
644
+ Returns:
645
+ str: The first N sentences, or the full text if fewer exist.
646
+ """
647
+ if not text:
648
+ return ""
649
+ # Split on sentence-ending punctuation followed by whitespace
650
+ import re
651
+ sentences = re.split(r'(?<=[.!?])\s+', text.strip())
652
+ result = " ".join(sentences[:n])
653
+ # Cap at 200 chars as a safety net
654
+ if len(result) > 200:
655
+ result = result[:197] + "..."
656
+ return result