superlocalmemory 2.7.6 → 2.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (170)
  1. package/CHANGELOG.md +120 -155
  2. package/README.md +115 -89
  3. package/api_server.py +2 -12
  4. package/docs/PATTERN-LEARNING.md +64 -199
  5. package/docs/example_graph_usage.py +4 -6
  6. package/install.sh +59 -0
  7. package/mcp_server.py +83 -7
  8. package/package.json +1 -8
  9. package/scripts/generate-thumbnails.py +3 -5
  10. package/skills/slm-build-graph/SKILL.md +1 -1
  11. package/skills/slm-list-recent/SKILL.md +1 -1
  12. package/skills/slm-recall/SKILL.md +1 -1
  13. package/skills/slm-remember/SKILL.md +1 -1
  14. package/skills/slm-show-patterns/SKILL.md +1 -1
  15. package/skills/slm-status/SKILL.md +1 -1
  16. package/skills/slm-switch-profile/SKILL.md +1 -1
  17. package/src/agent_registry.py +7 -18
  18. package/src/auth_middleware.py +3 -5
  19. package/src/auto_backup.py +3 -7
  20. package/src/behavioral/__init__.py +49 -0
  21. package/src/behavioral/behavioral_listener.py +203 -0
  22. package/src/behavioral/behavioral_patterns.py +275 -0
  23. package/src/behavioral/cross_project_transfer.py +206 -0
  24. package/src/behavioral/outcome_inference.py +194 -0
  25. package/src/behavioral/outcome_tracker.py +193 -0
  26. package/src/behavioral/tests/__init__.py +4 -0
  27. package/src/behavioral/tests/test_behavioral_integration.py +108 -0
  28. package/src/behavioral/tests/test_behavioral_patterns.py +150 -0
  29. package/src/behavioral/tests/test_cross_project_transfer.py +142 -0
  30. package/src/behavioral/tests/test_mcp_behavioral.py +139 -0
  31. package/src/behavioral/tests/test_mcp_report_outcome.py +117 -0
  32. package/src/behavioral/tests/test_outcome_inference.py +107 -0
  33. package/src/behavioral/tests/test_outcome_tracker.py +96 -0
  34. package/src/cache_manager.py +4 -6
  35. package/src/compliance/__init__.py +48 -0
  36. package/src/compliance/abac_engine.py +149 -0
  37. package/src/compliance/abac_middleware.py +116 -0
  38. package/src/compliance/audit_db.py +215 -0
  39. package/src/compliance/audit_logger.py +148 -0
  40. package/src/compliance/retention_manager.py +289 -0
  41. package/src/compliance/retention_scheduler.py +186 -0
  42. package/src/compliance/tests/__init__.py +4 -0
  43. package/src/compliance/tests/test_abac_enforcement.py +95 -0
  44. package/src/compliance/tests/test_abac_engine.py +124 -0
  45. package/src/compliance/tests/test_abac_mcp_integration.py +118 -0
  46. package/src/compliance/tests/test_audit_db.py +123 -0
  47. package/src/compliance/tests/test_audit_logger.py +98 -0
  48. package/src/compliance/tests/test_mcp_audit.py +128 -0
  49. package/src/compliance/tests/test_mcp_retention_policy.py +125 -0
  50. package/src/compliance/tests/test_retention_manager.py +131 -0
  51. package/src/compliance/tests/test_retention_scheduler.py +99 -0
  52. package/src/db_connection_manager.py +2 -12
  53. package/src/embedding_engine.py +61 -669
  54. package/src/embeddings/__init__.py +47 -0
  55. package/src/embeddings/cache.py +70 -0
  56. package/src/embeddings/cli.py +113 -0
  57. package/src/embeddings/constants.py +47 -0
  58. package/src/embeddings/database.py +91 -0
  59. package/src/embeddings/engine.py +247 -0
  60. package/src/embeddings/model_loader.py +145 -0
  61. package/src/event_bus.py +3 -13
  62. package/src/graph/__init__.py +36 -0
  63. package/src/graph/build_helpers.py +74 -0
  64. package/src/graph/cli.py +87 -0
  65. package/src/graph/cluster_builder.py +188 -0
  66. package/src/graph/cluster_summary.py +148 -0
  67. package/src/graph/constants.py +47 -0
  68. package/src/graph/edge_builder.py +162 -0
  69. package/src/graph/entity_extractor.py +95 -0
  70. package/src/graph/graph_core.py +226 -0
  71. package/src/graph/graph_search.py +231 -0
  72. package/src/graph/hierarchical.py +207 -0
  73. package/src/graph/schema.py +99 -0
  74. package/src/graph_engine.py +45 -1451
  75. package/src/hnsw_index.py +3 -7
  76. package/src/hybrid_search.py +36 -683
  77. package/src/learning/__init__.py +27 -12
  78. package/src/learning/adaptive_ranker.py +50 -12
  79. package/src/learning/cross_project_aggregator.py +2 -12
  80. package/src/learning/engagement_tracker.py +2 -12
  81. package/src/learning/feature_extractor.py +175 -43
  82. package/src/learning/feedback_collector.py +7 -12
  83. package/src/learning/learning_db.py +180 -12
  84. package/src/learning/project_context_manager.py +2 -12
  85. package/src/learning/source_quality_scorer.py +2 -12
  86. package/src/learning/synthetic_bootstrap.py +2 -12
  87. package/src/learning/tests/__init__.py +2 -0
  88. package/src/learning/tests/test_adaptive_ranker.py +2 -6
  89. package/src/learning/tests/test_adaptive_ranker_v28.py +60 -0
  90. package/src/learning/tests/test_aggregator.py +2 -6
  91. package/src/learning/tests/test_auto_retrain_v28.py +35 -0
  92. package/src/learning/tests/test_e2e_ranking_v28.py +82 -0
  93. package/src/learning/tests/test_feature_extractor_v28.py +93 -0
  94. package/src/learning/tests/test_feedback_collector.py +2 -6
  95. package/src/learning/tests/test_learning_db.py +2 -6
  96. package/src/learning/tests/test_learning_db_v28.py +110 -0
  97. package/src/learning/tests/test_learning_init_v28.py +48 -0
  98. package/src/learning/tests/test_outcome_signals.py +48 -0
  99. package/src/learning/tests/test_project_context.py +2 -6
  100. package/src/learning/tests/test_schema_migration.py +319 -0
  101. package/src/learning/tests/test_signal_inference.py +11 -13
  102. package/src/learning/tests/test_source_quality.py +2 -6
  103. package/src/learning/tests/test_synthetic_bootstrap.py +3 -7
  104. package/src/learning/tests/test_workflow_miner.py +2 -6
  105. package/src/learning/workflow_pattern_miner.py +2 -12
  106. package/src/lifecycle/__init__.py +54 -0
  107. package/src/lifecycle/bounded_growth.py +239 -0
  108. package/src/lifecycle/compaction_engine.py +226 -0
  109. package/src/lifecycle/lifecycle_engine.py +302 -0
  110. package/src/lifecycle/lifecycle_evaluator.py +225 -0
  111. package/src/lifecycle/lifecycle_scheduler.py +130 -0
  112. package/src/lifecycle/retention_policy.py +285 -0
  113. package/src/lifecycle/tests/__init__.py +4 -0
  114. package/src/lifecycle/tests/test_bounded_growth.py +193 -0
  115. package/src/lifecycle/tests/test_compaction.py +179 -0
  116. package/src/lifecycle/tests/test_lifecycle_engine.py +137 -0
  117. package/src/lifecycle/tests/test_lifecycle_evaluation.py +177 -0
  118. package/src/lifecycle/tests/test_lifecycle_scheduler.py +127 -0
  119. package/src/lifecycle/tests/test_lifecycle_search.py +109 -0
  120. package/src/lifecycle/tests/test_mcp_compact.py +149 -0
  121. package/src/lifecycle/tests/test_mcp_lifecycle_status.py +114 -0
  122. package/src/lifecycle/tests/test_retention_policy.py +162 -0
  123. package/src/mcp_tools_v28.py +280 -0
  124. package/src/memory-profiles.py +2 -12
  125. package/src/memory-reset.py +2 -12
  126. package/src/memory_compression.py +2 -12
  127. package/src/memory_store_v2.py +76 -20
  128. package/src/migrate_v1_to_v2.py +2 -12
  129. package/src/pattern_learner.py +29 -975
  130. package/src/patterns/__init__.py +24 -0
  131. package/src/patterns/analyzers.py +247 -0
  132. package/src/patterns/learner.py +267 -0
  133. package/src/patterns/scoring.py +167 -0
  134. package/src/patterns/store.py +223 -0
  135. package/src/patterns/terminology.py +138 -0
  136. package/src/provenance_tracker.py +4 -14
  137. package/src/query_optimizer.py +4 -6
  138. package/src/rate_limiter.py +2 -6
  139. package/src/search/__init__.py +20 -0
  140. package/src/search/cli.py +77 -0
  141. package/src/search/constants.py +26 -0
  142. package/src/search/engine.py +239 -0
  143. package/src/search/fusion.py +122 -0
  144. package/src/search/index_loader.py +112 -0
  145. package/src/search/methods.py +162 -0
  146. package/src/search_engine_v2.py +4 -6
  147. package/src/setup_validator.py +7 -13
  148. package/src/subscription_manager.py +2 -12
  149. package/src/tree/__init__.py +59 -0
  150. package/src/tree/builder.py +183 -0
  151. package/src/tree/nodes.py +196 -0
  152. package/src/tree/queries.py +252 -0
  153. package/src/tree/schema.py +76 -0
  154. package/src/tree_manager.py +10 -711
  155. package/src/trust/__init__.py +45 -0
  156. package/src/trust/constants.py +66 -0
  157. package/src/trust/queries.py +157 -0
  158. package/src/trust/schema.py +95 -0
  159. package/src/trust/scorer.py +299 -0
  160. package/src/trust/signals.py +95 -0
  161. package/src/trust_scorer.py +39 -697
  162. package/src/webhook_dispatcher.py +2 -12
  163. package/ui/app.js +1 -1
  164. package/ui/js/agents.js +1 -1
  165. package/ui_server.py +2 -14
  166. package/ATTRIBUTION.md +0 -140
  167. package/docs/ARCHITECTURE-V2.5.md +0 -190
  168. package/docs/GRAPH-ENGINE.md +0 -503
  169. package/docs/architecture-diagram.drawio +0 -405
  170. package/docs/plans/2026-02-13-benchmark-suite.md +0 -1349
@@ -0,0 +1,54 @@
1
+ # SPDX-License-Identifier: MIT
2
+ # Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
3
+ """SLM v2.8 Lifecycle Engine — Memory State Machine + Bounded Growth.
4
+
5
+ Manages memory states: ACTIVE → WARM → COLD → ARCHIVED → TOMBSTONED.
6
+ Layers on top of existing tier-based compression.
7
+ All features opt-in: absent config = v2.7 behavior.
8
+
9
+ Graceful degradation: if this module fails to import,
10
+ core memory operations continue unchanged.
11
+ """
12
+ import threading
13
+ from pathlib import Path
14
+ from typing import Optional, Dict, Any
15
+
16
+ # Feature flags
17
+ LIFECYCLE_AVAILABLE = False
18
+ _init_error = None
19
+
20
+ try:
21
+ from .lifecycle_engine import LifecycleEngine
22
+ from .lifecycle_evaluator import LifecycleEvaluator
23
+ from .retention_policy import RetentionPolicyManager
24
+ from .bounded_growth import BoundedGrowthEnforcer
25
+ LIFECYCLE_AVAILABLE = True
26
+ except ImportError as e:
27
+ _init_error = str(e)
28
+
29
+ # Lazy singletons
30
+ _lifecycle_engine: Optional["LifecycleEngine"] = None
31
+ _lifecycle_lock = threading.Lock()
32
+
33
+
34
+ def get_lifecycle_engine(db_path: Optional[Path] = None) -> Optional["LifecycleEngine"]:
35
+ """Get or create the lifecycle engine singleton. Returns None if unavailable."""
36
+ global _lifecycle_engine
37
+ if not LIFECYCLE_AVAILABLE:
38
+ return None
39
+ with _lifecycle_lock:
40
+ if _lifecycle_engine is None:
41
+ try:
42
+ _lifecycle_engine = LifecycleEngine(db_path)
43
+ except Exception:
44
+ return None
45
+ return _lifecycle_engine
46
+
47
+
48
+ def get_status() -> Dict[str, Any]:
49
+ """Return lifecycle engine status."""
50
+ return {
51
+ "lifecycle_available": LIFECYCLE_AVAILABLE,
52
+ "init_error": _init_error,
53
+ "engine_active": _lifecycle_engine is not None,
54
+ }
@@ -0,0 +1,239 @@
1
+ # SPDX-License-Identifier: MIT
2
+ # Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
3
+ """Bounded growth enforcement — ensures memory counts stay within limits.
4
+
5
+ When the count of memories in a given lifecycle state exceeds configurable
6
+ bounds, the lowest-scoring memories are transitioned to the next state.
7
+
8
+ Scoring formula: importance_norm * recency_factor * frequency_factor * behavioral_value
9
+ - importance_norm: importance / 10.0 (0.1 to 1.0)
10
+ - recency_factor: 1.0 / (1.0 + days_stale / 30.0) (exponential decay)
11
+ - frequency_factor: 0.5 + 0.5 * min(access_count / age_days, 1.0)
12
+ - behavioral_value: 1.0 (placeholder for Phase 2 integration)
13
+
14
+ Lower score = evict first.
15
+ """
16
+ import sqlite3
17
+ import json
18
+ from datetime import datetime
19
+ from pathlib import Path
20
+ from typing import Optional, Dict, Any, List
21
+
22
+ from .lifecycle_engine import LifecycleEngine
23
+
24
+
25
+ # Default bounds — generous defaults, configurable via lifecycle_config.json
26
+ DEFAULT_BOUNDS: Dict[str, int] = {
27
+ "max_active": 10000,
28
+ "max_warm": 5000,
29
+ }
30
+
31
+
32
+ class BoundedGrowthEnforcer:
33
+ """Enforces memory count limits by transitioning lowest-scoring memories.
34
+
35
+ When active memories exceed max_active, the lowest-scoring transition to warm.
36
+ When warm memories exceed max_warm, the lowest-scoring transition to cold.
37
+ """
38
+
39
+ def __init__(
40
+ self,
41
+ db_path: Optional[str] = None,
42
+ config_path: Optional[str] = None,
43
+ ):
44
+ if db_path is None:
45
+ db_path = str(Path.home() / ".claude-memory" / "memory.db")
46
+ self._db_path = str(db_path)
47
+ self._config_path = config_path
48
+ self._engine = LifecycleEngine(self._db_path)
49
+
50
+ def _get_connection(self) -> sqlite3.Connection:
51
+ """Get a SQLite connection to memory.db."""
52
+ conn = sqlite3.connect(self._db_path)
53
+ conn.row_factory = sqlite3.Row
54
+ return conn
55
+
56
+ def enforce_bounds(self, profile: Optional[str] = None) -> Dict[str, Any]:
57
+ """Check memory counts and transition excess memories.
58
+
59
+ Args:
60
+ profile: Filter by profile (None = all)
61
+
62
+ Returns:
63
+ Dict with enforced status, counts, limits, and transitions list
64
+ """
65
+ bounds = self._load_bounds()
66
+ max_active = bounds.get("max_active", DEFAULT_BOUNDS["max_active"])
67
+ max_warm = bounds.get("max_warm", DEFAULT_BOUNDS["max_warm"])
68
+
69
+ transitions: List[Dict[str, Any]] = []
70
+
71
+ # Enforce active limit
72
+ active_transitions = self._enforce_state_limit(
73
+ state="active",
74
+ target_state="warm",
75
+ max_count=max_active,
76
+ profile=profile,
77
+ )
78
+ transitions.extend(active_transitions)
79
+
80
+ # Enforce warm limit
81
+ warm_transitions = self._enforce_state_limit(
82
+ state="warm",
83
+ target_state="cold",
84
+ max_count=max_warm,
85
+ profile=profile,
86
+ )
87
+ transitions.extend(warm_transitions)
88
+
89
+ # Build result
90
+ dist = self._engine.get_state_distribution()
91
+ return {
92
+ "enforced": len(transitions) > 0,
93
+ "active_count": dist.get("active", 0),
94
+ "active_limit": max_active,
95
+ "warm_count": dist.get("warm", 0),
96
+ "warm_limit": max_warm,
97
+ "transitions": transitions,
98
+ }
99
+
100
+ def _enforce_state_limit(
101
+ self,
102
+ state: str,
103
+ target_state: str,
104
+ max_count: int,
105
+ profile: Optional[str] = None,
106
+ ) -> List[Dict[str, Any]]:
107
+ """Enforce a single state's count limit.
108
+
109
+ Scores all memories in the given state, evicts the lowest-scoring
110
+ excess memories to target_state.
111
+ """
112
+ scored = self.score_all_memories(state=state, profile=profile)
113
+ current_count = len(scored)
114
+
115
+ if current_count <= max_count:
116
+ return []
117
+
118
+ excess = current_count - max_count
119
+ # Sort ascending by score — lowest scores evicted first
120
+ scored.sort(key=lambda s: s["score"])
121
+ to_evict = scored[:excess]
122
+
123
+ # Batch transition for performance — single connection, single commit
124
+ mem_ids = [e["memory_id"] for e in to_evict]
125
+ reasons = [f"bounded_growth_score_{e['score']:.4f}" for e in to_evict]
126
+ score_map = {e["memory_id"]: e["score"] for e in to_evict}
127
+
128
+ result = self._engine.batch_transition(mem_ids, target_state, reasons)
129
+
130
+ transitions = []
131
+ for entry in result.get("succeeded", []):
132
+ transitions.append({
133
+ "memory_id": entry["memory_id"],
134
+ "from_state": state,
135
+ "to_state": target_state,
136
+ "score": score_map.get(entry["memory_id"], 0.0),
137
+ })
138
+ return transitions
139
+
140
+ def score_all_memories(
141
+ self,
142
+ state: Optional[str] = None,
143
+ profile: Optional[str] = None,
144
+ ) -> List[Dict[str, Any]]:
145
+ """Score all memories in a given state.
146
+
147
+ Args:
148
+ state: Lifecycle state to filter (None = all non-terminal states)
149
+ profile: Filter by profile
150
+
151
+ Returns:
152
+ List of dicts with memory_id and score, sorted descending by score
153
+ """
154
+ conn = self._get_connection()
155
+ try:
156
+ if state:
157
+ query = (
158
+ "SELECT id, importance, last_accessed, created_at, access_count "
159
+ "FROM memories WHERE lifecycle_state = ?"
160
+ )
161
+ params: list = [state]
162
+ else:
163
+ query = (
164
+ "SELECT id, importance, last_accessed, created_at, access_count "
165
+ "FROM memories WHERE lifecycle_state IN ('active', 'warm', 'cold')"
166
+ )
167
+ params = []
168
+
169
+ if profile:
170
+ query += " AND profile = ?"
171
+ params.append(profile)
172
+
173
+ rows = conn.execute(query, params).fetchall()
174
+ now = datetime.now()
175
+
176
+ scores = []
177
+ for row in rows:
178
+ score = self._score_row(row, now)
179
+ scores.append({"memory_id": row["id"], "score": score})
180
+
181
+ scores.sort(key=lambda s: s["score"], reverse=True)
182
+ return scores
183
+ finally:
184
+ conn.close()
185
+
186
+ def _score_row(self, row: sqlite3.Row, now: datetime) -> float:
187
+ """Compute composite lifecycle score for a memory.
188
+
189
+ Score = importance_norm * recency_factor * frequency_factor * behavioral_value
190
+
191
+ Higher score = more valuable = keep longer.
192
+ """
193
+ # Importance: normalize to 0.1-1.0
194
+ importance = max(row["importance"] or 5, 1)
195
+ importance_norm = importance / 10.0
196
+
197
+ # Recency: exponential decay, halves every ~30 days
198
+ last_access_str = row["last_accessed"] or row["created_at"]
199
+ days_stale = 0
200
+ if last_access_str:
201
+ try:
202
+ last_access = datetime.fromisoformat(str(last_access_str))
203
+ days_stale = max((now - last_access).days, 0)
204
+ except (ValueError, TypeError):
205
+ days_stale = 0
206
+ recency_factor = 1.0 / (1.0 + days_stale / 30.0)
207
+
208
+ # Access frequency: normalized by age
209
+ access_count = row["access_count"] or 0
210
+ created_str = row["created_at"]
211
+ age_days = 1
212
+ if created_str:
213
+ try:
214
+ created = datetime.fromisoformat(str(created_str))
215
+ age_days = max((now - created).days, 1)
216
+ except (ValueError, TypeError):
217
+ age_days = 1
218
+ frequency_factor = 0.5 + 0.5 * min(access_count / age_days, 1.0)
219
+
220
+ # Behavioral value: placeholder for Phase 2 integration
221
+ behavioral_value = 1.0
222
+
223
+ return importance_norm * recency_factor * frequency_factor * behavioral_value
224
+
225
+ def _load_bounds(self) -> Dict[str, int]:
226
+ """Load bounds config from lifecycle_config.json. Returns defaults if missing."""
227
+ try:
228
+ if self._config_path:
229
+ config_path = Path(self._config_path)
230
+ else:
231
+ config_path = Path(self._db_path).parent / "lifecycle_config.json"
232
+ if config_path.exists():
233
+ with open(config_path) as f:
234
+ user_config = json.load(f)
235
+ bounds = user_config.get("bounds", {})
236
+ return {**DEFAULT_BOUNDS, **bounds}
237
+ except Exception:
238
+ pass
239
+ return dict(DEFAULT_BOUNDS)
@@ -0,0 +1,226 @@
1
+ # SPDX-License-Identifier: MIT
2
+ # Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
3
+ """Formal memory compaction with information preservation guarantees.
4
+
5
+ Archives full content before compaction, replaces with compressed summary
6
+ containing key entities. Supports lossless restoration from archive.
7
+ """
8
+ import re
9
+ import sqlite3
10
+ from datetime import datetime, timezone
11
+ from typing import Dict, List, Optional
12
+
13
+
14
+ # Common English stopwords for entity extraction (no external dependencies)
15
+ _STOPWORDS = frozenset(
16
+ "a an the is are was were be been being have has had do does did will would "
17
+ "shall should may might can could am not no nor and but or if then else when "
18
+ "at by for with about against between through during before after above below "
19
+ "to from up down in out on off over under again further once here there all "
20
+ "each every both few more most other some such only own same so than too very "
21
+ "just don doesn didn won wouldn isn aren wasn weren hasn haven hadn it its "
22
+ "i me my myself we our ours ourselves you your yours yourself yourselves he "
23
+ "him his himself she her hers herself they them their theirs themselves what "
24
+ "which who whom this that these those of as into how also many use used uses "
25
+ "using like make makes made includes include including provides provide "
26
+ "widely ideal rapid many".split()
27
+ )
28
+
29
+ # Minimum word length for entity candidates
30
+ _MIN_WORD_LEN = 3
31
+ # Maximum number of entities to extract
32
+ _MAX_ENTITIES = 8
33
+
34
+
35
+ class CompactionEngine:
36
+ """Manages content compaction and restoration for memory lifecycle.
37
+
38
+ When a memory transitions to ARCHIVED state, this engine:
39
+ 1. Saves the full content to memory_archive (lossless backup)
40
+ 2. Replaces content with a compact summary + key entities
41
+ 3. Can restore full content if memory is reactivated
42
+ """
43
+
44
+ def __init__(self, db_path: str) -> None:
45
+ self.db_path = db_path
46
+ self._ensure_archive_table()
47
+
48
+ def _get_conn(self) -> sqlite3.Connection:
49
+ """Create a new connection with row factory."""
50
+ conn = sqlite3.connect(self.db_path)
51
+ conn.row_factory = sqlite3.Row
52
+ return conn
53
+
54
+ def _ensure_archive_table(self) -> None:
55
+ """Ensure the memory_archive table exists."""
56
+ conn = self._get_conn()
57
+ try:
58
+ conn.execute("""
59
+ CREATE TABLE IF NOT EXISTS memory_archive (
60
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
61
+ memory_id INTEGER UNIQUE NOT NULL,
62
+ full_content TEXT NOT NULL,
63
+ archived_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
64
+ FOREIGN KEY (memory_id) REFERENCES memories(id) ON DELETE CASCADE
65
+ )
66
+ """)
67
+ conn.execute(
68
+ "CREATE INDEX IF NOT EXISTS idx_archive_memory "
69
+ "ON memory_archive(memory_id)"
70
+ )
71
+ conn.commit()
72
+ finally:
73
+ conn.close()
74
+
75
+ # ------------------------------------------------------------------
76
+ # Public API
77
+ # ------------------------------------------------------------------
78
+
79
+ def compact_memory(
80
+ self, memory_id: int, dry_run: bool = False
81
+ ) -> Dict:
82
+ """Compact a memory: archive full content, replace with summary.
83
+
84
+ Args:
85
+ memory_id: ID of the memory to compact.
86
+ dry_run: If True, compute result but do not modify the database.
87
+
88
+ Returns:
89
+ Dict with keys: success, entities, summary, original_length,
90
+ and optionally dry_run, error.
91
+ """
92
+ conn = self._get_conn()
93
+ try:
94
+ row = conn.execute(
95
+ "SELECT id, content, tags FROM memories WHERE id = ?",
96
+ (memory_id,),
97
+ ).fetchone()
98
+
99
+ if row is None:
100
+ return {"success": False, "error": "Memory not found"}
101
+
102
+ content = row["content"]
103
+ original_length = len(content)
104
+
105
+ # Extract key entities (pure-Python TF-IDF-like approach)
106
+ entities = self._extract_entities(content)
107
+ summary = self._build_summary(content, entities, original_length)
108
+
109
+ if dry_run:
110
+ return {
111
+ "success": True,
112
+ "dry_run": True,
113
+ "entities": entities,
114
+ "summary": summary,
115
+ "original_length": original_length,
116
+ }
117
+
118
+ # Archive the full content (lossless backup)
119
+ now = datetime.now(timezone.utc).isoformat()
120
+ conn.execute(
121
+ "INSERT OR REPLACE INTO memory_archive "
122
+ "(memory_id, full_content, archived_at) VALUES (?, ?, ?)",
123
+ (memory_id, content, now),
124
+ )
125
+
126
+ # Replace memory content with compacted version
127
+ conn.execute(
128
+ "UPDATE memories SET content = ? WHERE id = ?",
129
+ (summary, memory_id),
130
+ )
131
+ conn.commit()
132
+
133
+ return {
134
+ "success": True,
135
+ "entities": entities,
136
+ "summary": summary,
137
+ "original_length": original_length,
138
+ }
139
+ finally:
140
+ conn.close()
141
+
142
+ def restore_memory(self, memory_id: int) -> Dict:
143
+ """Restore a compacted memory from its archive.
144
+
145
+ Args:
146
+ memory_id: ID of the memory to restore.
147
+
148
+ Returns:
149
+ Dict with keys: success, and optionally restored_length, error.
150
+ """
151
+ conn = self._get_conn()
152
+ try:
153
+ archive = conn.execute(
154
+ "SELECT full_content FROM memory_archive WHERE memory_id = ?",
155
+ (memory_id,),
156
+ ).fetchone()
157
+
158
+ if archive is None:
159
+ return {"success": False, "error": "No archive found for memory"}
160
+
161
+ full_content = archive["full_content"]
162
+
163
+ # Restore original content
164
+ conn.execute(
165
+ "UPDATE memories SET content = ? WHERE id = ?",
166
+ (full_content, memory_id),
167
+ )
168
+
169
+ # Remove the archive entry (content is back in main table)
170
+ conn.execute(
171
+ "DELETE FROM memory_archive WHERE memory_id = ?",
172
+ (memory_id,),
173
+ )
174
+ conn.commit()
175
+
176
+ return {
177
+ "success": True,
178
+ "restored_length": len(full_content),
179
+ }
180
+ finally:
181
+ conn.close()
182
+
183
+ # ------------------------------------------------------------------
184
+ # Internal helpers
185
+ # ------------------------------------------------------------------
186
+
187
+ @staticmethod
188
+ def _extract_entities(text: str) -> List[str]:
189
+ """Extract key entities using word-frequency ranking.
190
+
191
+ Pure Python implementation — no sklearn or external NLP deps.
192
+ Tokenizes, removes stopwords, counts frequency, returns top terms.
193
+ """
194
+ # Tokenize: split on non-alphanumeric, lowercase
195
+ tokens = re.findall(r"[a-zA-Z]+", text.lower())
196
+
197
+ # Filter: remove stopwords and short tokens
198
+ meaningful = [
199
+ t for t in tokens
200
+ if t not in _STOPWORDS and len(t) >= _MIN_WORD_LEN
201
+ ]
202
+
203
+ # Count frequencies
204
+ freq: Dict[str, int] = {}
205
+ for token in meaningful:
206
+ freq[token] = freq.get(token, 0) + 1
207
+
208
+ # Sort by frequency descending, then alphabetically for stability
209
+ ranked = sorted(freq.items(), key=lambda x: (-x[1], x[0]))
210
+
211
+ # Return top N entities
212
+ return [word for word, _ in ranked[:_MAX_ENTITIES]]
213
+
214
+ @staticmethod
215
+ def _build_summary(
216
+ text: str, entities: List[str], original_length: int
217
+ ) -> str:
218
+ """Build the compacted content string.
219
+
220
+ Format: [COMPACTED] Key entities: e1, e2, ... Original length: N chars.
221
+ """
222
+ entity_str = ", ".join(entities) if entities else "none"
223
+ return (
224
+ f"[COMPACTED] Key entities: {entity_str}. "
225
+ f"Original length: {original_length} chars."
226
+ )