superlocalmemory 2.7.6 → 2.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (170)
  1. package/CHANGELOG.md +120 -155
  2. package/README.md +115 -89
  3. package/api_server.py +2 -12
  4. package/docs/PATTERN-LEARNING.md +64 -199
  5. package/docs/example_graph_usage.py +4 -6
  6. package/install.sh +59 -0
  7. package/mcp_server.py +83 -7
  8. package/package.json +1 -8
  9. package/scripts/generate-thumbnails.py +3 -5
  10. package/skills/slm-build-graph/SKILL.md +1 -1
  11. package/skills/slm-list-recent/SKILL.md +1 -1
  12. package/skills/slm-recall/SKILL.md +1 -1
  13. package/skills/slm-remember/SKILL.md +1 -1
  14. package/skills/slm-show-patterns/SKILL.md +1 -1
  15. package/skills/slm-status/SKILL.md +1 -1
  16. package/skills/slm-switch-profile/SKILL.md +1 -1
  17. package/src/agent_registry.py +7 -18
  18. package/src/auth_middleware.py +3 -5
  19. package/src/auto_backup.py +3 -7
  20. package/src/behavioral/__init__.py +49 -0
  21. package/src/behavioral/behavioral_listener.py +203 -0
  22. package/src/behavioral/behavioral_patterns.py +275 -0
  23. package/src/behavioral/cross_project_transfer.py +206 -0
  24. package/src/behavioral/outcome_inference.py +194 -0
  25. package/src/behavioral/outcome_tracker.py +193 -0
  26. package/src/behavioral/tests/__init__.py +4 -0
  27. package/src/behavioral/tests/test_behavioral_integration.py +108 -0
  28. package/src/behavioral/tests/test_behavioral_patterns.py +150 -0
  29. package/src/behavioral/tests/test_cross_project_transfer.py +142 -0
  30. package/src/behavioral/tests/test_mcp_behavioral.py +139 -0
  31. package/src/behavioral/tests/test_mcp_report_outcome.py +117 -0
  32. package/src/behavioral/tests/test_outcome_inference.py +107 -0
  33. package/src/behavioral/tests/test_outcome_tracker.py +96 -0
  34. package/src/cache_manager.py +4 -6
  35. package/src/compliance/__init__.py +48 -0
  36. package/src/compliance/abac_engine.py +149 -0
  37. package/src/compliance/abac_middleware.py +116 -0
  38. package/src/compliance/audit_db.py +215 -0
  39. package/src/compliance/audit_logger.py +148 -0
  40. package/src/compliance/retention_manager.py +289 -0
  41. package/src/compliance/retention_scheduler.py +186 -0
  42. package/src/compliance/tests/__init__.py +4 -0
  43. package/src/compliance/tests/test_abac_enforcement.py +95 -0
  44. package/src/compliance/tests/test_abac_engine.py +124 -0
  45. package/src/compliance/tests/test_abac_mcp_integration.py +118 -0
  46. package/src/compliance/tests/test_audit_db.py +123 -0
  47. package/src/compliance/tests/test_audit_logger.py +98 -0
  48. package/src/compliance/tests/test_mcp_audit.py +128 -0
  49. package/src/compliance/tests/test_mcp_retention_policy.py +125 -0
  50. package/src/compliance/tests/test_retention_manager.py +131 -0
  51. package/src/compliance/tests/test_retention_scheduler.py +99 -0
  52. package/src/db_connection_manager.py +2 -12
  53. package/src/embedding_engine.py +61 -669
  54. package/src/embeddings/__init__.py +47 -0
  55. package/src/embeddings/cache.py +70 -0
  56. package/src/embeddings/cli.py +113 -0
  57. package/src/embeddings/constants.py +47 -0
  58. package/src/embeddings/database.py +91 -0
  59. package/src/embeddings/engine.py +247 -0
  60. package/src/embeddings/model_loader.py +145 -0
  61. package/src/event_bus.py +3 -13
  62. package/src/graph/__init__.py +36 -0
  63. package/src/graph/build_helpers.py +74 -0
  64. package/src/graph/cli.py +87 -0
  65. package/src/graph/cluster_builder.py +188 -0
  66. package/src/graph/cluster_summary.py +148 -0
  67. package/src/graph/constants.py +47 -0
  68. package/src/graph/edge_builder.py +162 -0
  69. package/src/graph/entity_extractor.py +95 -0
  70. package/src/graph/graph_core.py +226 -0
  71. package/src/graph/graph_search.py +231 -0
  72. package/src/graph/hierarchical.py +207 -0
  73. package/src/graph/schema.py +99 -0
  74. package/src/graph_engine.py +45 -1451
  75. package/src/hnsw_index.py +3 -7
  76. package/src/hybrid_search.py +36 -683
  77. package/src/learning/__init__.py +27 -12
  78. package/src/learning/adaptive_ranker.py +50 -12
  79. package/src/learning/cross_project_aggregator.py +2 -12
  80. package/src/learning/engagement_tracker.py +2 -12
  81. package/src/learning/feature_extractor.py +175 -43
  82. package/src/learning/feedback_collector.py +7 -12
  83. package/src/learning/learning_db.py +180 -12
  84. package/src/learning/project_context_manager.py +2 -12
  85. package/src/learning/source_quality_scorer.py +2 -12
  86. package/src/learning/synthetic_bootstrap.py +2 -12
  87. package/src/learning/tests/__init__.py +2 -0
  88. package/src/learning/tests/test_adaptive_ranker.py +2 -6
  89. package/src/learning/tests/test_adaptive_ranker_v28.py +60 -0
  90. package/src/learning/tests/test_aggregator.py +2 -6
  91. package/src/learning/tests/test_auto_retrain_v28.py +35 -0
  92. package/src/learning/tests/test_e2e_ranking_v28.py +82 -0
  93. package/src/learning/tests/test_feature_extractor_v28.py +93 -0
  94. package/src/learning/tests/test_feedback_collector.py +2 -6
  95. package/src/learning/tests/test_learning_db.py +2 -6
  96. package/src/learning/tests/test_learning_db_v28.py +110 -0
  97. package/src/learning/tests/test_learning_init_v28.py +48 -0
  98. package/src/learning/tests/test_outcome_signals.py +48 -0
  99. package/src/learning/tests/test_project_context.py +2 -6
  100. package/src/learning/tests/test_schema_migration.py +319 -0
  101. package/src/learning/tests/test_signal_inference.py +11 -13
  102. package/src/learning/tests/test_source_quality.py +2 -6
  103. package/src/learning/tests/test_synthetic_bootstrap.py +3 -7
  104. package/src/learning/tests/test_workflow_miner.py +2 -6
  105. package/src/learning/workflow_pattern_miner.py +2 -12
  106. package/src/lifecycle/__init__.py +54 -0
  107. package/src/lifecycle/bounded_growth.py +239 -0
  108. package/src/lifecycle/compaction_engine.py +226 -0
  109. package/src/lifecycle/lifecycle_engine.py +302 -0
  110. package/src/lifecycle/lifecycle_evaluator.py +225 -0
  111. package/src/lifecycle/lifecycle_scheduler.py +130 -0
  112. package/src/lifecycle/retention_policy.py +285 -0
  113. package/src/lifecycle/tests/__init__.py +4 -0
  114. package/src/lifecycle/tests/test_bounded_growth.py +193 -0
  115. package/src/lifecycle/tests/test_compaction.py +179 -0
  116. package/src/lifecycle/tests/test_lifecycle_engine.py +137 -0
  117. package/src/lifecycle/tests/test_lifecycle_evaluation.py +177 -0
  118. package/src/lifecycle/tests/test_lifecycle_scheduler.py +127 -0
  119. package/src/lifecycle/tests/test_lifecycle_search.py +109 -0
  120. package/src/lifecycle/tests/test_mcp_compact.py +149 -0
  121. package/src/lifecycle/tests/test_mcp_lifecycle_status.py +114 -0
  122. package/src/lifecycle/tests/test_retention_policy.py +162 -0
  123. package/src/mcp_tools_v28.py +280 -0
  124. package/src/memory-profiles.py +2 -12
  125. package/src/memory-reset.py +2 -12
  126. package/src/memory_compression.py +2 -12
  127. package/src/memory_store_v2.py +76 -20
  128. package/src/migrate_v1_to_v2.py +2 -12
  129. package/src/pattern_learner.py +29 -975
  130. package/src/patterns/__init__.py +24 -0
  131. package/src/patterns/analyzers.py +247 -0
  132. package/src/patterns/learner.py +267 -0
  133. package/src/patterns/scoring.py +167 -0
  134. package/src/patterns/store.py +223 -0
  135. package/src/patterns/terminology.py +138 -0
  136. package/src/provenance_tracker.py +4 -14
  137. package/src/query_optimizer.py +4 -6
  138. package/src/rate_limiter.py +2 -6
  139. package/src/search/__init__.py +20 -0
  140. package/src/search/cli.py +77 -0
  141. package/src/search/constants.py +26 -0
  142. package/src/search/engine.py +239 -0
  143. package/src/search/fusion.py +122 -0
  144. package/src/search/index_loader.py +112 -0
  145. package/src/search/methods.py +162 -0
  146. package/src/search_engine_v2.py +4 -6
  147. package/src/setup_validator.py +7 -13
  148. package/src/subscription_manager.py +2 -12
  149. package/src/tree/__init__.py +59 -0
  150. package/src/tree/builder.py +183 -0
  151. package/src/tree/nodes.py +196 -0
  152. package/src/tree/queries.py +252 -0
  153. package/src/tree/schema.py +76 -0
  154. package/src/tree_manager.py +10 -711
  155. package/src/trust/__init__.py +45 -0
  156. package/src/trust/constants.py +66 -0
  157. package/src/trust/queries.py +157 -0
  158. package/src/trust/schema.py +95 -0
  159. package/src/trust/scorer.py +299 -0
  160. package/src/trust/signals.py +95 -0
  161. package/src/trust_scorer.py +39 -697
  162. package/src/webhook_dispatcher.py +2 -12
  163. package/ui/app.js +1 -1
  164. package/ui/js/agents.js +1 -1
  165. package/ui_server.py +2 -14
  166. package/ATTRIBUTION.md +0 -140
  167. package/docs/ARCHITECTURE-V2.5.md +0 -190
  168. package/docs/GRAPH-ENGINE.md +0 -503
  169. package/docs/architecture-diagram.drawio +0 -405
  170. package/docs/plans/2026-02-13-benchmark-suite.md +0 -1349
@@ -0,0 +1,285 @@
1
+ # SPDX-License-Identifier: MIT
2
+ # Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
3
+ """Retention policy loading, evaluation, and enforcement.
4
+
5
+ Manages retention policies that determine how long memories must be kept
6
+ in specific states. Supports GDPR (right to erasure), EU AI Act (audit
7
+ retention), and HIPAA (medical record retention) compliance frameworks.
8
+
9
+ Policies are stored in a `retention_policies` table alongside the memories
10
+ database. Each policy specifies criteria (tags, project_name) for matching
11
+ memories and an action (retain, archive, tombstone) with a retention period.
12
+ """
13
+ import json
14
+ import logging
15
+ import sqlite3
16
+ from pathlib import Path
17
+ from typing import Any, Dict, List, Optional, Set
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+ _POLICIES_TABLE_SQL = """
22
+ CREATE TABLE IF NOT EXISTS retention_policies (
23
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
24
+ name TEXT NOT NULL,
25
+ retention_days INTEGER NOT NULL,
26
+ framework TEXT NOT NULL,
27
+ action TEXT NOT NULL,
28
+ applies_to TEXT NOT NULL,
29
+ created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
30
+ )
31
+ """
32
+
33
+
34
class RetentionPolicyManager:
    """Manages retention policies for lifecycle enforcement.

    Evaluates which compliance policies apply to each memory based on
    tag and project_name matching. When multiple policies match, the
    strictest (shortest retention_days) wins.
    """

    def __init__(self, db_path: Optional[str] = None):
        self._db_path = db_path
        # With no database path there is nothing to initialise yet;
        # table creation is deferred until a path is supplied.
        if db_path:
            self._ensure_table()

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    def _connect(self) -> sqlite3.Connection:
        """Open a connection to the database."""
        connection = sqlite3.connect(self._db_path)
        connection.row_factory = sqlite3.Row
        return connection

    def _ensure_table(self) -> None:
        """Create the retention_policies table if it doesn't exist."""
        connection = self._connect()
        try:
            connection.execute(_POLICIES_TABLE_SQL)
            connection.commit()
        finally:
            connection.close()

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def create_policy(
        self,
        name: str,
        retention_days: int,
        framework: str,
        action: str,
        applies_to: Dict[str, Any],
    ) -> int:
        """Create a new retention policy.

        Args:
            name: Human-readable policy name.
            retention_days: Minimum days to retain (0 = immediate action).
            framework: Compliance framework (gdpr, hipaa, eu_ai_act, internal).
            action: What to do (retain, archive, tombstone).
            applies_to: Criteria dict with optional keys: tags, project_name.

        Returns:
            The auto-generated policy ID.
        """
        connection = self._connect()
        try:
            values = (name, retention_days, framework, action, json.dumps(applies_to))
            cursor = connection.execute(
                "INSERT INTO retention_policies (name, retention_days, framework, action, applies_to) "
                "VALUES (?, ?, ?, ?, ?)",
                values,
            )
            connection.commit()
            return cursor.lastrowid
        finally:
            connection.close()

    def list_policies(self) -> List[Dict[str, Any]]:
        """Return all retention policies as a list of dicts."""
        connection = self._connect()
        try:
            fetched = connection.execute(
                "SELECT * FROM retention_policies ORDER BY id"
            ).fetchall()
            return [self._row_to_dict(row) for row in fetched]
        finally:
            connection.close()

    def load_policies(self, path: str) -> int:
        """Load retention policies from a JSON file.

        The file must contain a JSON array of policy objects, each with
        keys: name, retention_days, framework, action, applies_to.

        Args:
            path: Absolute or relative path to the JSON policy file.

        Returns:
            Number of policies loaded. Returns 0 if file is missing or
            contains invalid data, without raising an exception.
        """
        source = Path(path)
        if not source.exists():
            logger.debug("Policy file not found: %s", path)
            return 0

        try:
            parsed = json.loads(source.read_text(encoding="utf-8"))
        except (json.JSONDecodeError, OSError) as exc:
            logger.warning("Failed to read policy file %s: %s", path, exc)
            return 0

        if not isinstance(parsed, list):
            logger.warning("Policy file must contain a JSON array: %s", path)
            return 0

        loaded = 0
        for raw in parsed:
            try:
                self.create_policy(
                    name=raw["name"],
                    retention_days=raw["retention_days"],
                    framework=raw["framework"],
                    action=raw["action"],
                    applies_to=raw.get("applies_to", {}),
                )
            except (KeyError, TypeError) as exc:
                # Malformed entries are skipped, not fatal.
                logger.warning("Skipping invalid policy entry: %s", exc)
            else:
                loaded += 1

        return loaded

    def evaluate_memory(self, memory_id: int) -> Optional[Dict[str, Any]]:
        """Determine which retention policy applies to a memory.

        Loads the memory's tags and project_name, then checks every
        policy's ``applies_to`` criteria. If multiple policies match,
        the **strictest** one wins (lowest ``retention_days``).

        Args:
            memory_id: The memory row ID.

        Returns:
            A dict with ``policy_name``, ``action``, ``retention_days``,
            and ``framework``; or ``None`` if no policy matches.
        """
        connection = self._connect()
        try:
            memory = connection.execute(
                "SELECT tags, project_name FROM memories WHERE id = ?",
                (memory_id,),
            ).fetchone()
            if memory is None:
                return None

            memory_tags = self._parse_json_field(memory["tags"])
            memory_project = memory["project_name"]

            # Strictest-first ordering means the first match is the winner.
            candidates = connection.execute(
                "SELECT * FROM retention_policies ORDER BY retention_days ASC"
            ).fetchall()

            winner = next(
                (
                    policy
                    for policy in candidates
                    if self._policy_matches(
                        self._parse_json_field(policy["applies_to"]),
                        memory_tags,
                        memory_project,
                    )
                ),
                None,
            )
            if winner is None:
                return None
            return {
                "policy_name": winner["name"],
                "action": winner["action"],
                "retention_days": winner["retention_days"],
                "framework": winner["framework"],
            }
        finally:
            connection.close()

    def get_protected_memory_ids(self) -> Set[int]:
        """Return the set of memory IDs protected by any ``retain`` policy.

        A memory is protected if at least one policy with
        ``action='retain'`` matches its tags or project_name.
        """
        connection = self._connect()
        try:
            guards = connection.execute(
                "SELECT * FROM retention_policies WHERE action = 'retain'"
            ).fetchall()
            if not guards:
                return set()

            # Parse each policy's criteria once, not once per memory.
            guard_criteria = [self._parse_json_field(g["applies_to"]) for g in guards]

            rows = connection.execute(
                "SELECT id, tags, project_name FROM memories"
            ).fetchall()

            protected: Set[int] = set()
            for row in rows:
                row_tags = self._parse_json_field(row["tags"])
                row_project = row["project_name"]
                # One matching retain policy is enough.
                if any(
                    self._policy_matches(criteria, row_tags, row_project)
                    for criteria in guard_criteria
                ):
                    protected.add(row["id"])

            return protected
        finally:
            connection.close()

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _row_to_dict(row: sqlite3.Row) -> Dict[str, Any]:
        """Convert a sqlite3.Row to a plain dict with parsed applies_to."""
        record = dict(row)
        raw = record.get("applies_to")
        if isinstance(raw, str):
            try:
                record["applies_to"] = json.loads(raw)
            except (json.JSONDecodeError, TypeError):
                record["applies_to"] = {}
        return record

    @staticmethod
    def _parse_json_field(value: Any) -> Any:
        """Parse a JSON string field; return as-is if already parsed."""
        if isinstance(value, str):
            try:
                return json.loads(value)
            except (json.JSONDecodeError, TypeError):
                return value
        # None means "no value stored"; normalise to an empty list.
        return [] if value is None else value

    @staticmethod
    def _policy_matches(
        criteria: Any, mem_tags: Any, mem_project: Optional[str]
    ) -> bool:
        """Check if a policy's applies_to criteria match a memory.

        Matching rules:
        - If criteria has ``tags``: memory must have at least one
          overlapping tag.
        - If criteria has ``project_name``: memory's project_name
          must equal the criteria value.
        - If criteria is empty (``{}``): the policy does NOT match
          any memory (opt-in only).
        """
        if not isinstance(criteria, dict) or not criteria:
            return False

        if "tags" in criteria:
            wanted = set(criteria["tags"]) if criteria["tags"] else set()
            present = set(mem_tags) if isinstance(mem_tags, list) else set()
            if wanted.isdisjoint(present):
                return False

        if "project_name" in criteria and mem_project != criteria["project_name"]:
            return False

        return True
@@ -0,0 +1,4 @@
1
+ # SPDX-License-Identifier: MIT
2
+ # Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
3
+ """Tests for lifecycle engine.
4
+ """
@@ -0,0 +1,193 @@
1
+ # SPDX-License-Identifier: MIT
2
+ # Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
3
+ """Tests for bounded growth enforcement — memory count limits.
4
+ """
5
+ import sqlite3
6
+ import tempfile
7
+ import os
8
+ import sys
9
+ import json
10
+ from datetime import datetime, timedelta
11
+ from pathlib import Path
12
+
13
+ sys.path.insert(0, str(Path(__file__).resolve().parent.parent.parent))
14
+
15
+
16
+ class TestBoundedGrowth:
17
+ """Test bounded growth enforcement and memory scoring."""
18
+
19
+ def setup_method(self):
20
+ self.tmp_dir = tempfile.mkdtemp()
21
+ self.db_path = os.path.join(self.tmp_dir, "test.db")
22
+ conn = sqlite3.connect(self.db_path)
23
+ conn.execute("""
24
+ CREATE TABLE memories (
25
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
26
+ content TEXT NOT NULL,
27
+ importance INTEGER DEFAULT 5,
28
+ created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
29
+ last_accessed TIMESTAMP,
30
+ access_count INTEGER DEFAULT 0,
31
+ lifecycle_state TEXT DEFAULT 'active',
32
+ lifecycle_updated_at TIMESTAMP,
33
+ lifecycle_history TEXT DEFAULT '[]',
34
+ access_level TEXT DEFAULT 'public',
35
+ profile TEXT DEFAULT 'default'
36
+ )
37
+ """)
38
+ now = datetime.now()
39
+
40
+ # Memory 1: HIGH value — importance 9, accessed today, frequently used
41
+ conn.execute(
42
+ "INSERT INTO memories (content, importance, lifecycle_state, last_accessed, created_at, access_count) VALUES (?, ?, ?, ?, ?, ?)",
43
+ ("high value memory", 9, "active", now.isoformat(), (now - timedelta(days=30)).isoformat(), 20),
44
+ )
45
+ # Memory 2: MEDIUM-HIGH — importance 7, accessed 5d ago
46
+ conn.execute(
47
+ "INSERT INTO memories (content, importance, lifecycle_state, last_accessed, created_at, access_count) VALUES (?, ?, ?, ?, ?, ?)",
48
+ ("medium high memory", 7, "active", (now - timedelta(days=5)).isoformat(), (now - timedelta(days=60)).isoformat(), 10),
49
+ )
50
+ # Memory 3: MEDIUM — importance 5, accessed 10d ago
51
+ conn.execute(
52
+ "INSERT INTO memories (content, importance, lifecycle_state, last_accessed, created_at, access_count) VALUES (?, ?, ?, ?, ?, ?)",
53
+ ("medium memory", 5, "active", (now - timedelta(days=10)).isoformat(), (now - timedelta(days=90)).isoformat(), 5),
54
+ )
55
+ # Memory 4: LOW — importance 3, accessed 20d ago, rarely used
56
+ conn.execute(
57
+ "INSERT INTO memories (content, importance, lifecycle_state, last_accessed, created_at, access_count) VALUES (?, ?, ?, ?, ?, ?)",
58
+ ("low value memory", 3, "active", (now - timedelta(days=20)).isoformat(), (now - timedelta(days=120)).isoformat(), 2),
59
+ )
60
+ # Memory 5: LOWEST — importance 1, accessed 40d ago, never reused
61
+ conn.execute(
62
+ "INSERT INTO memories (content, importance, lifecycle_state, last_accessed, created_at, access_count) VALUES (?, ?, ?, ?, ?, ?)",
63
+ ("lowest value memory", 1, "active", (now - timedelta(days=40)).isoformat(), (now - timedelta(days=150)).isoformat(), 0),
64
+ )
65
+ # Memory 6: Warm state (for warm bounds test) — importance 2, stale
66
+ conn.execute(
67
+ "INSERT INTO memories (content, importance, lifecycle_state, last_accessed, created_at, access_count) VALUES (?, ?, ?, ?, ?, ?)",
68
+ ("warm memory A", 2, "warm", (now - timedelta(days=50)).isoformat(), (now - timedelta(days=200)).isoformat(), 1),
69
+ )
70
+ # Memory 7: Warm state — importance 4
71
+ conn.execute(
72
+ "INSERT INTO memories (content, importance, lifecycle_state, last_accessed, created_at, access_count) VALUES (?, ?, ?, ?, ?, ?)",
73
+ ("warm memory B", 4, "warm", (now - timedelta(days=30)).isoformat(), (now - timedelta(days=100)).isoformat(), 3),
74
+ )
75
+ conn.commit()
76
+ conn.close()
77
+
78
+ def teardown_method(self):
79
+ import shutil
80
+ shutil.rmtree(self.tmp_dir, ignore_errors=True)
81
+
82
+ def test_no_action_under_limit(self):
83
+ """No transitions when counts are within bounds."""
84
+ from lifecycle.bounded_growth import BoundedGrowthEnforcer
85
+ enforcer = BoundedGrowthEnforcer(self.db_path)
86
+ result = enforcer.enforce_bounds()
87
+ assert result["enforced"] is False
88
+ assert len(result["transitions"]) == 0
89
+
90
+ def test_enforce_active_limit(self):
91
+ """When active_count > max_active, excess memories transition to warm."""
92
+ from lifecycle.bounded_growth import BoundedGrowthEnforcer
93
+ config_path = os.path.join(self.tmp_dir, "lifecycle_config.json")
94
+ with open(config_path, "w") as f:
95
+ json.dump({"bounds": {"max_active": 3, "max_warm": 100}}, f)
96
+ enforcer = BoundedGrowthEnforcer(self.db_path, config_path=config_path)
97
+ result = enforcer.enforce_bounds()
98
+ assert result["enforced"] is True
99
+ # 5 active, limit 3 -> 2 should transition
100
+ assert len(result["transitions"]) == 2
101
+
102
+ def test_lowest_scoring_evicted_first(self):
103
+ """The lowest-scoring memories should be the ones transitioned."""
104
+ from lifecycle.bounded_growth import BoundedGrowthEnforcer
105
+ config_path = os.path.join(self.tmp_dir, "lifecycle_config.json")
106
+ with open(config_path, "w") as f:
107
+ json.dump({"bounds": {"max_active": 3, "max_warm": 100}}, f)
108
+ enforcer = BoundedGrowthEnforcer(self.db_path, config_path=config_path)
109
+ result = enforcer.enforce_bounds()
110
+ evicted_ids = {t["memory_id"] for t in result["transitions"]}
111
+ # Memory 5 (importance 1, stale 40d) and Memory 4 (importance 3, stale 20d)
112
+ # should be evicted — lowest scores
113
+ assert 5 in evicted_ids
114
+ assert 4 in evicted_ids
115
+ # Top 3 memories (1, 2, 3) should survive
116
+ assert 1 not in evicted_ids
117
+ assert 2 not in evicted_ids
118
+ assert 3 not in evicted_ids
119
+
120
+ def test_evicted_memories_now_warm(self):
121
+ """Evicted memories should now be in 'warm' state in the database."""
122
+ from lifecycle.bounded_growth import BoundedGrowthEnforcer
123
+ config_path = os.path.join(self.tmp_dir, "lifecycle_config.json")
124
+ with open(config_path, "w") as f:
125
+ json.dump({"bounds": {"max_active": 3, "max_warm": 100}}, f)
126
+ enforcer = BoundedGrowthEnforcer(self.db_path, config_path=config_path)
127
+ enforcer.enforce_bounds()
128
+ conn = sqlite3.connect(self.db_path)
129
+ row4 = conn.execute("SELECT lifecycle_state FROM memories WHERE id=4").fetchone()
130
+ row5 = conn.execute("SELECT lifecycle_state FROM memories WHERE id=5").fetchone()
131
+ conn.close()
132
+ assert row4[0] == "warm"
133
+ assert row5[0] == "warm"
134
+
135
+ def test_enforce_warm_limit(self):
136
+ """When warm_count > max_warm, excess warm memories transition to cold."""
137
+ from lifecycle.bounded_growth import BoundedGrowthEnforcer
138
+ config_path = os.path.join(self.tmp_dir, "lifecycle_config.json")
139
+ with open(config_path, "w") as f:
140
+ json.dump({"bounds": {"max_active": 100, "max_warm": 1}}, f)
141
+ enforcer = BoundedGrowthEnforcer(self.db_path, config_path=config_path)
142
+ result = enforcer.enforce_bounds()
143
+ assert result["enforced"] is True
144
+ # 2 warm (ids 6, 7), limit 1 -> 1 transition
145
+ warm_transitions = [t for t in result["transitions"] if t["from_state"] == "warm"]
146
+ assert len(warm_transitions) == 1
147
+ # Memory 6 (importance 2, stale 50d) should be evicted before Memory 7 (importance 4, stale 30d)
148
+ assert warm_transitions[0]["memory_id"] == 6
149
+
150
+ def test_score_memory_importance_matters(self):
151
+ """Higher importance -> higher score, all else equal."""
152
+ from lifecycle.bounded_growth import BoundedGrowthEnforcer
153
+ enforcer = BoundedGrowthEnforcer(self.db_path)
154
+ scores = enforcer.score_all_memories()
155
+ # Memory 1 (importance 9) should score higher than Memory 5 (importance 1)
156
+ score_map = {s["memory_id"]: s["score"] for s in scores}
157
+ assert score_map[1] > score_map[5]
158
+
159
+ def test_score_memory_recency_matters(self):
160
+ """More recently accessed -> higher score."""
161
+ from lifecycle.bounded_growth import BoundedGrowthEnforcer
162
+ enforcer = BoundedGrowthEnforcer(self.db_path)
163
+ scores = enforcer.score_all_memories()
164
+ score_map = {s["memory_id"]: s["score"] for s in scores}
165
+ # Memory 1 (accessed today) should score higher than Memory 3 (accessed 10d ago)
166
+ # (both active, Memory 1 also has higher importance, so this should hold)
167
+ assert score_map[1] > score_map[3]
168
+
169
+ def test_score_all_returns_all_active(self):
170
+ """score_all_memories returns scores for all memories in given state."""
171
+ from lifecycle.bounded_growth import BoundedGrowthEnforcer
172
+ enforcer = BoundedGrowthEnforcer(self.db_path)
173
+ scores = enforcer.score_all_memories(state="active")
174
+ assert len(scores) == 5 # 5 active memories
175
+
176
+ def test_result_structure(self):
177
+ """enforce_bounds returns properly structured result dict."""
178
+ from lifecycle.bounded_growth import BoundedGrowthEnforcer
179
+ enforcer = BoundedGrowthEnforcer(self.db_path)
180
+ result = enforcer.enforce_bounds()
181
+ assert "enforced" in result
182
+ assert "active_count" in result
183
+ assert "active_limit" in result
184
+ assert "warm_count" in result
185
+ assert "warm_limit" in result
186
+ assert "transitions" in result
187
+ assert isinstance(result["transitions"], list)
188
+
189
+ def test_default_bounds(self):
190
+ """Default bounds should be max_active=10000, max_warm=5000."""
191
+ from lifecycle.bounded_growth import DEFAULT_BOUNDS
192
+ assert DEFAULT_BOUNDS["max_active"] == 10000
193
+ assert DEFAULT_BOUNDS["max_warm"] == 5000
@@ -0,0 +1,179 @@
1
+ # SPDX-License-Identifier: MIT
2
+ # Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
3
+ """Tests for compaction engine — content archival and restoration.
4
+ """
5
+ import sqlite3
6
+ import tempfile
7
+ import os
8
+ import sys
9
+ import json
10
+ from pathlib import Path
11
+
12
+ sys.path.insert(0, str(Path(__file__).resolve().parent.parent.parent))
13
+
14
+
15
+ class TestCompactionEngine:
16
+ """Test memory compaction and restoration."""
17
+
18
+ def setup_method(self):
19
+ self.tmp_dir = tempfile.mkdtemp()
20
+ self.db_path = os.path.join(self.tmp_dir, "test.db")
21
+ conn = sqlite3.connect(self.db_path)
22
+ conn.execute("""
23
+ CREATE TABLE memories (
24
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
25
+ content TEXT NOT NULL,
26
+ importance INTEGER DEFAULT 5,
27
+ created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
28
+ last_accessed TIMESTAMP,
29
+ access_count INTEGER DEFAULT 0,
30
+ lifecycle_state TEXT DEFAULT 'active',
31
+ lifecycle_updated_at TIMESTAMP,
32
+ lifecycle_history TEXT DEFAULT '[]',
33
+ access_level TEXT DEFAULT 'public',
34
+ profile TEXT DEFAULT 'default',
35
+ tags TEXT DEFAULT '[]',
36
+ summary TEXT
37
+ )
38
+ """)
39
+ conn.execute("""
40
+ CREATE TABLE IF NOT EXISTS memory_archive (
41
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
42
+ memory_id INTEGER UNIQUE NOT NULL,
43
+ full_content TEXT NOT NULL,
44
+ archived_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
45
+ FOREIGN KEY (memory_id) REFERENCES memories(id) ON DELETE CASCADE
46
+ )
47
+ """)
48
+ conn.execute("CREATE INDEX IF NOT EXISTS idx_archive_memory ON memory_archive(memory_id)")
49
+
50
+ # Memory 1: Long content suitable for compaction
51
+ long_content = (
52
+ "The Python programming language is widely used for machine learning and data science. "
53
+ "It provides libraries like scikit-learn, TensorFlow, and PyTorch for building models. "
54
+ "Python's simplicity and readability make it ideal for rapid prototyping. "
55
+ "The ecosystem includes tools for data preprocessing, visualization, and deployment. "
56
+ "Many enterprise applications use Python for backend services and API development."
57
+ )
58
+ conn.execute(
59
+ "INSERT INTO memories (content, importance, lifecycle_state, tags) VALUES (?, ?, ?, ?)",
60
+ (long_content, 5, "cold", '["python","ml"]'),
61
+ )
62
+ # Memory 2: Short content
63
+ conn.execute(
64
+ "INSERT INTO memories (content, importance, lifecycle_state) VALUES (?, ?, ?)",
65
+ ("brief note about testing", 3, "cold"),
66
+ )
67
+ # Memory 3: Already archived
68
+ conn.execute(
69
+ "INSERT INTO memories (content, importance, lifecycle_state) VALUES (?, ?, ?)",
70
+ ("[COMPACTED] Key entities: database, SQL", 5, "archived"),
71
+ )
72
+ conn.execute(
73
+ "INSERT INTO memory_archive (memory_id, full_content) VALUES (?, ?)",
74
+ (3, "The database management system uses SQL for querying and PostgreSQL for storage."),
75
+ )
76
+ conn.commit()
77
+ conn.close()
78
+
79
+ def teardown_method(self):
80
+ import shutil
81
+ shutil.rmtree(self.tmp_dir, ignore_errors=True)
82
+
83
+ def test_compact_memory_archives_content(self):
84
+ """Compaction stores full content in memory_archive."""
85
+ from lifecycle.compaction_engine import CompactionEngine
86
+ engine = CompactionEngine(self.db_path)
87
+ result = engine.compact_memory(1)
88
+ assert result["success"] is True
89
+ # Verify archive has full content
90
+ conn = sqlite3.connect(self.db_path)
91
+ row = conn.execute("SELECT full_content FROM memory_archive WHERE memory_id=1").fetchone()
92
+ conn.close()
93
+ assert row is not None
94
+ assert "Python programming" in row[0]
95
+
96
+ def test_compact_memory_replaces_content(self):
97
+ """Compacted memory content is replaced with summary + entities."""
98
+ from lifecycle.compaction_engine import CompactionEngine
99
+ engine = CompactionEngine(self.db_path)
100
+ engine.compact_memory(1)
101
+ conn = sqlite3.connect(self.db_path)
102
+ row = conn.execute("SELECT content FROM memories WHERE id=1").fetchone()
103
+ conn.close()
104
+ # Content should be shorter than original
105
+ assert len(row[0]) < 300
106
+ assert "[COMPACTED]" in row[0]
107
+
108
+ def test_compact_preserves_key_entities(self):
109
+ """Compacted content preserves key entities/terms."""
110
+ from lifecycle.compaction_engine import CompactionEngine
111
+ engine = CompactionEngine(self.db_path)
112
+ result = engine.compact_memory(1)
113
+ assert "entities" in result
114
+ assert len(result["entities"]) >= 3
115
+ # Should extract key terms like "python", "learning", "data"
116
+ entities_lower = [e.lower() for e in result["entities"]]
117
+ assert any("python" in e for e in entities_lower)
118
+
119
+ def test_compact_preserves_tags(self):
120
+ """Compaction does NOT remove tags from the memory."""
121
+ from lifecycle.compaction_engine import CompactionEngine
122
+ engine = CompactionEngine(self.db_path)
123
+ engine.compact_memory(1)
124
+ conn = sqlite3.connect(self.db_path)
125
+ row = conn.execute("SELECT tags FROM memories WHERE id=1").fetchone()
126
+ conn.close()
127
+ assert row[0] is not None
128
+ tags = json.loads(row[0])
129
+ assert "python" in tags
130
+
131
+ def test_restore_memory_from_archive(self):
132
+ """Restoring a compacted memory brings back full content."""
133
+ from lifecycle.compaction_engine import CompactionEngine
134
+ engine = CompactionEngine(self.db_path)
135
+ result = engine.restore_memory(3) # Already archived memory
136
+ assert result["success"] is True
137
+ conn = sqlite3.connect(self.db_path)
138
+ row = conn.execute("SELECT content FROM memories WHERE id=3").fetchone()
139
+ conn.close()
140
+ assert "database management" in row[0]
141
+
142
+ def test_restore_cleans_archive(self):
143
+ """After restoration, the archive entry is removed."""
144
+ from lifecycle.compaction_engine import CompactionEngine
145
+ engine = CompactionEngine(self.db_path)
146
+ engine.restore_memory(3)
147
+ conn = sqlite3.connect(self.db_path)
148
+ row = conn.execute("SELECT * FROM memory_archive WHERE memory_id=3").fetchone()
149
+ conn.close()
150
+ assert row is None
151
+
152
+ def test_dry_run_no_changes(self):
153
+ """dry_run mode shows what would happen without modifying DB."""
154
+ from lifecycle.compaction_engine import CompactionEngine
155
+ engine = CompactionEngine(self.db_path)
156
+ result = engine.compact_memory(1, dry_run=True)
157
+ assert result["success"] is True
158
+ assert result["dry_run"] is True
159
+ # Verify DB was NOT modified
160
+ conn = sqlite3.connect(self.db_path)
161
+ row = conn.execute("SELECT content FROM memories WHERE id=1").fetchone()
162
+ archive = conn.execute("SELECT * FROM memory_archive WHERE memory_id=1").fetchone()
163
+ conn.close()
164
+ assert "Python programming" in row[0] # Original content still there
165
+ assert archive is None # No archive entry created
166
+
167
+ def test_compact_nonexistent_memory(self):
168
+ """Compacting nonexistent memory returns failure."""
169
+ from lifecycle.compaction_engine import CompactionEngine
170
+ engine = CompactionEngine(self.db_path)
171
+ result = engine.compact_memory(999)
172
+ assert result["success"] is False
173
+
174
+ def test_restore_nonexistent_archive(self):
175
+ """Restoring memory without archive entry returns failure."""
176
+ from lifecycle.compaction_engine import CompactionEngine
177
+ engine = CompactionEngine(self.db_path)
178
+ result = engine.restore_memory(1) # Memory 1 has no archive
179
+ assert result["success"] is False