superlocalmemory 2.7.6 → 2.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (170) hide show
  1. package/CHANGELOG.md +120 -155
  2. package/README.md +115 -89
  3. package/api_server.py +2 -12
  4. package/docs/PATTERN-LEARNING.md +64 -199
  5. package/docs/example_graph_usage.py +4 -6
  6. package/install.sh +59 -0
  7. package/mcp_server.py +83 -7
  8. package/package.json +1 -8
  9. package/scripts/generate-thumbnails.py +3 -5
  10. package/skills/slm-build-graph/SKILL.md +1 -1
  11. package/skills/slm-list-recent/SKILL.md +1 -1
  12. package/skills/slm-recall/SKILL.md +1 -1
  13. package/skills/slm-remember/SKILL.md +1 -1
  14. package/skills/slm-show-patterns/SKILL.md +1 -1
  15. package/skills/slm-status/SKILL.md +1 -1
  16. package/skills/slm-switch-profile/SKILL.md +1 -1
  17. package/src/agent_registry.py +7 -18
  18. package/src/auth_middleware.py +3 -5
  19. package/src/auto_backup.py +3 -7
  20. package/src/behavioral/__init__.py +49 -0
  21. package/src/behavioral/behavioral_listener.py +203 -0
  22. package/src/behavioral/behavioral_patterns.py +275 -0
  23. package/src/behavioral/cross_project_transfer.py +206 -0
  24. package/src/behavioral/outcome_inference.py +194 -0
  25. package/src/behavioral/outcome_tracker.py +193 -0
  26. package/src/behavioral/tests/__init__.py +4 -0
  27. package/src/behavioral/tests/test_behavioral_integration.py +108 -0
  28. package/src/behavioral/tests/test_behavioral_patterns.py +150 -0
  29. package/src/behavioral/tests/test_cross_project_transfer.py +142 -0
  30. package/src/behavioral/tests/test_mcp_behavioral.py +139 -0
  31. package/src/behavioral/tests/test_mcp_report_outcome.py +117 -0
  32. package/src/behavioral/tests/test_outcome_inference.py +107 -0
  33. package/src/behavioral/tests/test_outcome_tracker.py +96 -0
  34. package/src/cache_manager.py +4 -6
  35. package/src/compliance/__init__.py +48 -0
  36. package/src/compliance/abac_engine.py +149 -0
  37. package/src/compliance/abac_middleware.py +116 -0
  38. package/src/compliance/audit_db.py +215 -0
  39. package/src/compliance/audit_logger.py +148 -0
  40. package/src/compliance/retention_manager.py +289 -0
  41. package/src/compliance/retention_scheduler.py +186 -0
  42. package/src/compliance/tests/__init__.py +4 -0
  43. package/src/compliance/tests/test_abac_enforcement.py +95 -0
  44. package/src/compliance/tests/test_abac_engine.py +124 -0
  45. package/src/compliance/tests/test_abac_mcp_integration.py +118 -0
  46. package/src/compliance/tests/test_audit_db.py +123 -0
  47. package/src/compliance/tests/test_audit_logger.py +98 -0
  48. package/src/compliance/tests/test_mcp_audit.py +128 -0
  49. package/src/compliance/tests/test_mcp_retention_policy.py +125 -0
  50. package/src/compliance/tests/test_retention_manager.py +131 -0
  51. package/src/compliance/tests/test_retention_scheduler.py +99 -0
  52. package/src/db_connection_manager.py +2 -12
  53. package/src/embedding_engine.py +61 -669
  54. package/src/embeddings/__init__.py +47 -0
  55. package/src/embeddings/cache.py +70 -0
  56. package/src/embeddings/cli.py +113 -0
  57. package/src/embeddings/constants.py +47 -0
  58. package/src/embeddings/database.py +91 -0
  59. package/src/embeddings/engine.py +247 -0
  60. package/src/embeddings/model_loader.py +145 -0
  61. package/src/event_bus.py +3 -13
  62. package/src/graph/__init__.py +36 -0
  63. package/src/graph/build_helpers.py +74 -0
  64. package/src/graph/cli.py +87 -0
  65. package/src/graph/cluster_builder.py +188 -0
  66. package/src/graph/cluster_summary.py +148 -0
  67. package/src/graph/constants.py +47 -0
  68. package/src/graph/edge_builder.py +162 -0
  69. package/src/graph/entity_extractor.py +95 -0
  70. package/src/graph/graph_core.py +226 -0
  71. package/src/graph/graph_search.py +231 -0
  72. package/src/graph/hierarchical.py +207 -0
  73. package/src/graph/schema.py +99 -0
  74. package/src/graph_engine.py +45 -1451
  75. package/src/hnsw_index.py +3 -7
  76. package/src/hybrid_search.py +36 -683
  77. package/src/learning/__init__.py +27 -12
  78. package/src/learning/adaptive_ranker.py +50 -12
  79. package/src/learning/cross_project_aggregator.py +2 -12
  80. package/src/learning/engagement_tracker.py +2 -12
  81. package/src/learning/feature_extractor.py +175 -43
  82. package/src/learning/feedback_collector.py +7 -12
  83. package/src/learning/learning_db.py +180 -12
  84. package/src/learning/project_context_manager.py +2 -12
  85. package/src/learning/source_quality_scorer.py +2 -12
  86. package/src/learning/synthetic_bootstrap.py +2 -12
  87. package/src/learning/tests/__init__.py +2 -0
  88. package/src/learning/tests/test_adaptive_ranker.py +2 -6
  89. package/src/learning/tests/test_adaptive_ranker_v28.py +60 -0
  90. package/src/learning/tests/test_aggregator.py +2 -6
  91. package/src/learning/tests/test_auto_retrain_v28.py +35 -0
  92. package/src/learning/tests/test_e2e_ranking_v28.py +82 -0
  93. package/src/learning/tests/test_feature_extractor_v28.py +93 -0
  94. package/src/learning/tests/test_feedback_collector.py +2 -6
  95. package/src/learning/tests/test_learning_db.py +2 -6
  96. package/src/learning/tests/test_learning_db_v28.py +110 -0
  97. package/src/learning/tests/test_learning_init_v28.py +48 -0
  98. package/src/learning/tests/test_outcome_signals.py +48 -0
  99. package/src/learning/tests/test_project_context.py +2 -6
  100. package/src/learning/tests/test_schema_migration.py +319 -0
  101. package/src/learning/tests/test_signal_inference.py +11 -13
  102. package/src/learning/tests/test_source_quality.py +2 -6
  103. package/src/learning/tests/test_synthetic_bootstrap.py +3 -7
  104. package/src/learning/tests/test_workflow_miner.py +2 -6
  105. package/src/learning/workflow_pattern_miner.py +2 -12
  106. package/src/lifecycle/__init__.py +54 -0
  107. package/src/lifecycle/bounded_growth.py +239 -0
  108. package/src/lifecycle/compaction_engine.py +226 -0
  109. package/src/lifecycle/lifecycle_engine.py +302 -0
  110. package/src/lifecycle/lifecycle_evaluator.py +225 -0
  111. package/src/lifecycle/lifecycle_scheduler.py +130 -0
  112. package/src/lifecycle/retention_policy.py +285 -0
  113. package/src/lifecycle/tests/__init__.py +4 -0
  114. package/src/lifecycle/tests/test_bounded_growth.py +193 -0
  115. package/src/lifecycle/tests/test_compaction.py +179 -0
  116. package/src/lifecycle/tests/test_lifecycle_engine.py +137 -0
  117. package/src/lifecycle/tests/test_lifecycle_evaluation.py +177 -0
  118. package/src/lifecycle/tests/test_lifecycle_scheduler.py +127 -0
  119. package/src/lifecycle/tests/test_lifecycle_search.py +109 -0
  120. package/src/lifecycle/tests/test_mcp_compact.py +149 -0
  121. package/src/lifecycle/tests/test_mcp_lifecycle_status.py +114 -0
  122. package/src/lifecycle/tests/test_retention_policy.py +162 -0
  123. package/src/mcp_tools_v28.py +280 -0
  124. package/src/memory-profiles.py +2 -12
  125. package/src/memory-reset.py +2 -12
  126. package/src/memory_compression.py +2 -12
  127. package/src/memory_store_v2.py +76 -20
  128. package/src/migrate_v1_to_v2.py +2 -12
  129. package/src/pattern_learner.py +29 -975
  130. package/src/patterns/__init__.py +24 -0
  131. package/src/patterns/analyzers.py +247 -0
  132. package/src/patterns/learner.py +267 -0
  133. package/src/patterns/scoring.py +167 -0
  134. package/src/patterns/store.py +223 -0
  135. package/src/patterns/terminology.py +138 -0
  136. package/src/provenance_tracker.py +4 -14
  137. package/src/query_optimizer.py +4 -6
  138. package/src/rate_limiter.py +2 -6
  139. package/src/search/__init__.py +20 -0
  140. package/src/search/cli.py +77 -0
  141. package/src/search/constants.py +26 -0
  142. package/src/search/engine.py +239 -0
  143. package/src/search/fusion.py +122 -0
  144. package/src/search/index_loader.py +112 -0
  145. package/src/search/methods.py +162 -0
  146. package/src/search_engine_v2.py +4 -6
  147. package/src/setup_validator.py +7 -13
  148. package/src/subscription_manager.py +2 -12
  149. package/src/tree/__init__.py +59 -0
  150. package/src/tree/builder.py +183 -0
  151. package/src/tree/nodes.py +196 -0
  152. package/src/tree/queries.py +252 -0
  153. package/src/tree/schema.py +76 -0
  154. package/src/tree_manager.py +10 -711
  155. package/src/trust/__init__.py +45 -0
  156. package/src/trust/constants.py +66 -0
  157. package/src/trust/queries.py +157 -0
  158. package/src/trust/schema.py +95 -0
  159. package/src/trust/scorer.py +299 -0
  160. package/src/trust/signals.py +95 -0
  161. package/src/trust_scorer.py +39 -697
  162. package/src/webhook_dispatcher.py +2 -12
  163. package/ui/app.js +1 -1
  164. package/ui/js/agents.js +1 -1
  165. package/ui_server.py +2 -14
  166. package/ATTRIBUTION.md +0 -140
  167. package/docs/ARCHITECTURE-V2.5.md +0 -190
  168. package/docs/GRAPH-ENGINE.md +0 -503
  169. package/docs/architecture-diagram.drawio +0 -405
  170. package/docs/plans/2026-02-13-benchmark-suite.md +0 -1349
@@ -0,0 +1,215 @@
1
+ # SPDX-License-Identifier: MIT
2
+ # Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
3
+ """Audit database management with hash chain tamper detection.
4
+
5
+ Provides a tamper-evident audit trail stored in a separate audit.db.
6
+ Each entry's hash incorporates the previous entry's hash, forming a
7
+ chain that can detect any modification to historical records.
8
+ """
9
+ import hashlib
10
+ import json
11
+ import sqlite3
12
+ import threading
13
+ from datetime import datetime, timezone
14
+ from typing import Any, Dict, List, Optional
15
+
16
+
17
# Value stored as ``prev_hash`` for the first entry of the chain, which has
# no predecessor to link to.
_GENESIS = "genesis"

# DDL for the audit trail table. ``prev_hash`` and ``entry_hash`` carry the
# hash chain; both are NOT NULL with no default, so every insert must supply
# them explicitly.
_SCHEMA = """
CREATE TABLE IF NOT EXISTS audit_events (
id INTEGER PRIMARY KEY AUTOINCREMENT,
event_type TEXT NOT NULL,
actor TEXT NOT NULL,
resource_id INTEGER,
details TEXT DEFAULT '{}',
prev_hash TEXT NOT NULL,
entry_hash TEXT NOT NULL,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
)
"""
31
+
32
+
33
+ def _compute_hash(
34
+ event_type: str,
35
+ actor: str,
36
+ resource_id: Optional[int],
37
+ details: str,
38
+ prev_hash: str,
39
+ created_at: str,
40
+ ) -> str:
41
+ """Compute SHA-256 hash for an audit entry."""
42
+ payload = (
43
+ f"{event_type}{actor}{resource_id}{details}{prev_hash}{created_at}"
44
+ )
45
+ return hashlib.sha256(payload.encode("utf-8")).hexdigest()
46
+
47
+
48
class AuditDB:
    """Manages audit.db -- tamper-evident compliance audit trail.

    Every stored event carries ``prev_hash`` (the ``entry_hash`` of the event
    before it, or the genesis value for the first event) and ``entry_hash``
    (a SHA-256 over its own fields plus ``prev_hash``). ``verify_chain()``
    walks the table in id order and recomputes both links, so an edit to a
    stored event is reported as a mismatch.

    A fresh SQLite connection is opened for every operation and closed
    before the method returns.
    """

    def __init__(self, db_path: Optional[str] = None):
        """Remember ``db_path`` and create the schema when a path is given.

        Args:
            db_path: Filesystem path of audit.db. When falsy, no schema is
                created here; the other methods still pass the stored value
                straight to ``sqlite3.connect``.
        """
        self._db_path = db_path
        # Serialises appends issued through this instance.
        self._lock = threading.Lock()
        if db_path:
            self._init_db()

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    def _get_conn(self) -> sqlite3.Connection:
        """Open a new connection with WAL journaling and ``sqlite3.Row`` rows."""
        conn = sqlite3.connect(self._db_path)
        conn.execute("PRAGMA journal_mode=WAL")
        conn.execute("PRAGMA foreign_keys=ON")
        conn.row_factory = sqlite3.Row
        return conn

    def _init_db(self) -> None:
        """Create the ``audit_events`` table if it does not exist yet."""
        conn = self._get_conn()
        try:
            conn.executescript(_SCHEMA)
            conn.commit()
        finally:
            conn.close()

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def log_event(
        self,
        event_type: str,
        actor: str = "system",
        resource_id: Optional[int] = None,
        details: Optional[Dict[str, Any]] = None,
    ) -> int:
        """Append an event to the audit trail and return its row id.

        Args:
            event_type: Event name stored in ``event_type``.
            actor: Who caused the event; defaults to ``"system"``.
            resource_id: Optional id of the affected resource.
            details: Optional mapping, stored as JSON with sorted keys.
                ``None`` (or an empty mapping) is stored as ``{}``.

        Returns:
            The ``id`` of the inserted row.

        Raises:
            TypeError: If ``details`` is not JSON-serialisable.
            sqlite3.Error: If the database cannot be read or written.
        """
        details_str = json.dumps(details or {}, sort_keys=True)
        created_at = datetime.now(timezone.utc).isoformat()

        with self._lock:
            conn = self._get_conn()
            try:
                # Acquire SQLite's write lock *before* reading the chain tip.
                # self._lock only covers this instance; without the explicit
                # transaction another connection to the same audit.db could
                # append between our SELECT and INSERT, leaving two entries
                # with the same prev_hash and breaking verify_chain().
                conn.execute("BEGIN IMMEDIATE")

                # Fetch the hash of the most recent entry (or genesis)
                tip = conn.execute(
                    "SELECT entry_hash FROM audit_events "
                    "ORDER BY id DESC LIMIT 1"
                ).fetchone()
                prev_hash = tip["entry_hash"] if tip else _GENESIS

                entry_hash = _compute_hash(
                    event_type, actor, resource_id,
                    details_str, prev_hash, created_at,
                )

                cursor = conn.execute(
                    "INSERT INTO audit_events "
                    "(event_type, actor, resource_id, details, "
                    " prev_hash, entry_hash, created_at) "
                    "VALUES (?, ?, ?, ?, ?, ?, ?)",
                    (
                        event_type, actor, resource_id,
                        details_str, prev_hash, entry_hash, created_at,
                    ),
                )
                conn.commit()
                return cursor.lastrowid
            finally:
                # Closing without a commit discards the open transaction.
                conn.close()

    def query_events(
        self,
        event_type: Optional[str] = None,
        actor: Optional[str] = None,
        resource_id: Optional[int] = None,
        limit: int = 100,
    ) -> List[Dict[str, Any]]:
        """Query audit events with optional filters.

        Each filter that is not ``None`` adds an equality condition; the
        conditions are combined with AND. All values are bound as SQL
        parameters.

        Returns:
            At most ``limit`` events as plain dicts, newest (highest id)
            first.
        """
        clauses: List[str] = []
        params: List[Any] = []

        if event_type is not None:
            clauses.append("event_type = ?")
            params.append(event_type)
        if actor is not None:
            clauses.append("actor = ?")
            params.append(actor)
        if resource_id is not None:
            clauses.append("resource_id = ?")
            params.append(resource_id)

        # Only fixed column names are interpolated; values stay parameters.
        where = f"WHERE {' AND '.join(clauses)}" if clauses else ""
        sql = (
            "SELECT id, event_type, actor, resource_id, details, "
            "prev_hash, entry_hash, created_at "
            f"FROM audit_events {where} "
            "ORDER BY id DESC LIMIT ?"
        )
        params.append(limit)

        conn = self._get_conn()
        try:
            rows = conn.execute(sql, params).fetchall()
            return [dict(r) for r in rows]
        finally:
            conn.close()

    def verify_chain(self) -> Dict[str, Any]:
        """Verify the integrity of the entire hash chain.

        Returns a dict with:
            valid -- bool, True if chain is intact
            entries_checked -- int, number of entries examined; on failure
                this includes the entry at which the failure was found
            error -- str or None, description of first failure
        """
        conn = self._get_conn()
        try:
            rows = conn.execute(
                "SELECT id, event_type, actor, resource_id, details, "
                "prev_hash, entry_hash, created_at "
                "FROM audit_events ORDER BY id"
            ).fetchall()
        finally:
            conn.close()

        if not rows:
            return {"valid": True, "entries_checked": 0, "error": None}

        expected_prev = _GENESIS
        # Count entries instead of reporting the row id: ids are not
        # guaranteed to be contiguous (e.g. after rows were removed), so the
        # id is not a reliable "number of entries verified".
        for checked, raw in enumerate(rows, start=1):
            entry = dict(raw)
            # Check the prev_hash link
            if entry["prev_hash"] != expected_prev:
                return {
                    "valid": False,
                    "entries_checked": checked,
                    "error": f"prev_hash mismatch at entry {entry['id']}",
                }
            # Recompute the entry hash
            computed = _compute_hash(
                entry["event_type"],
                entry["actor"],
                entry["resource_id"],
                entry["details"],
                entry["prev_hash"],
                entry["created_at"],
            )
            if computed != entry["entry_hash"]:
                return {
                    "valid": False,
                    "entries_checked": checked,
                    "error": f"entry_hash mismatch at entry {entry['id']}",
                }
            expected_prev = entry["entry_hash"]

        return {
            "valid": True,
            "entries_checked": len(rows),
            "error": None,
        }
@@ -0,0 +1,148 @@
1
+ # SPDX-License-Identifier: MIT
2
+ # Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
3
+ """EventBus listener that writes all events to audit.db.
4
+
5
+ Bridges the EventBus (real-time event emission) with AuditDB (tamper-evident
6
+ audit trail). Every event that passes through the EventBus gets persisted
7
+ into audit.db with full hash-chain integrity.
8
+
9
+ Thread-safe: handle_event() runs on the emitter's thread and must be fast.
10
+ Graceful: malformed events are logged defensively, never crash the caller.
11
+ """
12
+ import json
13
+ import logging
14
+ import threading
15
+ from typing import Any, Dict, Optional
16
+
17
+ from .audit_db import AuditDB
18
+
19
# Module logger under a fixed dotted name (not ``__name__``).
logger = logging.getLogger("superlocalmemory.compliance.audit_logger")
20
+
21
+
22
class AuditLogger:
    """Listens to EventBus events and writes them to audit.db.

    Usage:
        audit_logger = AuditLogger("/path/to/audit.db")
        audit_logger.register_with_eventbus()  # auto-subscribe

    Or manually:
        event_bus.add_listener(audit_logger.handle_event)
    """

    def __init__(self, audit_db_path: str):
        """Open (and if needed create) the audit database at ``audit_db_path``."""
        self._audit_db = AuditDB(audit_db_path)
        # Guards the write plus both counters below.
        self._lock = threading.Lock()
        self._events_logged: int = 0
        self._errors: int = 0
        self._registered: bool = False

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    @property
    def events_logged(self) -> int:
        """Total number of events successfully written to audit.db."""
        return self._events_logged

    def handle_event(self, event: Dict[str, Any]) -> None:
        """Process a single EventBus event and write it to audit.db.

        Extracts event_type, source_agent (actor), memory_id (resource_id),
        and payload (details) from the event dict, then delegates to
        AuditDB.log_event().

        This method MUST NOT raise — it runs on the emitter's thread.
        Any failure is caught, logged, and counted in self._errors.

        Args:
            event: Dict emitted by EventBus. Expected keys:
                event_type, source_agent, memory_id, payload, timestamp.
                All keys are optional; a missing *or* ``None`` event_type /
                source_agent falls back to ``"unknown"`` / ``"system"``.
        """
        try:
            if not isinstance(event, dict):
                logger.warning("AuditLogger received non-dict event: %s", type(event))
                return

            # dict.get()'s default only applies to *missing* keys. A key that
            # is present with value None would otherwise reach the NOT NULL
            # columns of audit_events and the event would be dropped.
            event_type = event.get("event_type")
            if event_type is None:
                event_type = "unknown"
            actor = event.get("source_agent")
            if actor is None:
                actor = "system"
            resource_id = event.get("memory_id")
            payload = event.get("payload", {})

            # Build details dict including any extra context
            details: Dict[str, Any] = {}
            if isinstance(payload, dict):
                details.update(payload)
            else:
                details["raw_payload"] = str(payload)

            # Include timestamp from event if present
            ts = event.get("timestamp")
            if ts:
                details["event_timestamp"] = ts

            with self._lock:
                self._audit_db.log_event(
                    event_type=event_type,
                    actor=actor,
                    resource_id=resource_id,
                    details=details,
                )
                self._events_logged += 1

        except Exception as exc:
            # The lock taken above has been released by the time we get
            # here, so re-acquiring it is safe. Counting under the lock keeps
            # _errors consistent when several emitter threads fail at once.
            with self._lock:
                self._errors += 1
            logger.error(
                "AuditLogger failed to log event: %s (event=%s)",
                exc,
                _safe_repr(event),
            )

    def register_with_eventbus(self) -> bool:
        """Register this logger as an EventBus listener.

        Attempts to find the EventBus singleton and subscribe
        handle_event as a listener. Returns True on success,
        False if EventBus is unavailable.

        Graceful: never raises; returns False on any failure.
        """
        try:
            # Imported lazily so a missing event_bus module is reported as
            # "unavailable" instead of breaking import of this module.
            from event_bus import EventBus as EB

            bus = EB.get_instance()
            bus.add_listener(self.handle_event)
            self._registered = True
            logger.info("AuditLogger registered with EventBus")
            return True
        except Exception as exc:
            logger.warning("AuditLogger could not register with EventBus: %s", exc)
            self._registered = False
            return False

    def get_status(self) -> Dict[str, Any]:
        """Return diagnostic status of this audit logger.

        Returns:
            Dict with keys: events_logged, errors, registered.
        """
        return {
            "events_logged": self._events_logged,
            "errors": self._errors,
            "registered": self._registered,
        }
136
+
137
+
138
+ # ------------------------------------------------------------------
139
+ # Internal helpers
140
+ # ------------------------------------------------------------------
141
+
142
+ def _safe_repr(obj: Any, max_len: int = 200) -> str:
143
+ """Safe repr that truncates and never raises."""
144
+ try:
145
+ r = repr(obj)
146
+ return r[:max_len] + "..." if len(r) > max_len else r
147
+ except Exception:
148
+ return "<unrepresentable>"
@@ -0,0 +1,289 @@
1
+ # SPDX-License-Identifier: MIT
2
+ # Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
3
+ """Compliance retention manager — regulatory retention enforcement.
4
+
5
+ Unlike the lifecycle ``retention_policy.py`` (which manages lifecycle-level
6
+ policies stored alongside memory.db), this compliance module is the
7
+ *regulatory* layer that:
8
+
9
+ - Links retention rules to regulatory frameworks (GDPR, EU AI Act, HIPAA).
10
+ - Enforces GDPR right-to-erasure (tombstone memory + preserve audit trail).
11
+ - Enforces EU AI Act audit retention (10-year minimum for audit records).
12
+ - Records every retention action in audit.db for tamper-evident compliance.
13
+
14
+ Rules are stored in audit.db (``compliance_retention_rules`` table) so that
15
+ the audit database remains the single source of truth for all compliance
16
+ configuration and evidence.
17
+ """
18
+ import hashlib
19
+ import json
20
+ import logging
21
+ import sqlite3
22
+ from datetime import datetime, timezone
23
+ from typing import Any, Dict, List, Optional
24
+
25
logger = logging.getLogger(__name__)

# DDL for the rules table kept in audit.db. ``applies_to`` holds the
# JSON-encoded match criteria written by create_retention_rule().
_RULES_TABLE_SQL = """
CREATE TABLE IF NOT EXISTS compliance_retention_rules (
id INTEGER PRIMARY KEY AUTOINCREMENT,
name TEXT NOT NULL,
framework TEXT NOT NULL,
retention_days INTEGER NOT NULL,
action TEXT NOT NULL,
applies_to TEXT NOT NULL,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
)
"""

# DDL for the audit trail table; only takes effect when the table does not
# exist yet (IF NOT EXISTS).
# NOTE(review): unlike the schema in audit_db.py, prev_hash and entry_hash
# have column defaults here ('genesis' / ''). A row inserted without explicit
# hashes would get an empty entry_hash — confirm this is intentional.
_AUDIT_EVENTS_TABLE_SQL = """
CREATE TABLE IF NOT EXISTS audit_events (
id INTEGER PRIMARY KEY AUTOINCREMENT,
event_type TEXT NOT NULL,
actor TEXT NOT NULL,
resource_id INTEGER,
details TEXT DEFAULT '{}',
prev_hash TEXT NOT NULL DEFAULT 'genesis',
entry_hash TEXT NOT NULL DEFAULT '',
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
)
"""
51
+
52
+
53
+ def _compute_hash(event_type: str, actor: str, resource_id: Any,
54
+ details: str, prev_hash: str, ts: str) -> str:
55
+ """Compute a SHA-256 hash for a single audit event."""
56
+ payload = f"{event_type}|{actor}|{resource_id}|{details}|{prev_hash}|{ts}"
57
+ return hashlib.sha256(payload.encode("utf-8")).hexdigest()
58
+
59
+
60
class ComplianceRetentionManager:
    """Enforces regulatory retention policies across memory and audit DBs.

    Connects to *both* databases:
    - ``memory_db_path``: where memories live (tombstoning happens here).
    - ``audit_db_path``: where rules and audit events are stored.

    A new SQLite connection is opened for each operation and closed before
    the method returns.
    """

    def __init__(self, memory_db_path: str, audit_db_path: str):
        """Store both database paths and create the audit.db tables if absent."""
        self._memory_db_path = memory_db_path
        self._audit_db_path = audit_db_path
        self._ensure_tables()

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    def _connect_audit(self) -> sqlite3.Connection:
        """Open a connection to audit.db that yields ``sqlite3.Row`` rows."""
        conn = sqlite3.connect(self._audit_db_path)
        conn.row_factory = sqlite3.Row
        return conn

    def _connect_memory(self) -> sqlite3.Connection:
        """Open a connection to memory.db that yields ``sqlite3.Row`` rows."""
        conn = sqlite3.connect(self._memory_db_path)
        conn.row_factory = sqlite3.Row
        return conn

    def _ensure_tables(self) -> None:
        """Create the rules and audit-event tables in audit.db if missing."""
        conn = self._connect_audit()
        try:
            conn.execute(_RULES_TABLE_SQL)
            conn.execute(_AUDIT_EVENTS_TABLE_SQL)
            conn.commit()
        finally:
            conn.close()

    def _log_audit_event(self, event_type: str, actor: str,
                         resource_id: Optional[int],
                         details: Dict[str, Any]) -> None:
        """Append a tamper-evident audit event to audit.db.

        The new entry's ``prev_hash`` is the ``entry_hash`` of the current
        last row (or ``"genesis"`` for an empty table). ``details`` is stored
        as JSON; values that are not JSON-serialisable are stringified.
        """
        conn = self._connect_audit()
        try:
            # Take SQLite's write lock before reading the chain tip. audit.db
            # is shared with other writers, and without this another
            # connection could append between the SELECT and the INSERT,
            # producing two entries with the same prev_hash (a forked chain).
            conn.execute("BEGIN IMMEDIATE")
            last = conn.execute(
                "SELECT entry_hash FROM audit_events ORDER BY id DESC LIMIT 1"
            ).fetchone()
            prev_hash = last["entry_hash"] if last else "genesis"
            ts = datetime.now(timezone.utc).isoformat()
            details_json = json.dumps(details, default=str)
            entry_hash = _compute_hash(
                event_type, actor, resource_id, details_json, prev_hash, ts,
            )
            conn.execute(
                "INSERT INTO audit_events "
                "(event_type, actor, resource_id, details, prev_hash, entry_hash, created_at) "
                "VALUES (?, ?, ?, ?, ?, ?, ?)",
                (event_type, actor, resource_id, details_json, prev_hash,
                 entry_hash, ts),
            )
            conn.commit()
        finally:
            # Closing without a commit discards the open transaction.
            conn.close()

    @staticmethod
    def _parse_json(value: Any) -> Any:
        """Decode ``value`` if it is a JSON string.

        Strings that are not valid JSON are returned unchanged, ``None``
        becomes an empty list, and any other value is passed through.
        """
        if isinstance(value, str):
            try:
                return json.loads(value)
            except (json.JSONDecodeError, TypeError):
                return value
        return value if value is not None else []

    @staticmethod
    def _matches(criteria: Any, mem_tags: Any, mem_project: Optional[str]) -> bool:
        """Return True when a rule's ``applies_to`` matches the memory.

        - ``criteria`` must be a non-empty dict, otherwise nothing matches.
        - ``"tags"``: at least one rule tag must be among the memory's tags.
          A single string is treated as one tag. Memory tags that are not a
          list count as "no tags".
        - ``"project_name"``: must equal the memory's project.
        Conditions present in ``criteria`` are combined with AND.
        """
        if not isinstance(criteria, dict) or not criteria:
            return False
        if "tags" in criteria:
            wanted = criteria["tags"]
            # set("pii") would yield {"p", "i"}; a bare string is one tag.
            if isinstance(wanted, str):
                wanted = [wanted]
            rule_tags = set(wanted) if wanted else set()
            m_tags = set(mem_tags) if isinstance(mem_tags, list) else set()
            if not rule_tags & m_tags:
                return False
        if "project_name" in criteria and mem_project != criteria["project_name"]:
            return False
        return True

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def create_retention_rule(self, name: str, framework: str,
                              retention_days: int, action: str,
                              applies_to: Dict[str, Any]) -> int:
        """Create a compliance retention rule in audit.db.

        ``applies_to`` is stored JSON-encoded. After the rule is committed, a
        ``retention.rule_created`` audit event is appended with the new rule
        id as its resource id.

        Returns the auto-generated rule ID.
        """
        conn = self._connect_audit()
        try:
            cur = conn.execute(
                "INSERT INTO compliance_retention_rules "
                "(name, framework, retention_days, action, applies_to) "
                "VALUES (?, ?, ?, ?, ?)",
                (name, framework, retention_days, action,
                 json.dumps(applies_to)),
            )
            conn.commit()
            rule_id = cur.lastrowid
        finally:
            conn.close()

        self._log_audit_event(
            "retention.rule_created", "system", rule_id,
            {"name": name, "framework": framework},
        )
        return rule_id

    def list_rules(self) -> List[Dict[str, Any]]:
        """Return all compliance retention rules, ordered by id.

        Each rule is a plain dict; a string ``applies_to`` is JSON-decoded.
        """
        conn = self._connect_audit()
        try:
            rows = conn.execute(
                "SELECT * FROM compliance_retention_rules ORDER BY id"
            ).fetchall()
            result = []
            for r in rows:
                d = dict(r)
                if isinstance(d.get("applies_to"), str):
                    d["applies_to"] = self._parse_json(d["applies_to"])
                result.append(d)
            return result
        finally:
            conn.close()

    def evaluate_memory(self, memory_id: int) -> Optional[Dict[str, Any]]:
        """Check which compliance rule applies to a memory.

        Reads the memory's tags/project from memory.db, then evaluates
        all rules from audit.db. The first matching rule (ordered by id)
        is returned.

        Returns a dict with ``rule_name``, ``action``, ``retention_days``,
        ``framework``; or ``None`` if the memory does not exist or no rule
        matches.
        """
        mem_conn = self._connect_memory()
        try:
            mem = mem_conn.execute(
                "SELECT tags, project_name FROM memories WHERE id = ?",
                (memory_id,),
            ).fetchone()
            if mem is None:
                return None
            mem_tags = self._parse_json(mem["tags"])
            mem_project = mem["project_name"]
        finally:
            mem_conn.close()

        audit_conn = self._connect_audit()
        try:
            rules = audit_conn.execute(
                "SELECT * FROM compliance_retention_rules ORDER BY id"
            ).fetchall()
            for rule in rules:
                criteria = self._parse_json(rule["applies_to"])
                if self._matches(criteria, mem_tags, mem_project):
                    return {
                        "rule_name": rule["name"],
                        "action": rule["action"],
                        "retention_days": rule["retention_days"],
                        "framework": rule["framework"],
                    }
            return None
        finally:
            audit_conn.close()

    def execute_erasure_request(self, memory_id: int, framework: str,
                                requested_by: str) -> Dict[str, Any]:
        """Execute a GDPR (or other framework) right-to-erasure request.

        1. Tombstones the memory in memory.db by setting ``lifecycle_state``
           to ``'tombstoned'`` and stamping ``lifecycle_updated_at``. No
           other column of the memory row is modified here.
        2. Logs the erasure event in audit.db (preserving the audit trail).

        The two steps use separate databases and are not atomic: if the audit
        write fails, the exception propagates after the memory has already
        been tombstoned.

        Returns a result dict with ``success``, ``action``, and ``memory_id``;
        for an unknown memory, ``success`` is False and ``error`` is
        ``"memory_not_found"``.
        """
        mem_conn = self._connect_memory()
        try:
            row = mem_conn.execute(
                "SELECT id FROM memories WHERE id = ?", (memory_id,),
            ).fetchone()
            if row is None:
                return {"success": False, "error": "memory_not_found",
                        "memory_id": memory_id}

            ts = datetime.now(timezone.utc).isoformat()
            mem_conn.execute(
                "UPDATE memories SET lifecycle_state = 'tombstoned', "
                "lifecycle_updated_at = ? WHERE id = ?",
                (ts, memory_id),
            )
            mem_conn.commit()
        finally:
            mem_conn.close()

        self._log_audit_event(
            "retention.erasure", requested_by, memory_id,
            {"framework": framework, "action": "tombstoned"},
        )

        return {"success": True, "action": "tombstoned",
                "memory_id": memory_id}

    def get_compliance_status(self) -> Dict[str, Any]:
        """Return a summary of current compliance retention state.

        Returns:
            Dict with ``rules_count``, ``frameworks`` (sorted, de-duplicated
            framework names) and ``audit_events_count``.
        """
        conn = self._connect_audit()
        try:
            # Only the framework column is needed; one row per rule, so the
            # row count is still the rule count.
            rule_rows = conn.execute(
                "SELECT framework FROM compliance_retention_rules"
            ).fetchall()
            events_count = conn.execute(
                "SELECT COUNT(*) AS cnt FROM audit_events"
            ).fetchone()["cnt"]
            return {
                "rules_count": len(rule_rows),
                "frameworks": sorted({r["framework"] for r in rule_rows}),
                "audit_events_count": events_count,
            }
        finally:
            conn.close()