superlocalmemory 2.7.6 → 2.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +120 -155
- package/README.md +115 -89
- package/api_server.py +2 -12
- package/docs/PATTERN-LEARNING.md +64 -199
- package/docs/example_graph_usage.py +4 -6
- package/install.sh +59 -0
- package/mcp_server.py +83 -7
- package/package.json +1 -8
- package/scripts/generate-thumbnails.py +3 -5
- package/skills/slm-build-graph/SKILL.md +1 -1
- package/skills/slm-list-recent/SKILL.md +1 -1
- package/skills/slm-recall/SKILL.md +1 -1
- package/skills/slm-remember/SKILL.md +1 -1
- package/skills/slm-show-patterns/SKILL.md +1 -1
- package/skills/slm-status/SKILL.md +1 -1
- package/skills/slm-switch-profile/SKILL.md +1 -1
- package/src/agent_registry.py +7 -18
- package/src/auth_middleware.py +3 -5
- package/src/auto_backup.py +3 -7
- package/src/behavioral/__init__.py +49 -0
- package/src/behavioral/behavioral_listener.py +203 -0
- package/src/behavioral/behavioral_patterns.py +275 -0
- package/src/behavioral/cross_project_transfer.py +206 -0
- package/src/behavioral/outcome_inference.py +194 -0
- package/src/behavioral/outcome_tracker.py +193 -0
- package/src/behavioral/tests/__init__.py +4 -0
- package/src/behavioral/tests/test_behavioral_integration.py +108 -0
- package/src/behavioral/tests/test_behavioral_patterns.py +150 -0
- package/src/behavioral/tests/test_cross_project_transfer.py +142 -0
- package/src/behavioral/tests/test_mcp_behavioral.py +139 -0
- package/src/behavioral/tests/test_mcp_report_outcome.py +117 -0
- package/src/behavioral/tests/test_outcome_inference.py +107 -0
- package/src/behavioral/tests/test_outcome_tracker.py +96 -0
- package/src/cache_manager.py +4 -6
- package/src/compliance/__init__.py +48 -0
- package/src/compliance/abac_engine.py +149 -0
- package/src/compliance/abac_middleware.py +116 -0
- package/src/compliance/audit_db.py +215 -0
- package/src/compliance/audit_logger.py +148 -0
- package/src/compliance/retention_manager.py +289 -0
- package/src/compliance/retention_scheduler.py +186 -0
- package/src/compliance/tests/__init__.py +4 -0
- package/src/compliance/tests/test_abac_enforcement.py +95 -0
- package/src/compliance/tests/test_abac_engine.py +124 -0
- package/src/compliance/tests/test_abac_mcp_integration.py +118 -0
- package/src/compliance/tests/test_audit_db.py +123 -0
- package/src/compliance/tests/test_audit_logger.py +98 -0
- package/src/compliance/tests/test_mcp_audit.py +128 -0
- package/src/compliance/tests/test_mcp_retention_policy.py +125 -0
- package/src/compliance/tests/test_retention_manager.py +131 -0
- package/src/compliance/tests/test_retention_scheduler.py +99 -0
- package/src/db_connection_manager.py +2 -12
- package/src/embedding_engine.py +61 -669
- package/src/embeddings/__init__.py +47 -0
- package/src/embeddings/cache.py +70 -0
- package/src/embeddings/cli.py +113 -0
- package/src/embeddings/constants.py +47 -0
- package/src/embeddings/database.py +91 -0
- package/src/embeddings/engine.py +247 -0
- package/src/embeddings/model_loader.py +145 -0
- package/src/event_bus.py +3 -13
- package/src/graph/__init__.py +36 -0
- package/src/graph/build_helpers.py +74 -0
- package/src/graph/cli.py +87 -0
- package/src/graph/cluster_builder.py +188 -0
- package/src/graph/cluster_summary.py +148 -0
- package/src/graph/constants.py +47 -0
- package/src/graph/edge_builder.py +162 -0
- package/src/graph/entity_extractor.py +95 -0
- package/src/graph/graph_core.py +226 -0
- package/src/graph/graph_search.py +231 -0
- package/src/graph/hierarchical.py +207 -0
- package/src/graph/schema.py +99 -0
- package/src/graph_engine.py +45 -1451
- package/src/hnsw_index.py +3 -7
- package/src/hybrid_search.py +36 -683
- package/src/learning/__init__.py +27 -12
- package/src/learning/adaptive_ranker.py +50 -12
- package/src/learning/cross_project_aggregator.py +2 -12
- package/src/learning/engagement_tracker.py +2 -12
- package/src/learning/feature_extractor.py +175 -43
- package/src/learning/feedback_collector.py +7 -12
- package/src/learning/learning_db.py +180 -12
- package/src/learning/project_context_manager.py +2 -12
- package/src/learning/source_quality_scorer.py +2 -12
- package/src/learning/synthetic_bootstrap.py +2 -12
- package/src/learning/tests/__init__.py +2 -0
- package/src/learning/tests/test_adaptive_ranker.py +2 -6
- package/src/learning/tests/test_adaptive_ranker_v28.py +60 -0
- package/src/learning/tests/test_aggregator.py +2 -6
- package/src/learning/tests/test_auto_retrain_v28.py +35 -0
- package/src/learning/tests/test_e2e_ranking_v28.py +82 -0
- package/src/learning/tests/test_feature_extractor_v28.py +93 -0
- package/src/learning/tests/test_feedback_collector.py +2 -6
- package/src/learning/tests/test_learning_db.py +2 -6
- package/src/learning/tests/test_learning_db_v28.py +110 -0
- package/src/learning/tests/test_learning_init_v28.py +48 -0
- package/src/learning/tests/test_outcome_signals.py +48 -0
- package/src/learning/tests/test_project_context.py +2 -6
- package/src/learning/tests/test_schema_migration.py +319 -0
- package/src/learning/tests/test_signal_inference.py +11 -13
- package/src/learning/tests/test_source_quality.py +2 -6
- package/src/learning/tests/test_synthetic_bootstrap.py +3 -7
- package/src/learning/tests/test_workflow_miner.py +2 -6
- package/src/learning/workflow_pattern_miner.py +2 -12
- package/src/lifecycle/__init__.py +54 -0
- package/src/lifecycle/bounded_growth.py +239 -0
- package/src/lifecycle/compaction_engine.py +226 -0
- package/src/lifecycle/lifecycle_engine.py +302 -0
- package/src/lifecycle/lifecycle_evaluator.py +225 -0
- package/src/lifecycle/lifecycle_scheduler.py +130 -0
- package/src/lifecycle/retention_policy.py +285 -0
- package/src/lifecycle/tests/__init__.py +4 -0
- package/src/lifecycle/tests/test_bounded_growth.py +193 -0
- package/src/lifecycle/tests/test_compaction.py +179 -0
- package/src/lifecycle/tests/test_lifecycle_engine.py +137 -0
- package/src/lifecycle/tests/test_lifecycle_evaluation.py +177 -0
- package/src/lifecycle/tests/test_lifecycle_scheduler.py +127 -0
- package/src/lifecycle/tests/test_lifecycle_search.py +109 -0
- package/src/lifecycle/tests/test_mcp_compact.py +149 -0
- package/src/lifecycle/tests/test_mcp_lifecycle_status.py +114 -0
- package/src/lifecycle/tests/test_retention_policy.py +162 -0
- package/src/mcp_tools_v28.py +280 -0
- package/src/memory-profiles.py +2 -12
- package/src/memory-reset.py +2 -12
- package/src/memory_compression.py +2 -12
- package/src/memory_store_v2.py +76 -20
- package/src/migrate_v1_to_v2.py +2 -12
- package/src/pattern_learner.py +29 -975
- package/src/patterns/__init__.py +24 -0
- package/src/patterns/analyzers.py +247 -0
- package/src/patterns/learner.py +267 -0
- package/src/patterns/scoring.py +167 -0
- package/src/patterns/store.py +223 -0
- package/src/patterns/terminology.py +138 -0
- package/src/provenance_tracker.py +4 -14
- package/src/query_optimizer.py +4 -6
- package/src/rate_limiter.py +2 -6
- package/src/search/__init__.py +20 -0
- package/src/search/cli.py +77 -0
- package/src/search/constants.py +26 -0
- package/src/search/engine.py +239 -0
- package/src/search/fusion.py +122 -0
- package/src/search/index_loader.py +112 -0
- package/src/search/methods.py +162 -0
- package/src/search_engine_v2.py +4 -6
- package/src/setup_validator.py +7 -13
- package/src/subscription_manager.py +2 -12
- package/src/tree/__init__.py +59 -0
- package/src/tree/builder.py +183 -0
- package/src/tree/nodes.py +196 -0
- package/src/tree/queries.py +252 -0
- package/src/tree/schema.py +76 -0
- package/src/tree_manager.py +10 -711
- package/src/trust/__init__.py +45 -0
- package/src/trust/constants.py +66 -0
- package/src/trust/queries.py +157 -0
- package/src/trust/schema.py +95 -0
- package/src/trust/scorer.py +299 -0
- package/src/trust/signals.py +95 -0
- package/src/trust_scorer.py +39 -697
- package/src/webhook_dispatcher.py +2 -12
- package/ui/app.js +1 -1
- package/ui/js/agents.js +1 -1
- package/ui_server.py +2 -14
- package/ATTRIBUTION.md +0 -140
- package/docs/ARCHITECTURE-V2.5.md +0 -190
- package/docs/GRAPH-ENGINE.md +0 -503
- package/docs/architecture-diagram.drawio +0 -405
- package/docs/plans/2026-02-13-benchmark-suite.md +0 -1349
|
@@ -0,0 +1,215 @@
|
|
|
1
|
+
# SPDX-License-Identifier: MIT
|
|
2
|
+
# Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
|
|
3
|
+
"""Audit database management with hash chain tamper detection.
|
|
4
|
+
|
|
5
|
+
Provides a tamper-evident audit trail stored in a separate audit.db.
|
|
6
|
+
Each entry's hash incorporates the previous entry's hash, forming a
|
|
7
|
+
chain that can detect any modification to historical records.
|
|
8
|
+
"""
|
|
9
|
+
import hashlib
|
|
10
|
+
import json
|
|
11
|
+
import sqlite3
|
|
12
|
+
import threading
|
|
13
|
+
from datetime import datetime, timezone
|
|
14
|
+
from typing import Any, Dict, List, Optional
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
# Sentinel used as prev_hash for the very first chain entry; verify_chain()
# starts from this value when walking the chain.
_GENESIS = "genesis"

# Append-only audit trail table. prev_hash/entry_hash implement the hash
# chain: each entry_hash covers the row's fields plus the previous entry's
# hash, so editing any historical row breaks verification of all later rows.
_SCHEMA = """
CREATE TABLE IF NOT EXISTS audit_events (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    event_type TEXT NOT NULL,
    actor TEXT NOT NULL,
    resource_id INTEGER,
    details TEXT DEFAULT '{}',
    prev_hash TEXT NOT NULL,
    entry_hash TEXT NOT NULL,
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
)
"""
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def _compute_hash(
|
|
34
|
+
event_type: str,
|
|
35
|
+
actor: str,
|
|
36
|
+
resource_id: Optional[int],
|
|
37
|
+
details: str,
|
|
38
|
+
prev_hash: str,
|
|
39
|
+
created_at: str,
|
|
40
|
+
) -> str:
|
|
41
|
+
"""Compute SHA-256 hash for an audit entry."""
|
|
42
|
+
payload = (
|
|
43
|
+
f"{event_type}{actor}{resource_id}{details}{prev_hash}{created_at}"
|
|
44
|
+
)
|
|
45
|
+
return hashlib.sha256(payload.encode("utf-8")).hexdigest()
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class AuditDB:
    """Manages audit.db -- tamper-evident compliance audit trail.

    The hash chain guarantees that any modification to a stored event
    will be detected by verify_chain(). The first entry uses a fixed
    genesis value as its previous hash.
    """

    def __init__(self, db_path: Optional[str] = None):
        # db_path may be None, producing an inert instance; the schema is
        # only created (and connections opened) when a path is supplied.
        self._db_path = db_path
        # Serializes log_event() so the prev_hash read and the INSERT are
        # atomic with respect to other writers in this process. NOTE(review):
        # a threading.Lock does not protect against other processes writing
        # to the same audit.db -- confirm single-writer deployment.
        self._lock = threading.Lock()
        if db_path:
            self._init_db()

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    def _get_conn(self) -> sqlite3.Connection:
        """Open a new connection to audit.db (WAL mode, Row factory)."""
        conn = sqlite3.connect(self._db_path)
        conn.execute("PRAGMA journal_mode=WAL")
        conn.execute("PRAGMA foreign_keys=ON")
        conn.row_factory = sqlite3.Row
        return conn

    def _init_db(self) -> None:
        """Create the audit_events table if it does not exist."""
        conn = self._get_conn()
        try:
            conn.executescript(_SCHEMA)
            conn.commit()
        finally:
            conn.close()

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def log_event(
        self,
        event_type: str,
        actor: str = "system",
        resource_id: Optional[int] = None,
        details: Optional[Dict[str, Any]] = None,
    ) -> int:
        """Append an event to the audit trail and return its row id.

        Args:
            event_type: Event type label (e.g. ``"retention.erasure"``).
            actor: Who performed the action; defaults to ``"system"``.
            resource_id: Optional id of the affected resource.
            details: Optional context dict; serialized to JSON with sorted
                keys so the hashed representation is deterministic.

        Returns:
            The ``id`` of the inserted audit_events row.
        """
        # sort_keys makes the JSON -- and therefore the entry hash --
        # independent of dict insertion order.
        details_str = json.dumps(details or {}, sort_keys=True)
        created_at = datetime.now(timezone.utc).isoformat()

        with self._lock:
            conn = self._get_conn()
            try:
                # Fetch the hash of the most recent entry (or genesis)
                row = conn.execute(
                    "SELECT entry_hash FROM audit_events "
                    "ORDER BY id DESC LIMIT 1"
                ).fetchone()
                prev_hash = row["entry_hash"] if row else _GENESIS

                # Chain this entry to its predecessor.
                entry_hash = _compute_hash(
                    event_type, actor, resource_id,
                    details_str, prev_hash, created_at,
                )

                cursor = conn.execute(
                    "INSERT INTO audit_events "
                    "(event_type, actor, resource_id, details, "
                    " prev_hash, entry_hash, created_at) "
                    "VALUES (?, ?, ?, ?, ?, ?, ?)",
                    (
                        event_type, actor, resource_id,
                        details_str, prev_hash, entry_hash, created_at,
                    ),
                )
                conn.commit()
                return cursor.lastrowid
            finally:
                conn.close()

    def query_events(
        self,
        event_type: Optional[str] = None,
        actor: Optional[str] = None,
        resource_id: Optional[int] = None,
        limit: int = 100,
    ) -> List[Dict[str, Any]]:
        """Query audit events with optional filters.

        Args:
            event_type: Exact event_type to match, or None for any.
            actor: Exact actor to match, or None for any.
            resource_id: Exact resource_id to match, or None for any.
            limit: Maximum number of rows returned (newest first).

        Returns:
            List of row dicts ordered by id descending.
        """
        clauses: List[str] = []
        params: List[Any] = []

        if event_type is not None:
            clauses.append("event_type = ?")
            params.append(event_type)
        if actor is not None:
            clauses.append("actor = ?")
            params.append(actor)
        if resource_id is not None:
            clauses.append("resource_id = ?")
            params.append(resource_id)

        # Only fixed clause strings are interpolated; all values are bound
        # via placeholders, so this f-string is injection-safe.
        where = f"WHERE {' AND '.join(clauses)}" if clauses else ""
        sql = (
            f"SELECT id, event_type, actor, resource_id, details, "
            f"prev_hash, entry_hash, created_at "
            f"FROM audit_events {where} "
            f"ORDER BY id DESC LIMIT ?"
        )
        params.append(limit)

        conn = self._get_conn()
        try:
            rows = conn.execute(sql, params).fetchall()
            return [dict(r) for r in rows]
        finally:
            conn.close()

    def verify_chain(self) -> Dict[str, Any]:
        """Verify the integrity of the entire hash chain.

        Returns a dict with:
            valid -- bool, True if chain is intact
            entries_checked -- int, number of entries verified
            error -- str or None, description of first failure
        """
        conn = self._get_conn()
        try:
            rows = conn.execute(
                "SELECT id, event_type, actor, resource_id, details, "
                "prev_hash, entry_hash, created_at "
                "FROM audit_events ORDER BY id"
            ).fetchall()
        finally:
            conn.close()

        if not rows:
            # An empty trail is trivially valid.
            return {"valid": True, "entries_checked": 0, "error": None}

        expected_prev = _GENESIS
        for row in rows:
            row = dict(row)
            # Check the prev_hash link
            if row["prev_hash"] != expected_prev:
                return {
                    "valid": False,
                    "entries_checked": row["id"],
                    "error": f"prev_hash mismatch at entry {row['id']}",
                }
            # Recompute the entry hash
            computed = _compute_hash(
                row["event_type"],
                row["actor"],
                row["resource_id"],
                row["details"],
                row["prev_hash"],
                row["created_at"],
            )
            if computed != row["entry_hash"]:
                return {
                    "valid": False,
                    "entries_checked": row["id"],
                    "error": f"entry_hash mismatch at entry {row['id']}",
                }
            expected_prev = row["entry_hash"]

        return {
            "valid": True,
            "entries_checked": len(rows),
            "error": None,
        }
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
# SPDX-License-Identifier: MIT
|
|
2
|
+
# Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
|
|
3
|
+
"""EventBus listener that writes all events to audit.db.
|
|
4
|
+
|
|
5
|
+
Bridges the EventBus (real-time event emission) with AuditDB (tamper-evident
|
|
6
|
+
audit trail). Every event that passes through the EventBus gets persisted
|
|
7
|
+
into audit.db with full hash-chain integrity.
|
|
8
|
+
|
|
9
|
+
Thread-safe: handle_event() runs on the emitter's thread and must be fast.
|
|
10
|
+
Graceful: malformed events are logged defensively, never crash the caller.
|
|
11
|
+
"""
|
|
12
|
+
import json
|
|
13
|
+
import logging
|
|
14
|
+
import threading
|
|
15
|
+
from typing import Any, Dict, Optional
|
|
16
|
+
|
|
17
|
+
from .audit_db import AuditDB
|
|
18
|
+
|
|
19
|
+
logger = logging.getLogger("superlocalmemory.compliance.audit_logger")
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class AuditLogger:
    """Bridges the EventBus to the tamper-evident audit trail.

    Every event delivered to :meth:`handle_event` is persisted into
    audit.db via :class:`AuditDB`, preserving hash-chain integrity.

    Usage:
        audit_logger = AuditLogger("/path/to/audit.db")
        audit_logger.register_with_eventbus()  # auto-subscribe

    Or manually:
        event_bus.add_listener(audit_logger.handle_event)
    """

    def __init__(self, audit_db_path: str):
        self._audit_db = AuditDB(audit_db_path)
        self._lock = threading.Lock()
        self._events_logged: int = 0
        self._errors: int = 0
        self._registered: bool = False

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    @property
    def events_logged(self) -> int:
        """Number of events successfully persisted to audit.db."""
        return self._events_logged

    def handle_event(self, event: Dict[str, Any]) -> None:
        """Persist one EventBus event into audit.db.

        Maps event fields onto the audit schema: ``event_type`` ->
        event_type, ``source_agent`` -> actor, ``memory_id`` ->
        resource_id, ``payload`` (plus the event's ``timestamp``) ->
        details, then delegates to AuditDB.log_event().

        This method MUST NOT raise -- it runs on the emitter's thread.
        Any failure is caught, logged, and counted in self._errors.

        Args:
            event: Dict emitted by EventBus. Expected keys:
                event_type, source_agent, memory_id, payload, timestamp.
                All keys are optional for graceful degradation.
        """
        try:
            if not isinstance(event, dict):
                logger.warning("AuditLogger received non-dict event: %s", type(event))
                return

            # Fold the payload (or a stringified fallback) into details,
            # then attach the original emission timestamp when available.
            payload = event.get("payload", {})
            if isinstance(payload, dict):
                details = dict(payload)
            else:
                details = {"raw_payload": str(payload)}

            ts = event.get("timestamp")
            if ts:
                details["event_timestamp"] = ts

            with self._lock:
                self._audit_db.log_event(
                    event_type=event.get("event_type", "unknown"),
                    actor=event.get("source_agent", "system"),
                    resource_id=event.get("memory_id"),
                    details=details,
                )
                self._events_logged += 1

        except Exception as exc:
            self._errors += 1
            logger.error(
                "AuditLogger failed to log event: %s (event=%s)",
                exc,
                _safe_repr(event),
            )

    def register_with_eventbus(self) -> bool:
        """Subscribe handle_event to the EventBus singleton.

        Returns True on success, False when the EventBus cannot be
        imported or contacted. Graceful: never raises.
        """
        try:
            from event_bus import EventBus as EB

            EB.get_instance().add_listener(self.handle_event)
            self._registered = True
            logger.info("AuditLogger registered with EventBus")
            return True
        except Exception as exc:
            logger.warning("AuditLogger could not register with EventBus: %s", exc)
            self._registered = False
            return False

    def get_status(self) -> Dict[str, Any]:
        """Return diagnostic counters for this audit logger.

        Returns:
            Dict with keys: events_logged, errors, registered.
        """
        return {
            "events_logged": self._events_logged,
            "errors": self._errors,
            "registered": self._registered,
        }
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
# ------------------------------------------------------------------
|
|
139
|
+
# Internal helpers
|
|
140
|
+
# ------------------------------------------------------------------
|
|
141
|
+
|
|
142
|
+
def _safe_repr(obj: Any, max_len: int = 200) -> str:
|
|
143
|
+
"""Safe repr that truncates and never raises."""
|
|
144
|
+
try:
|
|
145
|
+
r = repr(obj)
|
|
146
|
+
return r[:max_len] + "..." if len(r) > max_len else r
|
|
147
|
+
except Exception:
|
|
148
|
+
return "<unrepresentable>"
|
|
@@ -0,0 +1,289 @@
|
|
|
1
|
+
# SPDX-License-Identifier: MIT
|
|
2
|
+
# Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
|
|
3
|
+
"""Compliance retention manager — regulatory retention enforcement.
|
|
4
|
+
|
|
5
|
+
Unlike the lifecycle ``retention_policy.py`` (which manages lifecycle-level
|
|
6
|
+
policies stored alongside memory.db), this compliance module is the
|
|
7
|
+
*regulatory* layer that:
|
|
8
|
+
|
|
9
|
+
- Links retention rules to regulatory frameworks (GDPR, EU AI Act, HIPAA).
|
|
10
|
+
- Enforces GDPR right-to-erasure (tombstone memory + preserve audit trail).
|
|
11
|
+
- Enforces EU AI Act audit retention (10-year minimum for audit records).
|
|
12
|
+
- Records every retention action in audit.db for tamper-evident compliance.
|
|
13
|
+
|
|
14
|
+
Rules are stored in audit.db (``compliance_retention_rules`` table) so that
|
|
15
|
+
the audit database remains the single source of truth for all compliance
|
|
16
|
+
configuration and evidence.
|
|
17
|
+
"""
|
|
18
|
+
import hashlib
|
|
19
|
+
import json
|
|
20
|
+
import logging
|
|
21
|
+
import sqlite3
|
|
22
|
+
from datetime import datetime, timezone
|
|
23
|
+
from typing import Any, Dict, List, Optional
|
|
24
|
+
|
|
25
|
+
logger = logging.getLogger(__name__)
|
|
26
|
+
|
|
27
|
+
# Retention rules live in audit.db so compliance configuration shares one
# store with its evidence (see module docstring). ``applies_to`` holds a
# JSON-encoded criteria dict (tags / project_name, see _matches()).
_RULES_TABLE_SQL = """
CREATE TABLE IF NOT EXISTS compliance_retention_rules (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    name TEXT NOT NULL,
    framework TEXT NOT NULL,
    retention_days INTEGER NOT NULL,
    action TEXT NOT NULL,
    applies_to TEXT NOT NULL,
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
)
"""

# Mirror of the audit_events schema created by compliance.audit_db, so this
# module can append audit evidence even when it initializes audit.db first.
# Created with IF NOT EXISTS, so whichever module runs first wins.
# NOTE(review): the extra DEFAULT clauses here differ from audit_db's
# _SCHEMA -- harmless for INSERTs that supply every column, but confirm
# the two definitions stay in sync.
_AUDIT_EVENTS_TABLE_SQL = """
CREATE TABLE IF NOT EXISTS audit_events (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    event_type TEXT NOT NULL,
    actor TEXT NOT NULL,
    resource_id INTEGER,
    details TEXT DEFAULT '{}',
    prev_hash TEXT NOT NULL DEFAULT 'genesis',
    entry_hash TEXT NOT NULL DEFAULT '',
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
)
"""
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def _compute_hash(event_type: str, actor: str, resource_id: Any,
|
|
54
|
+
details: str, prev_hash: str, ts: str) -> str:
|
|
55
|
+
"""Compute a SHA-256 hash for a single audit event."""
|
|
56
|
+
payload = f"{event_type}|{actor}|{resource_id}|{details}|{prev_hash}|{ts}"
|
|
57
|
+
return hashlib.sha256(payload.encode("utf-8")).hexdigest()
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
class ComplianceRetentionManager:
    """Enforces regulatory retention policies across memory and audit DBs.

    Connects to *both* databases:
        - ``memory_db_path``: where memories live (tombstoning happens here).
        - ``audit_db_path``: where rules and audit events are stored.
    """

    def __init__(self, memory_db_path: str, audit_db_path: str):
        self._memory_db_path = memory_db_path
        self._audit_db_path = audit_db_path
        # Eagerly create the rules + audit tables so every public method
        # can assume they exist.
        self._ensure_tables()

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    def _connect_audit(self) -> sqlite3.Connection:
        """Open a fresh connection to audit.db (Row factory enabled)."""
        conn = sqlite3.connect(self._audit_db_path)
        conn.row_factory = sqlite3.Row
        return conn

    def _connect_memory(self) -> sqlite3.Connection:
        """Open a fresh connection to memory.db (Row factory enabled)."""
        conn = sqlite3.connect(self._memory_db_path)
        conn.row_factory = sqlite3.Row
        return conn

    def _ensure_tables(self) -> None:
        """Create the rules and audit_events tables in audit.db if missing."""
        conn = self._connect_audit()
        try:
            conn.execute(_RULES_TABLE_SQL)
            conn.execute(_AUDIT_EVENTS_TABLE_SQL)
            conn.commit()
        finally:
            conn.close()

    def _log_audit_event(self, event_type: str, actor: str,
                         resource_id: Optional[int],
                         details: Dict[str, Any]) -> None:
        """Append a tamper-evident audit event to audit.db."""
        # Uses the same '|'-separated hash payload as the module-level
        # _compute_hash so the chain stays verifiable end to end.
        # NOTE(review): unlike AuditDB.log_event, there is no lock around
        # the read-prev-hash/insert pair -- concurrent writers could fork
        # the chain; confirm single-writer usage.
        conn = self._connect_audit()
        try:
            last = conn.execute(
                "SELECT entry_hash FROM audit_events ORDER BY id DESC LIMIT 1"
            ).fetchone()
            prev_hash = last["entry_hash"] if last else "genesis"
            ts = datetime.now(timezone.utc).isoformat()
            # default=str keeps non-JSON-native values loggable.
            details_json = json.dumps(details, default=str)
            entry_hash = _compute_hash(
                event_type, actor, resource_id, details_json, prev_hash, ts,
            )
            conn.execute(
                "INSERT INTO audit_events "
                "(event_type, actor, resource_id, details, prev_hash, entry_hash, created_at) "
                "VALUES (?, ?, ?, ?, ?, ?, ?)",
                (event_type, actor, resource_id, details_json, prev_hash,
                 entry_hash, ts),
            )
            conn.commit()
        finally:
            conn.close()

    @staticmethod
    def _parse_json(value: Any) -> Any:
        """Decode a JSON string column; pass other values through.

        Non-JSON strings are returned unchanged; ``None`` becomes ``[]``
        (callers treat missing tags as an empty list).
        """
        if isinstance(value, str):
            try:
                return json.loads(value)
            except (json.JSONDecodeError, TypeError):
                return value
        return value if value is not None else []

    @staticmethod
    def _matches(criteria: Any, mem_tags: Any, mem_project: Optional[str]) -> bool:
        """Return True when a rule's ``applies_to`` matches the memory."""
        # Empty or malformed criteria never match (fail closed).
        if not isinstance(criteria, dict) or not criteria:
            return False
        ok = True
        # Tag criterion: at least one overlapping tag is required.
        if "tags" in criteria:
            rule_tags = set(criteria["tags"]) if criteria["tags"] else set()
            m_tags = set(mem_tags) if isinstance(mem_tags, list) else set()
            if not rule_tags & m_tags:
                ok = False
        # Project criterion: exact name match required.
        if "project_name" in criteria:
            if mem_project != criteria["project_name"]:
                ok = False
        return ok

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def create_retention_rule(self, name: str, framework: str,
                              retention_days: int, action: str,
                              applies_to: Dict[str, Any]) -> int:
        """Create a compliance retention rule in audit.db.

        Args:
            name: Human-readable rule name.
            framework: Regulatory framework label (e.g. GDPR, HIPAA).
            retention_days: Retention horizon in days.
            action: Action to take when the rule fires.
            applies_to: Criteria dict matched by _matches()
                (``tags`` and/or ``project_name``).

        Returns the auto-generated rule ID.
        """
        conn = self._connect_audit()
        try:
            cur = conn.execute(
                "INSERT INTO compliance_retention_rules "
                "(name, framework, retention_days, action, applies_to) "
                "VALUES (?, ?, ?, ?, ?)",
                (name, framework, retention_days, action,
                 json.dumps(applies_to)),
            )
            conn.commit()
            rule_id = cur.lastrowid
        finally:
            conn.close()

        # Record the rule creation itself as audit evidence.
        self._log_audit_event(
            "retention.rule_created", "system", rule_id,
            {"name": name, "framework": framework},
        )
        return rule_id

    def list_rules(self) -> List[Dict[str, Any]]:
        """Return all compliance retention rules."""
        conn = self._connect_audit()
        try:
            rows = conn.execute(
                "SELECT * FROM compliance_retention_rules ORDER BY id"
            ).fetchall()
            result = []
            for r in rows:
                d = dict(r)
                # applies_to is stored as JSON text; decode for callers.
                if isinstance(d.get("applies_to"), str):
                    d["applies_to"] = self._parse_json(d["applies_to"])
                result.append(d)
            return result
        finally:
            conn.close()

    def evaluate_memory(self, memory_id: int) -> Optional[Dict[str, Any]]:
        """Check which compliance rule applies to a memory.

        Reads the memory's tags/project from memory.db, then evaluates
        all rules from audit.db. The first matching rule (ordered by id)
        is returned.

        Returns a dict with ``rule_name``, ``action``, ``retention_days``,
        ``framework``; or ``None`` if no rule matches.
        """
        mem_conn = self._connect_memory()
        try:
            mem = mem_conn.execute(
                "SELECT tags, project_name FROM memories WHERE id = ?",
                (memory_id,),
            ).fetchone()
            if mem is None:
                # Unknown memory id: nothing to evaluate.
                return None
            mem_tags = self._parse_json(mem["tags"])
            mem_project = mem["project_name"]
        finally:
            mem_conn.close()

        audit_conn = self._connect_audit()
        try:
            rules = audit_conn.execute(
                "SELECT * FROM compliance_retention_rules ORDER BY id"
            ).fetchall()
            # First-match wins, in rule-creation order.
            for rule in rules:
                criteria = self._parse_json(rule["applies_to"])
                if self._matches(criteria, mem_tags, mem_project):
                    return {
                        "rule_name": rule["name"],
                        "action": rule["action"],
                        "retention_days": rule["retention_days"],
                        "framework": rule["framework"],
                    }
            return None
        finally:
            audit_conn.close()

    def execute_erasure_request(self, memory_id: int, framework: str,
                                requested_by: str) -> Dict[str, Any]:
        """Execute a GDPR (or other framework) right-to-erasure request.

        1. Tombstones the memory in memory.db.
        2. Logs the erasure event in audit.db (preserving the audit trail).

        Returns a result dict with ``success``, ``action``, and ``memory_id``.
        """
        mem_conn = self._connect_memory()
        try:
            row = mem_conn.execute(
                "SELECT id FROM memories WHERE id = ?", (memory_id,),
            ).fetchone()
            if row is None:
                return {"success": False, "error": "memory_not_found",
                        "memory_id": memory_id}

            # Tombstone rather than DELETE: the row (and the audit trail
            # referencing it) is preserved while the memory is retired.
            # NOTE(review): assumes memories has lifecycle_state /
            # lifecycle_updated_at columns (lifecycle module) -- confirm.
            ts = datetime.now(timezone.utc).isoformat()
            mem_conn.execute(
                "UPDATE memories SET lifecycle_state = 'tombstoned', "
                "lifecycle_updated_at = ? WHERE id = ?",
                (ts, memory_id),
            )
            mem_conn.commit()
        finally:
            mem_conn.close()

        self._log_audit_event(
            "retention.erasure", requested_by, memory_id,
            {"framework": framework, "action": "tombstoned"},
        )

        return {"success": True, "action": "tombstoned",
                "memory_id": memory_id}

    def get_compliance_status(self) -> Dict[str, Any]:
        """Return a summary of current compliance retention state."""
        conn = self._connect_audit()
        try:
            rules = conn.execute(
                "SELECT * FROM compliance_retention_rules"
            ).fetchall()
            # Deduplicate framework labels across all rules.
            frameworks = list({r["framework"] for r in rules})
            events_count = conn.execute(
                "SELECT COUNT(*) AS cnt FROM audit_events"
            ).fetchone()["cnt"]
            return {
                "rules_count": len(rules),
                "frameworks": sorted(frameworks),
                "audit_events_count": events_count,
            }
        finally:
            conn.close()
|