superlocalmemory 2.7.6 → 2.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +120 -155
- package/README.md +115 -89
- package/api_server.py +2 -12
- package/docs/PATTERN-LEARNING.md +64 -199
- package/docs/example_graph_usage.py +4 -6
- package/install.sh +59 -0
- package/mcp_server.py +83 -7
- package/package.json +1 -8
- package/scripts/generate-thumbnails.py +3 -5
- package/skills/slm-build-graph/SKILL.md +1 -1
- package/skills/slm-list-recent/SKILL.md +1 -1
- package/skills/slm-recall/SKILL.md +1 -1
- package/skills/slm-remember/SKILL.md +1 -1
- package/skills/slm-show-patterns/SKILL.md +1 -1
- package/skills/slm-status/SKILL.md +1 -1
- package/skills/slm-switch-profile/SKILL.md +1 -1
- package/src/agent_registry.py +7 -18
- package/src/auth_middleware.py +3 -5
- package/src/auto_backup.py +3 -7
- package/src/behavioral/__init__.py +49 -0
- package/src/behavioral/behavioral_listener.py +203 -0
- package/src/behavioral/behavioral_patterns.py +275 -0
- package/src/behavioral/cross_project_transfer.py +206 -0
- package/src/behavioral/outcome_inference.py +194 -0
- package/src/behavioral/outcome_tracker.py +193 -0
- package/src/behavioral/tests/__init__.py +4 -0
- package/src/behavioral/tests/test_behavioral_integration.py +108 -0
- package/src/behavioral/tests/test_behavioral_patterns.py +150 -0
- package/src/behavioral/tests/test_cross_project_transfer.py +142 -0
- package/src/behavioral/tests/test_mcp_behavioral.py +139 -0
- package/src/behavioral/tests/test_mcp_report_outcome.py +117 -0
- package/src/behavioral/tests/test_outcome_inference.py +107 -0
- package/src/behavioral/tests/test_outcome_tracker.py +96 -0
- package/src/cache_manager.py +4 -6
- package/src/compliance/__init__.py +48 -0
- package/src/compliance/abac_engine.py +149 -0
- package/src/compliance/abac_middleware.py +116 -0
- package/src/compliance/audit_db.py +215 -0
- package/src/compliance/audit_logger.py +148 -0
- package/src/compliance/retention_manager.py +289 -0
- package/src/compliance/retention_scheduler.py +186 -0
- package/src/compliance/tests/__init__.py +4 -0
- package/src/compliance/tests/test_abac_enforcement.py +95 -0
- package/src/compliance/tests/test_abac_engine.py +124 -0
- package/src/compliance/tests/test_abac_mcp_integration.py +118 -0
- package/src/compliance/tests/test_audit_db.py +123 -0
- package/src/compliance/tests/test_audit_logger.py +98 -0
- package/src/compliance/tests/test_mcp_audit.py +128 -0
- package/src/compliance/tests/test_mcp_retention_policy.py +125 -0
- package/src/compliance/tests/test_retention_manager.py +131 -0
- package/src/compliance/tests/test_retention_scheduler.py +99 -0
- package/src/db_connection_manager.py +2 -12
- package/src/embedding_engine.py +61 -669
- package/src/embeddings/__init__.py +47 -0
- package/src/embeddings/cache.py +70 -0
- package/src/embeddings/cli.py +113 -0
- package/src/embeddings/constants.py +47 -0
- package/src/embeddings/database.py +91 -0
- package/src/embeddings/engine.py +247 -0
- package/src/embeddings/model_loader.py +145 -0
- package/src/event_bus.py +3 -13
- package/src/graph/__init__.py +36 -0
- package/src/graph/build_helpers.py +74 -0
- package/src/graph/cli.py +87 -0
- package/src/graph/cluster_builder.py +188 -0
- package/src/graph/cluster_summary.py +148 -0
- package/src/graph/constants.py +47 -0
- package/src/graph/edge_builder.py +162 -0
- package/src/graph/entity_extractor.py +95 -0
- package/src/graph/graph_core.py +226 -0
- package/src/graph/graph_search.py +231 -0
- package/src/graph/hierarchical.py +207 -0
- package/src/graph/schema.py +99 -0
- package/src/graph_engine.py +45 -1451
- package/src/hnsw_index.py +3 -7
- package/src/hybrid_search.py +36 -683
- package/src/learning/__init__.py +27 -12
- package/src/learning/adaptive_ranker.py +50 -12
- package/src/learning/cross_project_aggregator.py +2 -12
- package/src/learning/engagement_tracker.py +2 -12
- package/src/learning/feature_extractor.py +175 -43
- package/src/learning/feedback_collector.py +7 -12
- package/src/learning/learning_db.py +180 -12
- package/src/learning/project_context_manager.py +2 -12
- package/src/learning/source_quality_scorer.py +2 -12
- package/src/learning/synthetic_bootstrap.py +2 -12
- package/src/learning/tests/__init__.py +2 -0
- package/src/learning/tests/test_adaptive_ranker.py +2 -6
- package/src/learning/tests/test_adaptive_ranker_v28.py +60 -0
- package/src/learning/tests/test_aggregator.py +2 -6
- package/src/learning/tests/test_auto_retrain_v28.py +35 -0
- package/src/learning/tests/test_e2e_ranking_v28.py +82 -0
- package/src/learning/tests/test_feature_extractor_v28.py +93 -0
- package/src/learning/tests/test_feedback_collector.py +2 -6
- package/src/learning/tests/test_learning_db.py +2 -6
- package/src/learning/tests/test_learning_db_v28.py +110 -0
- package/src/learning/tests/test_learning_init_v28.py +48 -0
- package/src/learning/tests/test_outcome_signals.py +48 -0
- package/src/learning/tests/test_project_context.py +2 -6
- package/src/learning/tests/test_schema_migration.py +319 -0
- package/src/learning/tests/test_signal_inference.py +11 -13
- package/src/learning/tests/test_source_quality.py +2 -6
- package/src/learning/tests/test_synthetic_bootstrap.py +3 -7
- package/src/learning/tests/test_workflow_miner.py +2 -6
- package/src/learning/workflow_pattern_miner.py +2 -12
- package/src/lifecycle/__init__.py +54 -0
- package/src/lifecycle/bounded_growth.py +239 -0
- package/src/lifecycle/compaction_engine.py +226 -0
- package/src/lifecycle/lifecycle_engine.py +302 -0
- package/src/lifecycle/lifecycle_evaluator.py +225 -0
- package/src/lifecycle/lifecycle_scheduler.py +130 -0
- package/src/lifecycle/retention_policy.py +285 -0
- package/src/lifecycle/tests/__init__.py +4 -0
- package/src/lifecycle/tests/test_bounded_growth.py +193 -0
- package/src/lifecycle/tests/test_compaction.py +179 -0
- package/src/lifecycle/tests/test_lifecycle_engine.py +137 -0
- package/src/lifecycle/tests/test_lifecycle_evaluation.py +177 -0
- package/src/lifecycle/tests/test_lifecycle_scheduler.py +127 -0
- package/src/lifecycle/tests/test_lifecycle_search.py +109 -0
- package/src/lifecycle/tests/test_mcp_compact.py +149 -0
- package/src/lifecycle/tests/test_mcp_lifecycle_status.py +114 -0
- package/src/lifecycle/tests/test_retention_policy.py +162 -0
- package/src/mcp_tools_v28.py +280 -0
- package/src/memory-profiles.py +2 -12
- package/src/memory-reset.py +2 -12
- package/src/memory_compression.py +2 -12
- package/src/memory_store_v2.py +76 -20
- package/src/migrate_v1_to_v2.py +2 -12
- package/src/pattern_learner.py +29 -975
- package/src/patterns/__init__.py +24 -0
- package/src/patterns/analyzers.py +247 -0
- package/src/patterns/learner.py +267 -0
- package/src/patterns/scoring.py +167 -0
- package/src/patterns/store.py +223 -0
- package/src/patterns/terminology.py +138 -0
- package/src/provenance_tracker.py +4 -14
- package/src/query_optimizer.py +4 -6
- package/src/rate_limiter.py +2 -6
- package/src/search/__init__.py +20 -0
- package/src/search/cli.py +77 -0
- package/src/search/constants.py +26 -0
- package/src/search/engine.py +239 -0
- package/src/search/fusion.py +122 -0
- package/src/search/index_loader.py +112 -0
- package/src/search/methods.py +162 -0
- package/src/search_engine_v2.py +4 -6
- package/src/setup_validator.py +7 -13
- package/src/subscription_manager.py +2 -12
- package/src/tree/__init__.py +59 -0
- package/src/tree/builder.py +183 -0
- package/src/tree/nodes.py +196 -0
- package/src/tree/queries.py +252 -0
- package/src/tree/schema.py +76 -0
- package/src/tree_manager.py +10 -711
- package/src/trust/__init__.py +45 -0
- package/src/trust/constants.py +66 -0
- package/src/trust/queries.py +157 -0
- package/src/trust/schema.py +95 -0
- package/src/trust/scorer.py +299 -0
- package/src/trust/signals.py +95 -0
- package/src/trust_scorer.py +39 -697
- package/src/webhook_dispatcher.py +2 -12
- package/ui/app.js +1 -1
- package/ui/js/agents.js +1 -1
- package/ui_server.py +2 -14
- package/ATTRIBUTION.md +0 -140
- package/docs/ARCHITECTURE-V2.5.md +0 -190
- package/docs/GRAPH-ENGINE.md +0 -503
- package/docs/architecture-diagram.drawio +0 -405
- package/docs/plans/2026-02-13-benchmark-suite.md +0 -1349
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
# SPDX-License-Identifier: MIT
# Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
"""SLM v2.8 Lifecycle Engine — Memory State Machine + Bounded Growth.

Manages memory states: ACTIVE → WARM → COLD → ARCHIVED → TOMBSTONED.
Layers on top of existing tier-based compression.
All features opt-in: absent config = v2.7 behavior.

Graceful degradation: if this module fails to import,
core memory operations continue unchanged.
"""
import threading
from pathlib import Path
from typing import Optional, Dict, Any

# Feature flags.
# LIFECYCLE_AVAILABLE flips to True only when every submodule below imports
# cleanly; _init_error keeps the ImportError text for get_status() diagnostics.
LIFECYCLE_AVAILABLE = False
_init_error: Optional[str] = None

try:
    from .lifecycle_engine import LifecycleEngine
    from .lifecycle_evaluator import LifecycleEvaluator
    from .retention_policy import RetentionPolicyManager
    from .bounded_growth import BoundedGrowthEnforcer
    LIFECYCLE_AVAILABLE = True
except ImportError as e:
    # Degrade gracefully: callers consult LIFECYCLE_AVAILABLE / get_status()
    # rather than hitting a hard failure at import time.
    _init_error = str(e)

# Lazy singletons.
# The engine is constructed on first use (see get_lifecycle_engine); the lock
# prevents two threads from racing the first-time construction.
_lifecycle_engine: Optional["LifecycleEngine"] = None
_lifecycle_lock = threading.Lock()
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def get_lifecycle_engine(db_path: Optional[Path] = None) -> Optional["LifecycleEngine"]:
    """Return the process-wide LifecycleEngine, creating it on first call.

    Returns None when the lifecycle submodules failed to import, or when the
    engine itself cannot be constructed. Note that ``db_path`` only takes
    effect on the call that actually creates the singleton.
    """
    global _lifecycle_engine
    if not LIFECYCLE_AVAILABLE:
        return None
    with _lifecycle_lock:
        engine = _lifecycle_engine
        if engine is not None:
            return engine
        try:
            engine = LifecycleEngine(db_path)
        except Exception:
            # Construction failure is non-fatal; a later call will retry.
            return None
        _lifecycle_engine = engine
        return engine
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def get_status() -> Dict[str, Any]:
    """Report lifecycle subsystem health: import success, error, engine state."""
    return dict(
        lifecycle_available=LIFECYCLE_AVAILABLE,
        init_error=_init_error,
        engine_active=_lifecycle_engine is not None,
    )
|
|
@@ -0,0 +1,239 @@
|
|
|
1
|
+
# SPDX-License-Identifier: MIT
# Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
"""Bounded growth enforcement — ensures memory counts stay within limits.

When the count of memories in a given lifecycle state exceeds configurable
bounds, the lowest-scoring memories are transitioned to the next state.

Scoring formula: importance_norm * recency_factor * frequency_factor * behavioral_value
- importance_norm: importance / 10.0 (0.1 to 1.0)
- recency_factor: 1.0 / (1.0 + days_stale / 30.0) (hyperbolic decay; halves at 30 days stale)
- frequency_factor: 0.5 + 0.5 * min(access_count / age_days, 1.0)
- behavioral_value: 1.0 (placeholder for Phase 2 integration)

Lower score = evict first.
"""
import sqlite3
import json
from datetime import datetime
from pathlib import Path
from typing import Optional, Dict, Any, List

from .lifecycle_engine import LifecycleEngine


# Default bounds — generous defaults, configurable via lifecycle_config.json
# (read under the "bounds" key; see BoundedGrowthEnforcer._load_bounds).
DEFAULT_BOUNDS: Dict[str, int] = {
    "max_active": 10000,
    "max_warm": 5000,
}
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class BoundedGrowthEnforcer:
    """Enforces memory count limits by transitioning lowest-scoring memories.

    When active memories exceed max_active, the lowest-scoring transition to warm.
    When warm memories exceed max_warm, the lowest-scoring transition to cold.
    """

    def __init__(
        self,
        db_path: Optional[str] = None,
        config_path: Optional[str] = None,
    ):
        # Default to the standard SLM database location under the home dir.
        if db_path is None:
            db_path = str(Path.home() / ".claude-memory" / "memory.db")
        self._db_path = str(db_path)
        self._config_path = config_path
        # State transitions are delegated to the lifecycle engine so the
        # state-change bookkeeping lives in one place.
        self._engine = LifecycleEngine(self._db_path)

    def _get_connection(self) -> sqlite3.Connection:
        """Get a SQLite connection to memory.db (rows accessible by column name)."""
        conn = sqlite3.connect(self._db_path)
        conn.row_factory = sqlite3.Row
        return conn

    def enforce_bounds(self, profile: Optional[str] = None) -> Dict[str, Any]:
        """Check memory counts and transition excess memories.

        Args:
            profile: Filter by profile (None = all)

        Returns:
            Dict with enforced status, counts, limits, and transitions list.
            Note: the reported counts come from the state distribution taken
            *after* enforcement ran.
        """
        bounds = self._load_bounds()
        max_active = bounds.get("max_active", DEFAULT_BOUNDS["max_active"])
        max_warm = bounds.get("max_warm", DEFAULT_BOUNDS["max_warm"])

        transitions: List[Dict[str, Any]] = []

        # Enforce active limit
        active_transitions = self._enforce_state_limit(
            state="active",
            target_state="warm",
            max_count=max_active,
            profile=profile,
        )
        transitions.extend(active_transitions)

        # Enforce warm limit — runs after the active pass, so memories just
        # demoted to warm are counted against the warm limit here.
        warm_transitions = self._enforce_state_limit(
            state="warm",
            target_state="cold",
            max_count=max_warm,
            profile=profile,
        )
        transitions.extend(warm_transitions)

        # Build result
        dist = self._engine.get_state_distribution()
        return {
            "enforced": len(transitions) > 0,
            "active_count": dist.get("active", 0),
            "active_limit": max_active,
            "warm_count": dist.get("warm", 0),
            "warm_limit": max_warm,
            "transitions": transitions,
        }

    def _enforce_state_limit(
        self,
        state: str,
        target_state: str,
        max_count: int,
        profile: Optional[str] = None,
    ) -> List[Dict[str, Any]]:
        """Enforce a single state's count limit.

        Scores all memories in the given state, evicts the lowest-scoring
        excess memories to target_state.

        Returns:
            One dict per successful transition with memory_id, from_state,
            to_state and the score that selected it for eviction.
        """
        scored = self.score_all_memories(state=state, profile=profile)
        current_count = len(scored)

        if current_count <= max_count:
            return []

        excess = current_count - max_count
        # Sort ascending by score — lowest scores evicted first
        scored.sort(key=lambda s: s["score"])
        to_evict = scored[:excess]

        # Batch transition for performance — single connection, single commit
        mem_ids = [e["memory_id"] for e in to_evict]
        reasons = [f"bounded_growth_score_{e['score']:.4f}" for e in to_evict]
        # Map id -> score so the per-transition report below can echo the score
        # even though batch_transition only returns ids.
        score_map = {e["memory_id"]: e["score"] for e in to_evict}

        result = self._engine.batch_transition(mem_ids, target_state, reasons)

        transitions = []
        for entry in result.get("succeeded", []):
            transitions.append({
                "memory_id": entry["memory_id"],
                "from_state": state,
                "to_state": target_state,
                "score": score_map.get(entry["memory_id"], 0.0),
            })
        return transitions

    def score_all_memories(
        self,
        state: Optional[str] = None,
        profile: Optional[str] = None,
    ) -> List[Dict[str, Any]]:
        """Score all memories in a given state.

        Args:
            state: Lifecycle state to filter (None = all non-terminal states)
            profile: Filter by profile

        Returns:
            List of dicts with memory_id and score, sorted descending by score

        NOTE(review): assumes the memories table carries importance,
        last_accessed, created_at, access_count, lifecycle_state and profile
        columns — confirm against the v2.8 schema migration.
        """
        conn = self._get_connection()
        try:
            if state:
                query = (
                    "SELECT id, importance, last_accessed, created_at, access_count "
                    "FROM memories WHERE lifecycle_state = ?"
                )
                params: list = [state]
            else:
                # Terminal states (archived/tombstoned) are never re-scored.
                query = (
                    "SELECT id, importance, last_accessed, created_at, access_count "
                    "FROM memories WHERE lifecycle_state IN ('active', 'warm', 'cold')"
                )
                params = []

            if profile:
                query += " AND profile = ?"
                params.append(profile)

            rows = conn.execute(query, params).fetchall()
            now = datetime.now()

            scores = []
            for row in rows:
                score = self._score_row(row, now)
                scores.append({"memory_id": row["id"], "score": score})

            scores.sort(key=lambda s: s["score"], reverse=True)
            return scores
        finally:
            conn.close()

    def _score_row(self, row: sqlite3.Row, now: datetime) -> float:
        """Compute composite lifecycle score for a memory.

        Score = importance_norm * recency_factor * frequency_factor * behavioral_value

        Higher score = more valuable = keep longer.
        """
        # Importance: normalize toward 0.1-1.0; a missing or zero importance
        # defaults to 5 (mid-scale). Values above 10 would exceed 1.0.
        importance = max(row["importance"] or 5, 1)
        importance_norm = importance / 10.0

        # Recency: hyperbolic decay 1/(1 + d/30) — halves at 30 days stale.
        # Unparseable timestamps are treated as "fresh" (days_stale = 0).
        last_access_str = row["last_accessed"] or row["created_at"]
        days_stale = 0
        if last_access_str:
            try:
                last_access = datetime.fromisoformat(str(last_access_str))
                days_stale = max((now - last_access).days, 0)
            except (ValueError, TypeError):
                days_stale = 0
        recency_factor = 1.0 / (1.0 + days_stale / 30.0)

        # Access frequency: normalized by age (min age 1 day avoids div-by-zero)
        access_count = row["access_count"] or 0
        created_str = row["created_at"]
        age_days = 1
        if created_str:
            try:
                created = datetime.fromisoformat(str(created_str))
                age_days = max((now - created).days, 1)
            except (ValueError, TypeError):
                age_days = 1
        frequency_factor = 0.5 + 0.5 * min(access_count / age_days, 1.0)

        # Behavioral value: placeholder for Phase 2 integration
        behavioral_value = 1.0

        return importance_norm * recency_factor * frequency_factor * behavioral_value

    def _load_bounds(self) -> Dict[str, int]:
        """Load bounds config from lifecycle_config.json. Returns defaults if missing.

        Any read/parse failure silently falls back to DEFAULT_BOUNDS — by
        design, a broken config must never break enforcement.
        """
        try:
            if self._config_path:
                config_path = Path(self._config_path)
            else:
                # Config lives next to the database by default.
                config_path = Path(self._db_path).parent / "lifecycle_config.json"
            if config_path.exists():
                with open(config_path) as f:
                    user_config = json.load(f)
                bounds = user_config.get("bounds", {})
                # User values override defaults key-by-key.
                return {**DEFAULT_BOUNDS, **bounds}
        except Exception:
            pass
        return dict(DEFAULT_BOUNDS)
|
|
@@ -0,0 +1,226 @@
|
|
|
1
|
+
# SPDX-License-Identifier: MIT
# Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
"""Formal memory compaction with information preservation guarantees.

Archives full content before compaction, replaces with compressed summary
containing key entities. Supports lossless restoration from archive.
"""
import re
import sqlite3
from datetime import datetime, timezone
from typing import Dict, List, Optional


# Common English stopwords for entity extraction (no external dependencies)
_STOPWORDS = frozenset(
    "a an the is are was were be been being have has had do does did will would "
    "shall should may might can could am not no nor and but or if then else when "
    "at by for with about against between through during before after above below "
    "to from up down in out on off over under again further once here there all "
    "each every both few more most other some such only own same so than too very "
    "just don doesn didn won wouldn isn aren wasn weren hasn haven hadn it its "
    "i me my myself we our ours ourselves you your yours yourself yourselves he "
    "him his himself she her hers herself they them their theirs themselves what "
    "which who whom this that these those of as into how also many use used uses "
    "using like make makes made includes include including provides provide "
    "widely ideal rapid many".split()
)

# Minimum word length for entity candidates
_MIN_WORD_LEN = 3
# Maximum number of entities to extract
_MAX_ENTITIES = 8


class CompactionEngine:
    """Manages content compaction and restoration for memory lifecycle.

    When a memory transitions to ARCHIVED state, this engine:
    1. Saves the full content to memory_archive (lossless backup)
    2. Replaces content with a compact summary + key entities
    3. Can restore full content if memory is reactivated

    A memory that already has an archive entry is refused further compaction:
    compacting twice would archive the summary over the real content and
    destroy the lossless guarantee.
    """

    def __init__(self, db_path: str) -> None:
        self.db_path = db_path
        self._ensure_archive_table()

    def _get_conn(self) -> sqlite3.Connection:
        """Create a new connection with row factory (column-name access)."""
        conn = sqlite3.connect(self.db_path)
        conn.row_factory = sqlite3.Row
        return conn

    def _ensure_archive_table(self) -> None:
        """Ensure the memory_archive table exists.

        NOTE(review): the declared FOREIGN KEY ... ON DELETE CASCADE is only
        enforced if the application enables PRAGMA foreign_keys on its
        connections — SQLite leaves it off by default; confirm callers do.
        """
        conn = self._get_conn()
        try:
            conn.execute("""
                CREATE TABLE IF NOT EXISTS memory_archive (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    memory_id INTEGER UNIQUE NOT NULL,
                    full_content TEXT NOT NULL,
                    archived_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                    FOREIGN KEY (memory_id) REFERENCES memories(id) ON DELETE CASCADE
                )
            """)
            # The UNIQUE constraint on memory_id already builds an implicit
            # index; this explicit one is kept for schema compatibility.
            conn.execute(
                "CREATE INDEX IF NOT EXISTS idx_archive_memory "
                "ON memory_archive(memory_id)"
            )
            conn.commit()
        finally:
            conn.close()

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def compact_memory(
        self, memory_id: int, dry_run: bool = False
    ) -> Dict:
        """Compact a memory: archive full content, replace with summary.

        Args:
            memory_id: ID of the memory to compact.
            dry_run: If True, compute result but do not modify the database.

        Returns:
            Dict with keys: success, entities, summary, original_length,
            and optionally dry_run, error.
        """
        conn = self._get_conn()
        try:
            row = conn.execute(
                "SELECT id, content, tags FROM memories WHERE id = ?",
                (memory_id,),
            ).fetchone()

            if row is None:
                return {"success": False, "error": "Memory not found"}

            # Guard against double compaction: an existing archive row means
            # memories.content already holds the summary, and re-archiving it
            # would overwrite the full content irrecoverably.
            existing = conn.execute(
                "SELECT 1 FROM memory_archive WHERE memory_id = ?",
                (memory_id,),
            ).fetchone()
            if existing is not None:
                return {"success": False, "error": "Memory already compacted"}

            content = row["content"]
            original_length = len(content)

            # Extract key entities (pure-Python frequency ranking)
            entities = self._extract_entities(content)
            summary = self._build_summary(content, entities, original_length)

            if dry_run:
                return {
                    "success": True,
                    "dry_run": True,
                    "entities": entities,
                    "summary": summary,
                    "original_length": original_length,
                }

            # Archive the full content (lossless backup). Plain INSERT is safe:
            # the guard above guarantees no archive row exists yet.
            now = datetime.now(timezone.utc).isoformat()
            conn.execute(
                "INSERT INTO memory_archive "
                "(memory_id, full_content, archived_at) VALUES (?, ?, ?)",
                (memory_id, content, now),
            )

            # Replace memory content with compacted version
            conn.execute(
                "UPDATE memories SET content = ? WHERE id = ?",
                (summary, memory_id),
            )
            conn.commit()

            return {
                "success": True,
                "entities": entities,
                "summary": summary,
                "original_length": original_length,
            }
        finally:
            conn.close()

    def restore_memory(self, memory_id: int) -> Dict:
        """Restore a compacted memory from its archive.

        Args:
            memory_id: ID of the memory to restore.

        Returns:
            Dict with keys: success, and optionally restored_length, error.
        """
        conn = self._get_conn()
        try:
            archive = conn.execute(
                "SELECT full_content FROM memory_archive WHERE memory_id = ?",
                (memory_id,),
            ).fetchone()

            if archive is None:
                return {"success": False, "error": "No archive found for memory"}

            full_content = archive["full_content"]

            # Restore original content
            conn.execute(
                "UPDATE memories SET content = ? WHERE id = ?",
                (full_content, memory_id),
            )

            # Remove the archive entry (content is back in main table);
            # this also re-enables compaction for the memory.
            conn.execute(
                "DELETE FROM memory_archive WHERE memory_id = ?",
                (memory_id,),
            )
            conn.commit()

            return {
                "success": True,
                "restored_length": len(full_content),
            }
        finally:
            conn.close()

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _extract_entities(text: str) -> List[str]:
        """Extract key entities using word-frequency ranking.

        Pure Python implementation — no sklearn or external NLP deps.
        Tokenizes, removes stopwords, counts frequency, returns top terms.
        """
        # Tokenize: alphabetic runs only, lowercased (digits are dropped)
        tokens = re.findall(r"[a-zA-Z]+", text.lower())

        # Filter: remove stopwords and short tokens
        meaningful = [
            t for t in tokens
            if t not in _STOPWORDS and len(t) >= _MIN_WORD_LEN
        ]

        # Count frequencies
        freq: Dict[str, int] = {}
        for token in meaningful:
            freq[token] = freq.get(token, 0) + 1

        # Sort by frequency descending, then alphabetically for stability
        ranked = sorted(freq.items(), key=lambda x: (-x[1], x[0]))

        # Return top N entities
        return [word for word, _ in ranked[:_MAX_ENTITIES]]

    @staticmethod
    def _build_summary(
        text: str, entities: List[str], original_length: int
    ) -> str:
        """Build the compacted content string.

        Format: [COMPACTED] Key entities: e1, e2, ... Original length: N chars.
        (``text`` is currently unused; kept in the signature for future
        summary strategies.)
        """
        entity_str = ", ".join(entities) if entities else "none"
        return (
            f"[COMPACTED] Key entities: {entity_str}. "
            f"Original length: {original_length} chars."
        )
|