superlocalmemory 2.8.2 → 2.8.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -5
- package/api_server.py +5 -0
- package/bin/slm.bat +3 -3
- package/docs/SECURITY-QUICK-REFERENCE.md +214 -0
- package/install.ps1 +11 -11
- package/mcp_server.py +3 -3
- package/package.json +2 -2
- package/requirements-core.txt +16 -18
- package/requirements-learning.txt +8 -8
- package/requirements.txt +9 -7
- package/scripts/prepack.js +33 -0
- package/scripts/verify-v27.ps1 +301 -0
- package/src/agent_registry.py +32 -28
- package/src/auto_backup.py +12 -6
- package/src/cache_manager.py +2 -2
- package/src/compression/__init__.py +25 -0
- package/src/compression/cli.py +150 -0
- package/src/compression/cold_storage.py +217 -0
- package/src/compression/config.py +72 -0
- package/src/compression/orchestrator.py +133 -0
- package/src/compression/tier2_compressor.py +228 -0
- package/src/compression/tier3_compressor.py +153 -0
- package/src/compression/tier_classifier.py +148 -0
- package/src/db_connection_manager.py +5 -5
- package/src/event_bus.py +24 -22
- package/src/hnsw_index.py +3 -3
- package/src/learning/__init__.py +5 -4
- package/src/learning/adaptive_ranker.py +14 -265
- package/src/learning/bootstrap/__init__.py +69 -0
- package/src/learning/bootstrap/constants.py +93 -0
- package/src/learning/bootstrap/db_queries.py +316 -0
- package/src/learning/bootstrap/sampling.py +82 -0
- package/src/learning/bootstrap/text_utils.py +71 -0
- package/src/learning/cross_project_aggregator.py +58 -57
- package/src/learning/db/__init__.py +40 -0
- package/src/learning/db/constants.py +44 -0
- package/src/learning/db/schema.py +279 -0
- package/src/learning/learning_db.py +15 -234
- package/src/learning/ranking/__init__.py +33 -0
- package/src/learning/ranking/constants.py +84 -0
- package/src/learning/ranking/helpers.py +278 -0
- package/src/learning/source_quality_scorer.py +66 -65
- package/src/learning/synthetic_bootstrap.py +28 -310
- package/src/memory/__init__.py +36 -0
- package/src/memory/cli.py +205 -0
- package/src/memory/constants.py +39 -0
- package/src/memory/helpers.py +28 -0
- package/src/memory/schema.py +166 -0
- package/src/memory-profiles.py +94 -86
- package/src/memory-reset.py +187 -185
- package/src/memory_compression.py +2 -2
- package/src/memory_store_v2.py +34 -354
- package/src/migrate_v1_to_v2.py +11 -10
- package/src/patterns/analyzers.py +104 -100
- package/src/patterns/learner.py +17 -13
- package/src/patterns/scoring.py +25 -21
- package/src/patterns/store.py +40 -38
- package/src/patterns/terminology.py +53 -51
- package/src/provenance_tracker.py +2 -2
- package/src/qualixar_attribution.py +1 -1
- package/src/search/engine.py +16 -14
- package/src/search/index_loader.py +13 -11
- package/src/setup_validator.py +160 -158
- package/src/subscription_manager.py +20 -18
- package/src/tree/builder.py +66 -64
- package/src/tree/nodes.py +103 -97
- package/src/tree/queries.py +142 -137
- package/src/tree/schema.py +46 -42
- package/src/webhook_dispatcher.py +3 -3
- package/ui_server.py +7 -4
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# SPDX-License-Identifier: MIT
|
|
3
|
+
# Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
|
|
4
|
+
"""
|
|
5
|
+
Cold storage management for archived memories.
|
|
6
|
+
Handles compression and archival to gzipped JSON files.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import sqlite3
|
|
10
|
+
import json
|
|
11
|
+
import gzip
|
|
12
|
+
from datetime import datetime, timedelta
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
from typing import List, Dict, Optional, Any
|
|
15
|
+
|
|
16
|
+
from compression.config import CompressionConfig
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
# All persistent state lives under ~/.claude-memory (shared package-wide).
MEMORY_DIR = Path.home() / ".claude-memory"
# SQLite database holding the memories and memory_archive tables.
DB_PATH = MEMORY_DIR / "memory.db"
# Directory for monthly gzipped JSON archive files (archive-YYYY-MM.json.gz).
COLD_STORAGE_PATH = MEMORY_DIR / "cold-storage"
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class ColdStorageManager:
    """Manage cold storage archives for very old memories.

    Memories are exported to monthly gzipped JSON files
    (``archive-YYYY-MM.json.gz``) under the cold-storage directory and then
    removed from the ``memory_archive`` table; the compact Tier 3 row stays
    in ``memories``. Full content can be restored back on demand.
    """

    def __init__(self, db_path: Path = DB_PATH, storage_path: Path = COLD_STORAGE_PATH):
        self.db_path = db_path
        self.storage_path = storage_path
        # parents=True: on a fresh install ~/.claude-memory may not exist yet;
        # mkdir(exist_ok=True) alone would raise FileNotFoundError then.
        self.storage_path.mkdir(parents=True, exist_ok=True)
        self.config = CompressionConfig()

    def move_to_cold_storage(self, memory_ids: List[int]) -> int:
        """
        Move archived memories to gzipped JSON file.

        Args:
            memory_ids: List of memory IDs to archive

        Returns:
            Number of memories archived
        """
        if not memory_ids:
            return 0

        conn = sqlite3.connect(self.db_path)
        try:
            cursor = conn.cursor()

            # Build placeholders for SQL query (IDs come from our own DB, but
            # parameter binding keeps the query safe regardless).
            placeholders = ','.join('?' * len(memory_ids))

            # Get memories from archive table; LEFT JOIN so rows without an
            # archived full_content still export (full_content will be None).
            cursor.execute(f'''
                SELECT m.id, m.content, m.summary, m.tags, m.project_name,
                       m.created_at, a.full_content
                FROM memories m
                LEFT JOIN memory_archive a ON m.id = a.memory_id
                WHERE m.id IN ({placeholders})
            ''', memory_ids)

            memories = cursor.fetchall()

            if not memories:
                return 0

            # Build JSON export
            export_data = []
            for memory in memories:
                mem_id, content, summary, tags, project_name, created_at, full_content = memory

                export_data.append({
                    'id': mem_id,
                    'tier3_content': self._safe_json_load(content),
                    'summary': summary,
                    'tags': self._safe_json_load(tags) if tags else [],
                    'project': project_name,
                    'created_at': created_at,
                    'full_content': full_content  # May be None if not archived
                })

            # Write to one gzipped file per calendar month.
            filename = f"archive-{datetime.now().strftime('%Y-%m')}.json.gz"
            filepath = self.storage_path / filename

            # If file exists, append to it
            existing_data = []
            if filepath.exists():
                try:
                    with gzip.open(filepath, 'rt', encoding='utf-8') as f:
                        existing_data = json.load(f)
                except Exception:
                    pass  # File might be corrupted, start fresh
                # A parseable-but-wrong file (e.g. top-level dict) would crash
                # the merge below; treat it like corruption and start fresh.
                if not isinstance(existing_data, list):
                    existing_data = []

            # Merge with existing data (avoid duplicates). Skip malformed
            # entries instead of raising KeyError on a damaged archive.
            existing_ids = {item.get('id') for item in existing_data
                            if isinstance(item, dict)}
            for item in export_data:
                if item['id'] not in existing_ids:
                    existing_data.append(item)

            # Write combined data
            with gzip.open(filepath, 'wt', encoding='utf-8') as f:
                json.dump(existing_data, f, indent=2)

            # Delete from archive table (keep Tier 3 version in main table)
            cursor.executemany('DELETE FROM memory_archive WHERE memory_id = ?',
                               [(mid,) for mid in memory_ids])

            conn.commit()
        finally:
            conn.close()

        return len(export_data)

    def _safe_json_load(self, data: str) -> Any:
        """Parse *data* as JSON; return it unchanged when it is not JSON."""
        try:
            return json.loads(data)
        except (json.JSONDecodeError, TypeError):
            return data

    def restore_from_cold_storage(self, memory_id: int) -> Optional[str]:
        """
        Restore full content from cold storage archive.

        Scans every monthly archive file; on a hit, the full content is
        re-inserted into the memory_archive table as a side effect.

        Args:
            memory_id: ID of memory to restore

        Returns:
            Full content if found, None otherwise
        """
        # Search all archive files
        for archive_file in self.storage_path.glob('archive-*.json.gz'):
            try:
                with gzip.open(archive_file, 'rt', encoding='utf-8') as f:
                    data = json.load(f)

                for memory in data:
                    if memory.get('id') == memory_id:
                        full_content = memory.get('full_content')

                        if full_content:
                            # Restore to archive table so subsequent lookups
                            # hit the DB instead of rescanning files.
                            conn = sqlite3.connect(self.db_path)
                            try:
                                cursor = conn.cursor()

                                cursor.execute('''
                                    INSERT OR REPLACE INTO memory_archive
                                    (memory_id, full_content, archived_at)
                                    VALUES (?, ?, CURRENT_TIMESTAMP)
                                ''', (memory_id, full_content))

                                conn.commit()
                            finally:
                                conn.close()

                        return full_content
            except Exception as e:
                # Best effort: a damaged archive should not stop the search.
                print(f"Error reading archive {archive_file}: {e}")
                continue

        return None

    def get_cold_storage_candidates(self) -> List[int]:
        """Get memory IDs that are candidates for cold storage.

        A candidate is a Tier 3 memory older than the configured threshold
        whose importance is below 8 (high-importance rows are never moved).
        """
        threshold_date = datetime.now() - timedelta(days=self.config.cold_storage_threshold_days)

        conn = sqlite3.connect(self.db_path)
        try:
            cursor = conn.cursor()

            cursor.execute('''
                SELECT id FROM memories
                WHERE tier = 3
                AND created_at < ?
                AND importance < 8
            ''', (threshold_date.isoformat(),))

            memory_ids = [row[0] for row in cursor.fetchall()]
        finally:
            conn.close()

        return memory_ids

    def get_cold_storage_stats(self) -> Dict[str, Any]:
        """Get statistics about cold storage.

        Returns:
            Dict with archive_count, total_memories, total_size_bytes and a
            per-file breakdown under 'archives'. Unreadable files are skipped.
        """
        stats = {
            'archive_count': 0,
            'total_memories': 0,
            'total_size_bytes': 0,
            'archives': []
        }

        for archive_file in self.storage_path.glob('archive-*.json.gz'):
            try:
                size = archive_file.stat().st_size

                with gzip.open(archive_file, 'rt', encoding='utf-8') as f:
                    data = json.load(f)
                    memory_count = len(data)

                stats['archive_count'] += 1
                stats['total_memories'] += memory_count
                stats['total_size_bytes'] += size

                stats['archives'].append({
                    'filename': archive_file.name,
                    'memory_count': memory_count,
                    'size_bytes': size,
                    'size_mb': round(size / 1024 / 1024, 2)
                })
            except Exception:
                continue

        return stats
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# SPDX-License-Identifier: MIT
|
|
3
|
+
# Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
|
|
4
|
+
"""
|
|
5
|
+
Compression configuration management.
|
|
6
|
+
Handles loading and saving compression settings from config.json.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import json
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from typing import Dict, Any
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
# Package-wide config file, stored under the user's home directory.
MEMORY_DIR = Path.home() / ".claude-memory"
CONFIG_PATH = MEMORY_DIR / "config.json"
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class CompressionConfig:
    """Configuration for compression behavior.

    Reads the ``compression`` section of ~/.claude-memory/config.json and
    exposes its settings as properties with safe defaults, so callers never
    need to know whether the file (or section) exists.
    """

    def __init__(self):
        self.config = self._load_config()
        self.compression_settings = self.config.get('compression', {})

    def _load_config(self) -> Dict[str, Any]:
        """Load configuration from config.json.

        Returns:
            Parsed config dict, or {} when the file is missing, unreadable,
            or contains invalid JSON — a corrupted config must degrade to
            defaults instead of crashing every compression component.
        """
        if CONFIG_PATH.exists():
            try:
                with open(CONFIG_PATH, 'r', encoding='utf-8') as f:
                    return json.load(f)
            except (json.JSONDecodeError, OSError):
                return {}
        return {}

    def save(self):
        """Save configuration back to config.json."""
        with open(CONFIG_PATH, 'w', encoding='utf-8') as f:
            json.dump(self.config, f, indent=2)

    @property
    def enabled(self) -> bool:
        # Compression is on by default when no setting is present.
        return self.compression_settings.get('enabled', True)

    @property
    def tier2_threshold_days(self) -> int:
        return self.compression_settings.get('tier2_threshold_days', 30)

    @property
    def tier3_threshold_days(self) -> int:
        return self.compression_settings.get('tier3_threshold_days', 90)

    @property
    def cold_storage_threshold_days(self) -> int:
        return self.compression_settings.get('cold_storage_threshold_days', 365)

    @property
    def preserve_high_importance(self) -> bool:
        return self.compression_settings.get('preserve_high_importance', True)

    @property
    def preserve_recently_accessed(self) -> bool:
        return self.compression_settings.get('preserve_recently_accessed', True)

    def initialize_defaults(self):
        """Initialize compression settings in config if not present.

        Writes the defaults to disk only when the 'compression' section is
        missing, so user customizations are never overwritten.
        """
        if 'compression' not in self.config:
            self.config['compression'] = {
                'enabled': True,
                'tier2_threshold_days': 30,
                'tier3_threshold_days': 90,
                'cold_storage_threshold_days': 365,
                'preserve_high_importance': True,
                'preserve_recently_accessed': True
            }
            self.save()
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# SPDX-License-Identifier: MIT
|
|
3
|
+
# Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
|
|
4
|
+
"""
|
|
5
|
+
Compression orchestrator.
|
|
6
|
+
Coordinates classification, compression, and archival operations.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import sqlite3
|
|
10
|
+
from datetime import datetime
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from typing import Dict, Any
|
|
13
|
+
|
|
14
|
+
from compression.config import CompressionConfig
|
|
15
|
+
from compression.tier_classifier import TierClassifier
|
|
16
|
+
from compression.tier2_compressor import Tier2Compressor
|
|
17
|
+
from compression.tier3_compressor import Tier3Compressor
|
|
18
|
+
from compression.cold_storage import ColdStorageManager
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
# Default database location shared with the other compression modules.
MEMORY_DIR = Path.home() / ".claude-memory"
DB_PATH = MEMORY_DIR / "memory.db"
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class CompressionOrchestrator:
    """Main orchestrator for compression operations.

    Wires the classifier, the two tier compressors, and the cold-storage
    manager together and runs them as one pipeline.
    """

    def __init__(self, db_path: Path = DB_PATH):
        self.db_path = db_path
        self.config = CompressionConfig()
        self.classifier = TierClassifier(db_path)
        self.tier2_compressor = Tier2Compressor(db_path)
        self.tier3_compressor = Tier3Compressor(db_path)
        self.cold_storage = ColdStorageManager(db_path)

    def run_full_compression(self) -> Dict[str, Any]:
        """
        Run full compression cycle: classify, compress, and archive.

        Returns:
            Statistics about compression operation
        """
        if not self.config.enabled:
            return {'status': 'disabled', 'message': 'Compression is disabled in config'}

        report: Dict[str, Any] = {
            'started_at': datetime.now().isoformat(),
            'tier_updates': 0,
            'tier2_compressed': 0,
            'tier3_compressed': 0,
            'cold_stored': 0,
            'errors': [],
        }

        try:
            # Phase 1: reassign memories to tiers.
            report['tier_updates'] = len(self.classifier.classify_memories())

            # Phases 2 and 3: compress each tier in place.
            report['tier2_compressed'] = self.tier2_compressor.compress_all_tier2()
            report['tier3_compressed'] = self.tier3_compressor.compress_all_tier3()

            # Phase 4: push eligible old memories out to cold storage.
            candidates = self.cold_storage.get_cold_storage_candidates()
            if candidates:
                report['cold_stored'] = self.cold_storage.move_to_cold_storage(candidates)

            # Final reporting: tier distribution and estimated savings.
            report['tier_stats'] = self.classifier.get_tier_stats()
            report['space_savings'] = self._calculate_space_savings()
        except Exception as exc:
            # Orchestration boundary: record the failure in the report
            # instead of propagating it to the caller.
            report['errors'].append(str(exc))

        report['completed_at'] = datetime.now().isoformat()
        return report

    def _calculate_space_savings(self) -> Dict[str, Any]:
        """Calculate estimated space savings from compression."""
        conn = sqlite3.connect(self.db_path)
        try:
            cur = conn.cursor()

            # Per-tier row counts and live content sizes.
            cur.execute('''
                SELECT
                    tier,
                    COUNT(*) as count,
                    SUM(LENGTH(content)) as total_size
                FROM memories
                GROUP BY tier
            ''')
            tier_sizes = {
                tier: {'count': count, 'size_bytes': size or 0}
                for tier, count, size in cur.fetchall()
            }

            # Size of full-content copies held in the archive table.
            cur.execute('''
                SELECT
                    COUNT(*) as count,
                    SUM(LENGTH(full_content)) as total_size
                FROM memory_archive
            ''')
            archive_count, archive_size = cur.fetchone()
        finally:
            conn.close()

        # Estimate what everything would weigh if it were all still Tier 1
        # (50000 bytes is the fallback average when no Tier 1 rows exist).
        tier1 = tier_sizes.get(1, {})
        tier1_avg = tier1.get('size_bytes', 50000) / max(tier1.get('count', 1), 1)
        total_memories = sum(info.get('count', 0) for info in tier_sizes.values())
        estimated_original = int(tier1_avg * total_memories)

        current_size = sum(info.get('size_bytes', 0) for info in tier_sizes.values())

        return {
            'estimated_original_bytes': estimated_original,
            'current_size_bytes': current_size,
            'savings_bytes': estimated_original - current_size,
            'savings_percent': round((1 - current_size / max(estimated_original, 1)) * 100, 1),
            'tier_breakdown': tier_sizes,
            'archive_count': archive_count or 0,
            'archive_size_bytes': archive_size or 0,
        }
|
|
@@ -0,0 +1,228 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# SPDX-License-Identifier: MIT
|
|
3
|
+
# Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
|
|
4
|
+
"""
|
|
5
|
+
Tier 2 compression logic.
|
|
6
|
+
Compresses memories to summary + key excerpts format.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import sqlite3
|
|
10
|
+
import json
|
|
11
|
+
import re
|
|
12
|
+
from datetime import datetime
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
from typing import List
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
# Default database location shared with the other compression modules.
MEMORY_DIR = Path.home() / ".claude-memory"
DB_PATH = MEMORY_DIR / "memory.db"
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class Tier2Compressor:
    """Compress memories to summary + key excerpts (Tier 2).

    The full text of a compressed memory is preserved in the
    memory_archive table; the live row is rewritten with a compact JSON
    payload (summary, excerpts, original length, timestamp).
    """

    def __init__(self, db_path: Path = DB_PATH):
        self.db_path = db_path

    def compress_to_tier2(self, memory_id: int) -> bool:
        """
        Compress memory to summary + excerpts.

        Args:
            memory_id: ID of memory to compress

        Returns:
            True if compression succeeded, False otherwise
        """
        conn = sqlite3.connect(self.db_path)
        try:
            cur = conn.cursor()

            # Fetch the row; unknown IDs fail fast.
            cur.execute('''
                SELECT content, summary, tier FROM memories WHERE id = ?
            ''', (memory_id,))
            row = cur.fetchone()
            if not row:
                return False

            content, existing_summary, current_tier = row

            # Only rows classified as Tier 2 are eligible.
            if current_tier != 2:
                return False

            # A row already present in the archive was compressed earlier.
            cur.execute('''
                SELECT full_content FROM memory_archive WHERE memory_id = ?
            ''', (memory_id,))
            if cur.fetchone():
                return True  # Already compressed

            # Content that parses as a dict with a 'summary' key is already
            # in the compressed shape.
            try:
                decoded = json.loads(content)
                if isinstance(decoded, dict) and 'summary' in decoded:
                    return True  # Already compressed
            except (json.JSONDecodeError, TypeError):
                pass  # Plain text; proceed with compression

            # Reuse a substantial existing summary, otherwise derive one.
            if existing_summary and len(existing_summary) >= 100:
                summary = existing_summary
            else:
                summary = self._generate_summary(content)

            # Pull the most valuable fragments out of the original text.
            excerpts = self._extract_key_excerpts(content)

            payload = {
                'summary': summary,
                'excerpts': excerpts,
                'original_length': len(content),
                'compressed_at': datetime.now().isoformat()
            }

            # Preserve the full text in the archive table first...
            cur.execute('''
                INSERT INTO memory_archive (memory_id, full_content, archived_at)
                VALUES (?, ?, CURRENT_TIMESTAMP)
            ''', (memory_id, content))

            # ...then swap the live row over to the compact payload.
            cur.execute('''
                UPDATE memories
                SET content = ?, tier = 2, updated_at = CURRENT_TIMESTAMP
                WHERE id = ?
            ''', (json.dumps(payload), memory_id))

            conn.commit()
            return True
        finally:
            conn.close()

    def _generate_summary(self, content: str, max_length: int = 300) -> str:
        """
        Generate extractive summary from content.
        Uses sentence scoring based on heuristics (no external LLM).

        Args:
            content: Full content text
            max_length: Maximum summary length in characters

        Returns:
            Extracted summary
        """
        sentences = re.split(r'[.!?]+', content)

        # Heuristic vocabularies, hoisted out of the scoring loop.
        tech_terms = ['api', 'database', 'auth', 'component', 'function',
                      'class', 'method', 'variable', 'error', 'bug', 'fix',
                      'implement', 'refactor', 'test', 'deploy']
        important_keywords = ['important', 'critical', 'note', 'remember',
                              'key', 'main', 'primary', 'must', 'should']

        ranked = []
        last_index = len(sentences) - 1
        for idx, raw in enumerate(sentences):
            sentence = raw.strip()
            if len(sentence) < 10:
                continue

            lowered = sentence.lower()
            points = sum(1 for term in tech_terms if term in lowered)

            # Opening/closing sentences often carry thesis or conclusion.
            if idx == 0 or idx == last_index:
                points += 2

            # Digits usually signal concrete specifics worth keeping.
            if re.search(r'\d+', sentence):
                points += 1

            points += sum(2 for kw in important_keywords if kw in lowered)

            ranked.append((points, sentence))

        # Highest-scoring sentences first (stable sort keeps tie order).
        ranked.sort(reverse=True, key=lambda pair: pair[0])

        chosen = []
        used = 0
        for _points, sentence in ranked:
            if used + len(sentence) > max_length:
                break
            chosen.append(sentence)
            used += len(sentence)

        if not chosen:
            # Fallback: lead with the opening sentence (or a raw prefix).
            return sentences[0][:max_length] if sentences else content[:max_length]

        return '. '.join(chosen) + '.'

    def _extract_key_excerpts(self, content: str, max_excerpts: int = 3) -> List[str]:
        """
        Extract key excerpts (code blocks, lists, important paragraphs).

        Args:
            content: Full content text
            max_excerpts: Maximum number of excerpts to extract

        Returns:
            List of excerpt strings
        """
        excerpts: List[str] = []

        # Fenced markdown code blocks first (at most two).
        excerpts.extend(re.findall(r'```[\s\S]*?```', content)[:2])

        # At most one bullet/numbered list, if room remains.
        list_pattern = r'(?:^|\n)(?:[-*•]|\d+\.)\s+.+(?:\n(?:[-*•]|\d+\.)\s+.+)*'
        bullet_lists = re.findall(list_pattern, content, re.MULTILINE)
        if bullet_lists and len(excerpts) < max_excerpts:
            excerpts.extend(bullet_lists[:1])

        # Fill any remaining slots with paragraphs flagged as important.
        if len(excerpts) < max_excerpts:
            keywords = ['important', 'critical', 'note', 'remember', 'key']
            for paragraph in content.split('\n\n'):
                if len(excerpts) >= max_excerpts:
                    break
                if any(kw in paragraph.lower() for kw in keywords):
                    # Keep individual excerpts short.
                    if len(paragraph) > 200:
                        paragraph = paragraph[:197] + '...'
                    excerpts.append(paragraph)

        # Enforce the hard cap.
        return excerpts[:max_excerpts]

    def compress_all_tier2(self) -> int:
        """Compress all memories that are in Tier 2."""
        conn = sqlite3.connect(self.db_path)
        try:
            cur = conn.cursor()
            cur.execute('SELECT id FROM memories WHERE tier = 2')
            pending = [row[0] for row in cur.fetchall()]
        finally:
            conn.close()

        # Each compression opens its own connection, so the listing
        # connection above is closed before the loop starts.
        return sum(1 for mid in pending if self.compress_to_tier2(mid))
|