superlocalmemory 2.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ATTRIBUTION.md +140 -0
- package/CHANGELOG.md +1749 -0
- package/LICENSE +21 -0
- package/README.md +600 -0
- package/bin/aider-smart +72 -0
- package/bin/slm +202 -0
- package/bin/slm-npm +73 -0
- package/bin/slm.bat +195 -0
- package/bin/slm.cmd +10 -0
- package/bin/superlocalmemoryv2:list +3 -0
- package/bin/superlocalmemoryv2:profile +3 -0
- package/bin/superlocalmemoryv2:recall +3 -0
- package/bin/superlocalmemoryv2:remember +3 -0
- package/bin/superlocalmemoryv2:reset +3 -0
- package/bin/superlocalmemoryv2:status +3 -0
- package/completions/slm.bash +58 -0
- package/completions/slm.zsh +76 -0
- package/configs/antigravity-mcp.json +13 -0
- package/configs/chatgpt-desktop-mcp.json +7 -0
- package/configs/claude-desktop-mcp.json +15 -0
- package/configs/codex-mcp.toml +13 -0
- package/configs/cody-commands.json +29 -0
- package/configs/continue-mcp.yaml +14 -0
- package/configs/continue-skills.yaml +26 -0
- package/configs/cursor-mcp.json +15 -0
- package/configs/gemini-cli-mcp.json +11 -0
- package/configs/jetbrains-mcp.json +11 -0
- package/configs/opencode-mcp.json +12 -0
- package/configs/perplexity-mcp.json +9 -0
- package/configs/vscode-copilot-mcp.json +12 -0
- package/configs/windsurf-mcp.json +16 -0
- package/configs/zed-mcp.json +12 -0
- package/docs/ARCHITECTURE.md +877 -0
- package/docs/CLI-COMMANDS-REFERENCE.md +425 -0
- package/docs/COMPETITIVE-ANALYSIS.md +210 -0
- package/docs/COMPRESSION-README.md +390 -0
- package/docs/GRAPH-ENGINE.md +503 -0
- package/docs/MCP-MANUAL-SETUP.md +720 -0
- package/docs/MCP-TROUBLESHOOTING.md +787 -0
- package/docs/PATTERN-LEARNING.md +363 -0
- package/docs/PROFILES-GUIDE.md +453 -0
- package/docs/RESET-GUIDE.md +353 -0
- package/docs/SEARCH-ENGINE-V2.2.0.md +748 -0
- package/docs/SEARCH-INTEGRATION-GUIDE.md +502 -0
- package/docs/UI-SERVER.md +254 -0
- package/docs/UNIVERSAL-INTEGRATION.md +432 -0
- package/docs/V2.2.0-OPTIONAL-SEARCH.md +666 -0
- package/docs/WINDOWS-INSTALL-README.txt +34 -0
- package/docs/WINDOWS-POST-INSTALL.txt +45 -0
- package/docs/example_graph_usage.py +148 -0
- package/hooks/memory-list-skill.js +130 -0
- package/hooks/memory-profile-skill.js +284 -0
- package/hooks/memory-recall-skill.js +109 -0
- package/hooks/memory-remember-skill.js +127 -0
- package/hooks/memory-reset-skill.js +274 -0
- package/install-skills.sh +436 -0
- package/install.ps1 +417 -0
- package/install.sh +755 -0
- package/mcp_server.py +585 -0
- package/package.json +94 -0
- package/requirements-core.txt +24 -0
- package/requirements.txt +10 -0
- package/scripts/postinstall.js +126 -0
- package/scripts/preuninstall.js +57 -0
- package/skills/slm-build-graph/SKILL.md +423 -0
- package/skills/slm-list-recent/SKILL.md +348 -0
- package/skills/slm-recall/SKILL.md +325 -0
- package/skills/slm-remember/SKILL.md +194 -0
- package/skills/slm-status/SKILL.md +363 -0
- package/skills/slm-switch-profile/SKILL.md +442 -0
- package/src/__pycache__/cache_manager.cpython-312.pyc +0 -0
- package/src/__pycache__/embedding_engine.cpython-312.pyc +0 -0
- package/src/__pycache__/graph_engine.cpython-312.pyc +0 -0
- package/src/__pycache__/hnsw_index.cpython-312.pyc +0 -0
- package/src/__pycache__/hybrid_search.cpython-312.pyc +0 -0
- package/src/__pycache__/memory-profiles.cpython-312.pyc +0 -0
- package/src/__pycache__/memory-reset.cpython-312.pyc +0 -0
- package/src/__pycache__/memory_compression.cpython-312.pyc +0 -0
- package/src/__pycache__/memory_store_v2.cpython-312.pyc +0 -0
- package/src/__pycache__/migrate_v1_to_v2.cpython-312.pyc +0 -0
- package/src/__pycache__/pattern_learner.cpython-312.pyc +0 -0
- package/src/__pycache__/query_optimizer.cpython-312.pyc +0 -0
- package/src/__pycache__/search_engine_v2.cpython-312.pyc +0 -0
- package/src/__pycache__/setup_validator.cpython-312.pyc +0 -0
- package/src/__pycache__/tree_manager.cpython-312.pyc +0 -0
- package/src/cache_manager.py +520 -0
- package/src/embedding_engine.py +671 -0
- package/src/graph_engine.py +970 -0
- package/src/hnsw_index.py +626 -0
- package/src/hybrid_search.py +693 -0
- package/src/memory-profiles.py +518 -0
- package/src/memory-reset.py +485 -0
- package/src/memory_compression.py +999 -0
- package/src/memory_store_v2.py +1088 -0
- package/src/migrate_v1_to_v2.py +638 -0
- package/src/pattern_learner.py +898 -0
- package/src/query_optimizer.py +513 -0
- package/src/search_engine_v2.py +403 -0
- package/src/setup_validator.py +479 -0
- package/src/tree_manager.py +720 -0
package/src/memory_compression.py
@@ -0,0 +1,999 @@
#!/usr/bin/env python3
"""
SuperLocalMemory V2 - Intelligent Local Memory System
Copyright (c) 2026 Varun Pratap Bhardwaj
Licensed under MIT License

Repository: https://github.com/varun369/SuperLocalMemoryV2
Author: Varun Pratap Bhardwaj (Solution Architect)

NOTICE: This software is protected by MIT License.
Attribution must be preserved in all copies or derivatives.
"""

"""
Progressive Summarization Compression for SuperLocalMemory
Tier-based compression system to maintain 100+ memories efficiently.

Tier Strategy:
- Tier 1 (0-30 days): Full content (no compression)
- Tier 2 (30-90 days): Summary + key excerpts (~80% reduction)
- Tier 3 (90+ days): Bullet points only (~96% reduction)
- Cold Storage (1+ year): Gzipped JSON archives (~98% reduction)

No external LLM calls - all compression is extractive using local algorithms.
"""

import sqlite3
import json
import gzip
import re
from datetime import datetime, timedelta
from pathlib import Path
from typing import List, Dict, Optional, Tuple, Any
import hashlib


MEMORY_DIR = Path.home() / ".claude-memory"
DB_PATH = MEMORY_DIR / "memory.db"
CONFIG_PATH = MEMORY_DIR / "config.json"
COLD_STORAGE_PATH = MEMORY_DIR / "cold-storage"
LOGS_PATH = MEMORY_DIR / "logs"


class CompressionConfig:
    """Configuration for compression behavior."""

    def __init__(self):
        self.config = self._load_config()
        self.compression_settings = self.config.get('compression', {})

    def _load_config(self) -> Dict[str, Any]:
        """Load configuration from config.json."""
        if CONFIG_PATH.exists():
            with open(CONFIG_PATH, 'r') as f:
                return json.load(f)
        return {}

    def save(self):
        """Save configuration back to config.json."""
        with open(CONFIG_PATH, 'w') as f:
            json.dump(self.config, f, indent=2)

    @property
    def enabled(self) -> bool:
        return self.compression_settings.get('enabled', True)

    @property
    def tier2_threshold_days(self) -> int:
        return self.compression_settings.get('tier2_threshold_days', 30)

    @property
    def tier3_threshold_days(self) -> int:
        return self.compression_settings.get('tier3_threshold_days', 90)

    @property
    def cold_storage_threshold_days(self) -> int:
        return self.compression_settings.get('cold_storage_threshold_days', 365)

    @property
    def preserve_high_importance(self) -> bool:
        return self.compression_settings.get('preserve_high_importance', True)

    @property
    def preserve_recently_accessed(self) -> bool:
        return self.compression_settings.get('preserve_recently_accessed', True)

    def initialize_defaults(self):
        """Initialize compression settings in config if not present."""
        if 'compression' not in self.config:
            self.config['compression'] = {
                'enabled': True,
                'tier2_threshold_days': 30,
                'tier3_threshold_days': 90,
                'cold_storage_threshold_days': 365,
                'preserve_high_importance': True,
                'preserve_recently_accessed': True
            }
            self.save()


class TierClassifier:
    """Classify memories into compression tiers based on age and access patterns."""

    def __init__(self, db_path: Path = DB_PATH):
        self.db_path = db_path
        self.config = CompressionConfig()
        self._ensure_schema()

    def _ensure_schema(self):
        """Add tier and access tracking columns if not present."""
        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()

        # Check if tier column exists
        cursor.execute("PRAGMA table_info(memories)")
        columns = [row[1] for row in cursor.fetchall()]

        if 'tier' not in columns:
            cursor.execute('ALTER TABLE memories ADD COLUMN tier INTEGER DEFAULT 1')
            cursor.execute('CREATE INDEX IF NOT EXISTS idx_tier ON memories(tier)')

        if 'last_accessed' not in columns:
            cursor.execute('ALTER TABLE memories ADD COLUMN last_accessed TIMESTAMP')

        if 'access_count' not in columns:
            cursor.execute('ALTER TABLE memories ADD COLUMN access_count INTEGER DEFAULT 0')

        # Create memory_archive table if not exists
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS memory_archive (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                memory_id INTEGER UNIQUE NOT NULL,
                full_content TEXT NOT NULL,
                archived_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                FOREIGN KEY (memory_id) REFERENCES memories(id) ON DELETE CASCADE
            )
        ''')
        cursor.execute('CREATE INDEX IF NOT EXISTS idx_archive_memory ON memory_archive(memory_id)')

        conn.commit()
        conn.close()

    def classify_memories(self) -> List[Tuple[int, int]]:
        """
        Classify all memories into tiers based on age and access.

        Returns:
            List of (tier, memory_id) tuples
        """
        if not self.config.enabled:
            return []

        now = datetime.now()
        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()

        # Get all memories with access tracking
        cursor.execute('''
            SELECT id, created_at, last_accessed, access_count, importance, tier
            FROM memories
        ''')
        memories = cursor.fetchall()

        tier_updates = []

        for memory_id, created_at, last_accessed, access_count, importance, current_tier in memories:
            created = datetime.fromisoformat(created_at)
            age_days = (now - created).days

            # Override: High-importance memories stay in Tier 1
            if self.config.preserve_high_importance and importance and importance >= 8:
                tier = 1
            # Recently accessed stays in Tier 1
            elif self.config.preserve_recently_accessed and last_accessed:
                last_access = datetime.fromisoformat(last_accessed)
                if (now - last_access).days < 7:
                    tier = 1
                else:
                    tier = self._classify_by_age(age_days)
            # Age-based classification
            else:
                tier = self._classify_by_age(age_days)

            # Only update if tier changed
            if tier != current_tier:
                tier_updates.append((tier, memory_id))

        # Update tier field
        if tier_updates:
            cursor.executemany('''
                UPDATE memories SET tier = ? WHERE id = ?
            ''', tier_updates)
            conn.commit()

        conn.close()
        return tier_updates

    def _classify_by_age(self, age_days: int) -> int:
        """Classify memory tier based on age."""
        if age_days < self.config.tier2_threshold_days:
            return 1  # Recent
        elif age_days < self.config.tier3_threshold_days:
            return 2  # Active
        else:
            return 3  # Archived

    def get_tier_stats(self) -> Dict[str, int]:
        """Get count of memories in each tier."""
        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()

        cursor.execute('''
            SELECT tier, COUNT(*) FROM memories GROUP BY tier
        ''')
        stats = dict(cursor.fetchall())
        conn.close()

        return {
            'tier1': stats.get(1, 0),
            'tier2': stats.get(2, 0),
            'tier3': stats.get(3, 0)
        }


class Tier2Compressor:
    """Compress memories to summary + key excerpts (Tier 2)."""

    def __init__(self, db_path: Path = DB_PATH):
        self.db_path = db_path

    def compress_to_tier2(self, memory_id: int) -> bool:
        """
        Compress memory to summary + excerpts.

        Args:
            memory_id: ID of memory to compress

        Returns:
            True if compression succeeded, False otherwise
        """
        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()

        # Get full content
        cursor.execute('''
            SELECT content, summary, tier FROM memories WHERE id = ?
        ''', (memory_id,))
        result = cursor.fetchone()

        if not result:
            conn.close()
            return False

        content, existing_summary, current_tier = result

        # Skip if already compressed or in wrong tier
        if current_tier != 2:
            conn.close()
            return False

        # Check if already archived (don't re-compress)
        cursor.execute('''
            SELECT full_content FROM memory_archive WHERE memory_id = ?
        ''', (memory_id,))
        if cursor.fetchone():
            conn.close()
            return True  # Already compressed

        # Try to parse as JSON (might already be compressed)
        try:
            parsed = json.loads(content)
            if isinstance(parsed, dict) and 'summary' in parsed:
                conn.close()
                return True  # Already compressed
        except (json.JSONDecodeError, TypeError):
            pass  # Not compressed yet

        # Generate/enhance summary if needed
        if not existing_summary or len(existing_summary) < 100:
            summary = self._generate_summary(content)
        else:
            summary = existing_summary

        # Extract key excerpts (important sentences, code blocks, lists)
        excerpts = self._extract_key_excerpts(content)

        # Store compressed version
        compressed_content = {
            'summary': summary,
            'excerpts': excerpts,
            'original_length': len(content),
            'compressed_at': datetime.now().isoformat()
        }

        # Move full content to archive table
        cursor.execute('''
            INSERT INTO memory_archive (memory_id, full_content, archived_at)
            VALUES (?, ?, CURRENT_TIMESTAMP)
        ''', (memory_id, content))

        # Update memory with compressed version
        cursor.execute('''
            UPDATE memories
            SET content = ?, tier = 2, updated_at = CURRENT_TIMESTAMP
            WHERE id = ?
        ''', (json.dumps(compressed_content), memory_id))

        conn.commit()
        conn.close()
        return True

    def _generate_summary(self, content: str, max_length: int = 300) -> str:
        """
        Generate extractive summary from content.
        Uses sentence scoring based on heuristics (no external LLM).

        Args:
            content: Full content text
            max_length: Maximum summary length in characters

        Returns:
            Extracted summary
        """
        # Split into sentences
        sentences = re.split(r'[.!?]+', content)

        # Score sentences by importance (simple heuristic)
        scored_sentences = []

        for i, sent in enumerate(sentences):
            sent = sent.strip()
            if len(sent) < 10:
                continue

            score = 0

            # Boost if contains tech terms
            tech_terms = ['api', 'database', 'auth', 'component', 'function',
                          'class', 'method', 'variable', 'error', 'bug', 'fix',
                          'implement', 'refactor', 'test', 'deploy']
            score += sum(1 for term in tech_terms if term in sent.lower())

            # Boost if at start or end (thesis/conclusion)
            if i == 0 or i == len(sentences) - 1:
                score += 2

            # Boost if contains numbers/specifics
            if re.search(r'\d+', sent):
                score += 1

            # Boost if contains important keywords
            important_keywords = ['important', 'critical', 'note', 'remember',
                                  'key', 'main', 'primary', 'must', 'should']
            score += sum(2 for kw in important_keywords if kw in sent.lower())

            scored_sentences.append((score, sent))

        # Take top sentences up to max_length
        scored_sentences.sort(reverse=True, key=lambda x: x[0])

        summary_parts = []
        current_length = 0

        for score, sent in scored_sentences:
            if current_length + len(sent) > max_length:
                break

            summary_parts.append(sent)
            current_length += len(sent)

        if not summary_parts:
            # Fallback: take first sentence
            return sentences[0][:max_length] if sentences else content[:max_length]

        return '. '.join(summary_parts) + '.'

    def _extract_key_excerpts(self, content: str, max_excerpts: int = 3) -> List[str]:
        """
        Extract key excerpts (code blocks, lists, important paragraphs).

        Args:
            content: Full content text
            max_excerpts: Maximum number of excerpts to extract

        Returns:
            List of excerpt strings
        """
        excerpts = []

        # Extract code blocks (markdown or indented)
        code_blocks = re.findall(r'```[\s\S]*?```', content)
        excerpts.extend(code_blocks[:2])  # Max 2 code blocks

        # Extract bullet lists
        list_pattern = r'(?:^|\n)(?:[-*•]|\d+\.)\s+.+(?:\n(?:[-*•]|\d+\.)\s+.+)*'
        lists = re.findall(list_pattern, content, re.MULTILINE)
        if lists and len(excerpts) < max_excerpts:
            excerpts.extend(lists[:1])  # Max 1 list

        # Extract paragraphs with important keywords if we need more
        if len(excerpts) < max_excerpts:
            paragraphs = content.split('\n\n')
            important_keywords = ['important', 'critical', 'note', 'remember', 'key']

            for para in paragraphs:
                if len(excerpts) >= max_excerpts:
                    break

                if any(kw in para.lower() for kw in important_keywords):
                    # Truncate long paragraphs
                    if len(para) > 200:
                        para = para[:197] + '...'
                    excerpts.append(para)

        # Truncate if too many
        return excerpts[:max_excerpts]

    def compress_all_tier2(self) -> int:
        """Compress all memories that are in Tier 2."""
        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()

        cursor.execute('SELECT id FROM memories WHERE tier = 2')
        memory_ids = [row[0] for row in cursor.fetchall()]
        conn.close()

        compressed_count = 0
        for memory_id in memory_ids:
            if self.compress_to_tier2(memory_id):
                compressed_count += 1

        return compressed_count


class Tier3Compressor:
    """Compress memories to bullet points only (Tier 3)."""

    def __init__(self, db_path: Path = DB_PATH):
        self.db_path = db_path

    def compress_to_tier3(self, memory_id: int) -> bool:
        """
        Compress memory to bullet points only.

        Args:
            memory_id: ID of memory to compress

        Returns:
            True if compression succeeded, False otherwise
        """
        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()

        # Get Tier 2 compressed content
        cursor.execute('''
            SELECT content, tier FROM memories WHERE id = ?
        ''', (memory_id,))
        result = cursor.fetchone()

        if not result:
            conn.close()
            return False

        content, current_tier = result

        # Skip if in wrong tier
        if current_tier != 3:
            conn.close()
            return False

        # Try to parse as Tier 2 compressed content
        try:
            compressed_content = json.loads(content)

            # Check if already Tier 3
            if isinstance(compressed_content, dict) and 'bullets' in compressed_content:
                conn.close()
                return True  # Already Tier 3

            # Get summary from Tier 2
            if isinstance(compressed_content, dict) and 'summary' in compressed_content:
                summary = compressed_content.get('summary', '')
                tier2_archived_at = compressed_content.get('compressed_at')
                original_length = compressed_content.get('original_length', 0)
            else:
                # Not Tier 2 format, treat as plain text
                summary = content
                tier2_archived_at = None
                original_length = len(content)

        except (json.JSONDecodeError, TypeError):
            # Not JSON, treat as plain text
            summary = content
            tier2_archived_at = None
            original_length = len(content)

        # Convert summary to bullet points (max 5)
        bullet_points = self._summarize_to_bullets(summary)

        # Ultra-compressed version
        ultra_compressed = {
            'bullets': bullet_points,
            'tier2_archived_at': tier2_archived_at,
            'original_length': original_length,
            'compressed_to_tier3_at': datetime.now().isoformat()
        }

        # Update memory
        cursor.execute('''
            UPDATE memories
            SET content = ?, tier = 3, updated_at = CURRENT_TIMESTAMP
            WHERE id = ?
        ''', (json.dumps(ultra_compressed), memory_id))

        conn.commit()
        conn.close()
        return True

    def _summarize_to_bullets(self, summary: str, max_bullets: int = 5) -> List[str]:
        """
        Convert summary to bullet points.

        Args:
            summary: Summary text
            max_bullets: Maximum number of bullets

        Returns:
            List of bullet point strings
        """
        # Split into sentences
        sentences = re.split(r'[.!?]+', summary)

        bullets = []

        for sent in sentences:
            sent = sent.strip()

            if len(sent) < 10:
                continue

            # Truncate long sentences
            if len(sent) > 80:
                sent = sent[:77] + '...'

            bullets.append(sent)

            if len(bullets) >= max_bullets:
                break

        return bullets if bullets else ['[No summary available]']

    def compress_all_tier3(self) -> int:
        """Compress all memories that are in Tier 3."""
        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()

        cursor.execute('SELECT id FROM memories WHERE tier = 3')
        memory_ids = [row[0] for row in cursor.fetchall()]
        conn.close()

        compressed_count = 0
        for memory_id in memory_ids:
            if self.compress_to_tier3(memory_id):
                compressed_count += 1

        return compressed_count


class ColdStorageManager:
    """Manage cold storage archives for very old memories."""

    def __init__(self, db_path: Path = DB_PATH, storage_path: Path = COLD_STORAGE_PATH):
        self.db_path = db_path
        self.storage_path = storage_path
        self.storage_path.mkdir(exist_ok=True)
        self.config = CompressionConfig()

    def move_to_cold_storage(self, memory_ids: List[int]) -> int:
        """
        Move archived memories to gzipped JSON file.

        Args:
            memory_ids: List of memory IDs to archive

        Returns:
            Number of memories archived
        """
        if not memory_ids:
            return 0

        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()

        # Build placeholders for SQL query
        placeholders = ','.join('?' * len(memory_ids))

        # Get memories from archive table
        cursor.execute(f'''
            SELECT m.id, m.content, m.summary, m.tags, m.project_name,
                   m.created_at, a.full_content
            FROM memories m
            LEFT JOIN memory_archive a ON m.id = a.memory_id
            WHERE m.id IN ({placeholders})
        ''', memory_ids)

        memories = cursor.fetchall()

        if not memories:
            conn.close()
            return 0

        # Build JSON export
        export_data = []

        for memory in memories:
            mem_id, content, summary, tags, project_name, created_at, full_content = memory

            export_data.append({
                'id': mem_id,
                'tier3_content': self._safe_json_load(content),
                'summary': summary,
                'tags': self._safe_json_load(tags) if tags else [],
                'project': project_name,
                'created_at': created_at,
                'full_content': full_content  # May be None if not archived
            })

        # Write to gzipped file
        filename = f"archive-{datetime.now().strftime('%Y-%m')}.json.gz"
        filepath = self.storage_path / filename

        # If file exists, append to it
        existing_data = []
        if filepath.exists():
            try:
                with gzip.open(filepath, 'rt', encoding='utf-8') as f:
                    existing_data = json.load(f)
            except Exception:
                pass  # File might be corrupted, start fresh

        # Merge with existing data (avoid duplicates)
        existing_ids = {item['id'] for item in existing_data}
        for item in export_data:
            if item['id'] not in existing_ids:
                existing_data.append(item)

        # Write combined data
        with gzip.open(filepath, 'wt', encoding='utf-8') as f:
            json.dump(existing_data, f, indent=2)

        # Delete from archive table (keep Tier 3 version in main table)
        cursor.executemany('DELETE FROM memory_archive WHERE memory_id = ?',
                           [(mid,) for mid in memory_ids])

        conn.commit()
        conn.close()

        return len(export_data)

    def _safe_json_load(self, data: str) -> Any:
        """Safely load JSON data."""
        try:
            return json.loads(data)
        except (json.JSONDecodeError, TypeError):
            return data

    def restore_from_cold_storage(self, memory_id: int) -> Optional[str]:
        """
        Restore full content from cold storage archive.

        Args:
            memory_id: ID of memory to restore

        Returns:
            Full content if found, None otherwise
        """
        # Search all archive files
        for archive_file in self.storage_path.glob('archive-*.json.gz'):
            try:
                with gzip.open(archive_file, 'rt', encoding='utf-8') as f:
                    data = json.load(f)

                for memory in data:
                    if memory['id'] == memory_id:
                        full_content = memory.get('full_content')

                        if full_content:
                            # Restore to archive table
                            conn = sqlite3.connect(self.db_path)
                            cursor = conn.cursor()

                            cursor.execute('''
                                INSERT OR REPLACE INTO memory_archive
                                (memory_id, full_content, archived_at)
                                VALUES (?, ?, CURRENT_TIMESTAMP)
                            ''', (memory_id, full_content))

                            conn.commit()
                            conn.close()

                            return full_content
            except Exception as e:
                print(f"Error reading archive {archive_file}: {e}")
                continue

        return None

    def get_cold_storage_candidates(self) -> List[int]:
        """Get memory IDs that are candidates for cold storage."""
        threshold_date = datetime.now() - timedelta(days=self.config.cold_storage_threshold_days)

        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()

        cursor.execute('''
            SELECT id FROM memories
            WHERE tier = 3
              AND created_at < ?
              AND importance < 8
        ''', (threshold_date.isoformat(),))

        memory_ids = [row[0] for row in cursor.fetchall()]
        conn.close()

        return memory_ids

    def get_cold_storage_stats(self) -> Dict[str, Any]:
        """Get statistics about cold storage."""
        stats = {
            'archive_count': 0,
            'total_memories': 0,
            'total_size_bytes': 0,
            'archives': []
        }

        for archive_file in self.storage_path.glob('archive-*.json.gz'):
            try:
                size = archive_file.stat().st_size

                with gzip.open(archive_file, 'rt', encoding='utf-8') as f:
                    data = json.load(f)
                    memory_count = len(data)

                stats['archive_count'] += 1
                stats['total_memories'] += memory_count
                stats['total_size_bytes'] += size

                stats['archives'].append({
                    'filename': archive_file.name,
                    'memory_count': memory_count,
                    'size_bytes': size,
                    'size_mb': round(size / 1024 / 1024, 2)
                })
            except Exception:
                continue

        return stats


class CompressionOrchestrator:
    """Main orchestrator for compression operations."""

    def __init__(self, db_path: Path = DB_PATH):
        self.db_path = db_path
        self.config = CompressionConfig()
        self.classifier = TierClassifier(db_path)
        self.tier2_compressor = Tier2Compressor(db_path)
        self.tier3_compressor = Tier3Compressor(db_path)
        self.cold_storage = ColdStorageManager(db_path)

    def run_full_compression(self) -> Dict[str, Any]:
        """
        Run full compression cycle: classify, compress, and archive.

        Returns:
            Statistics about compression operation
        """
        if not self.config.enabled:
            return {'status': 'disabled', 'message': 'Compression is disabled in config'}

        stats = {
            'started_at': datetime.now().isoformat(),
            'tier_updates': 0,
            'tier2_compressed': 0,
            'tier3_compressed': 0,
            'cold_stored': 0,
            'errors': []
        }

        try:
            # Step 1: Classify memories into tiers
            tier_updates = self.classifier.classify_memories()
            stats['tier_updates'] = len(tier_updates)

            # Step 2: Compress Tier 2 memories
            stats['tier2_compressed'] = self.tier2_compressor.compress_all_tier2()

            # Step 3: Compress Tier 3 memories
            stats['tier3_compressed'] = self.tier3_compressor.compress_all_tier3()

            # Step 4: Move old memories to cold storage
            candidates = self.cold_storage.get_cold_storage_candidates()
            if candidates:
                stats['cold_stored'] = self.cold_storage.move_to_cold_storage(candidates)

            # Get final tier stats
            stats['tier_stats'] = self.classifier.get_tier_stats()

            # Calculate space savings
            stats['space_savings'] = self._calculate_space_savings()

        except Exception as e:
            stats['errors'].append(str(e))

        stats['completed_at'] = datetime.now().isoformat()
        return stats

    def _calculate_space_savings(self) -> Dict[str, Any]:
        """Calculate estimated space savings from compression."""
        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()

        # Get size of compressed content
        cursor.execute('''
            SELECT
                tier,
                COUNT(*) as count,
                SUM(LENGTH(content)) as total_size
            FROM memories
            GROUP BY tier
        ''')

        tier_sizes = {}
        for tier, count, total_size in cursor.fetchall():
            tier_sizes[tier] = {
                'count': count,
                'size_bytes': total_size or 0
            }

        # Get size of archived content
        cursor.execute('''
            SELECT
                COUNT(*) as count,
                SUM(LENGTH(full_content)) as total_size
            FROM memory_archive
        ''')
        archive_count, archive_size = cursor.fetchone()

        conn.close()

        # Estimate original size if all were Tier 1
        tier1_avg = tier_sizes.get(1, {}).get('size_bytes', 50000) / max(tier_sizes.get(1, {}).get('count', 1), 1)
        total_memories = sum(t.get('count', 0) for t in tier_sizes.values())
        estimated_original = int(tier1_avg * total_memories)

        current_size = sum(t.get('size_bytes', 0) for t in tier_sizes.values())

        return {
            'estimated_original_bytes': estimated_original,
            'current_size_bytes': current_size,
            'savings_bytes': estimated_original - current_size,
            'savings_percent': round((1 - current_size / max(estimated_original, 1)) * 100, 1),
            'tier_breakdown': tier_sizes,
            'archive_count': archive_count or 0,
            'archive_size_bytes': archive_size or 0
        }


# CLI Interface
if __name__ == "__main__":
    import sys

    if len(sys.argv) < 2:
        print("Progressive Summarization Compression for SuperLocalMemory\n")
        print("Usage:")
        print("  python compression.py classify      # Classify memories into tiers")
        print("  python compression.py compress      # Run full compression cycle")
        print("  python compression.py stats         # Show compression statistics")
        print("  python compression.py tier2 <id>    # Compress specific memory to Tier 2")
        print("  python compression.py tier3 <id>    # Compress specific memory to Tier 3")
        print("  python compression.py cold-storage  # Move old memories to cold storage")
        print("  python compression.py restore <id>  # Restore memory from cold storage")
        print("  python compression.py init-config   # Initialize compression config")
        sys.exit(0)

    command = sys.argv[1]
    orchestrator = CompressionOrchestrator()

    if command == "classify":
        classifier = TierClassifier()
        updates = classifier.classify_memories()
        print(f"Classified {len(updates)} memories")

        stats = classifier.get_tier_stats()
        print(f"\nTier breakdown:")
        print(f"  Tier 1 (Full content): {stats['tier1']} memories")
        print(f"  Tier 2 (Summary+excerpts): {stats['tier2']} memories")
        print(f"  Tier 3 (Bullets only): {stats['tier3']} memories")

    elif command == "compress":
        print("Running full compression cycle...")
        stats = orchestrator.run_full_compression()

        print(f"\nCompression Results:")
        print(f"  Tier updates: {stats['tier_updates']}")
        print(f"  Tier 2 compressed: {stats['tier2_compressed']}")
        print(f"  Tier 3 compressed: {stats['tier3_compressed']}")
        print(f"  Moved to cold storage: {stats['cold_stored']}")

        if 'space_savings' in stats:
            savings = stats['space_savings']
            print(f"\nSpace Savings:")
            print(f"  Original size: {savings['estimated_original_bytes']:,} bytes")
            print(f"  Current size: {savings['current_size_bytes']:,} bytes")
            print(f"  Savings: {savings['savings_bytes']:,} bytes ({savings['savings_percent']}%)")

        if stats.get('errors'):
            print(f"\nErrors: {stats['errors']}")

    elif command == "stats":
        classifier = TierClassifier()
        tier_stats = classifier.get_tier_stats()

        cold_storage = ColdStorageManager()
        cold_stats = cold_storage.get_cold_storage_stats()

        savings = orchestrator._calculate_space_savings()

        print("Compression Statistics\n")
        print("Tier Breakdown:")
        print(f"  Tier 1 (Full content): {tier_stats['tier1']} memories")
        print(f"  Tier 2 (Summary+excerpts): {tier_stats['tier2']} memories")
        print(f"  Tier 3 (Bullets only): {tier_stats['tier3']} memories")

        print(f"\nCold Storage:")
        print(f"  Archive files: {cold_stats['archive_count']}")
        print(f"  Total memories: {cold_stats['total_memories']}")
        print(f"  Total size: {cold_stats['total_size_bytes']:,} bytes")

        print(f"\nSpace Savings:")
        print(f"  Estimated original: {savings['estimated_original_bytes']:,} bytes")
        print(f"  Current size: {savings['current_size_bytes']:,} bytes")
        print(f"  Savings: {savings['savings_bytes']:,} bytes ({savings['savings_percent']}%)")

    elif command == "tier2" and len(sys.argv) >= 3:
        try:
            memory_id = int(sys.argv[2])
            compressor = Tier2Compressor()
            if compressor.compress_to_tier2(memory_id):
                print(f"Memory #{memory_id} compressed to Tier 2")
            else:
                print(f"Failed to compress memory #{memory_id}")
        except ValueError:
            print("Error: Memory ID must be a number")

    elif command == "tier3" and len(sys.argv) >= 3:
        try:
            memory_id = int(sys.argv[2])
            compressor = Tier3Compressor()
            if compressor.compress_to_tier3(memory_id):
                print(f"Memory #{memory_id} compressed to Tier 3")
            else:
                print(f"Failed to compress memory #{memory_id}")
        except ValueError:
            print("Error: Memory ID must be a number")

    elif command == "cold-storage":
        cold_storage = ColdStorageManager()
        candidates = cold_storage.get_cold_storage_candidates()

        if not candidates:
            print("No memories ready for cold storage")
        else:
            print(f"Moving {len(candidates)} memories to cold storage...")
            count = cold_storage.move_to_cold_storage(candidates)
            print(f"Archived {count} memories")

    elif command == "restore" and len(sys.argv) >= 3:
        try:
            memory_id = int(sys.argv[2])
            cold_storage = ColdStorageManager()
            content = cold_storage.restore_from_cold_storage(memory_id)

            if content:
                print(f"Memory #{memory_id} restored from cold storage")
            else:
                print(f"Memory #{memory_id} not found in cold storage")
        except ValueError:
            print("Error: Memory ID must be a number")

    elif command == "init-config":
        config = CompressionConfig()
        config.initialize_defaults()
        print("Compression configuration initialized")
        print(json.dumps(config.compression_settings, indent=2))

    else:
        print(f"Unknown command: {command}")
        sys.exit(1)
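
For orientation, below is a minimal sketch of driving the compression pipeline above from Python rather than the CLI. It is illustrative only and not part of the published package; it assumes the file is importable as a module named memory_compression and that ~/.claude-memory/memory.db already contains a memories table, as the code above expects.

    # Hypothetical usage sketch -- not included in the package diff above.
    from memory_compression import CompressionConfig, CompressionOrchestrator

    # Write the default 'compression' section (30/90/365-day thresholds)
    # into ~/.claude-memory/config.json if it is not already present.
    CompressionConfig().initialize_defaults()

    # Classify tiers, compress Tier 2/3 memories, and move year-old,
    # low-importance Tier 3 memories into gzipped cold-storage archives.
    stats = CompressionOrchestrator().run_full_compression()

    print(stats.get('tier_stats'))
    print(stats.get('space_savings', {}).get('savings_percent'))

The same cycle is exposed by the compress subcommand in the CLI block at the end of the file.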