superlocalmemory 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100)
  1. package/ATTRIBUTION.md +140 -0
  2. package/CHANGELOG.md +1749 -0
  3. package/LICENSE +21 -0
  4. package/README.md +600 -0
  5. package/bin/aider-smart +72 -0
  6. package/bin/slm +202 -0
  7. package/bin/slm-npm +73 -0
  8. package/bin/slm.bat +195 -0
  9. package/bin/slm.cmd +10 -0
  10. package/bin/superlocalmemoryv2:list +3 -0
  11. package/bin/superlocalmemoryv2:profile +3 -0
  12. package/bin/superlocalmemoryv2:recall +3 -0
  13. package/bin/superlocalmemoryv2:remember +3 -0
  14. package/bin/superlocalmemoryv2:reset +3 -0
  15. package/bin/superlocalmemoryv2:status +3 -0
  16. package/completions/slm.bash +58 -0
  17. package/completions/slm.zsh +76 -0
  18. package/configs/antigravity-mcp.json +13 -0
  19. package/configs/chatgpt-desktop-mcp.json +7 -0
  20. package/configs/claude-desktop-mcp.json +15 -0
  21. package/configs/codex-mcp.toml +13 -0
  22. package/configs/cody-commands.json +29 -0
  23. package/configs/continue-mcp.yaml +14 -0
  24. package/configs/continue-skills.yaml +26 -0
  25. package/configs/cursor-mcp.json +15 -0
  26. package/configs/gemini-cli-mcp.json +11 -0
  27. package/configs/jetbrains-mcp.json +11 -0
  28. package/configs/opencode-mcp.json +12 -0
  29. package/configs/perplexity-mcp.json +9 -0
  30. package/configs/vscode-copilot-mcp.json +12 -0
  31. package/configs/windsurf-mcp.json +16 -0
  32. package/configs/zed-mcp.json +12 -0
  33. package/docs/ARCHITECTURE.md +877 -0
  34. package/docs/CLI-COMMANDS-REFERENCE.md +425 -0
  35. package/docs/COMPETITIVE-ANALYSIS.md +210 -0
  36. package/docs/COMPRESSION-README.md +390 -0
  37. package/docs/GRAPH-ENGINE.md +503 -0
  38. package/docs/MCP-MANUAL-SETUP.md +720 -0
  39. package/docs/MCP-TROUBLESHOOTING.md +787 -0
  40. package/docs/PATTERN-LEARNING.md +363 -0
  41. package/docs/PROFILES-GUIDE.md +453 -0
  42. package/docs/RESET-GUIDE.md +353 -0
  43. package/docs/SEARCH-ENGINE-V2.2.0.md +748 -0
  44. package/docs/SEARCH-INTEGRATION-GUIDE.md +502 -0
  45. package/docs/UI-SERVER.md +254 -0
  46. package/docs/UNIVERSAL-INTEGRATION.md +432 -0
  47. package/docs/V2.2.0-OPTIONAL-SEARCH.md +666 -0
  48. package/docs/WINDOWS-INSTALL-README.txt +34 -0
  49. package/docs/WINDOWS-POST-INSTALL.txt +45 -0
  50. package/docs/example_graph_usage.py +148 -0
  51. package/hooks/memory-list-skill.js +130 -0
  52. package/hooks/memory-profile-skill.js +284 -0
  53. package/hooks/memory-recall-skill.js +109 -0
  54. package/hooks/memory-remember-skill.js +127 -0
  55. package/hooks/memory-reset-skill.js +274 -0
  56. package/install-skills.sh +436 -0
  57. package/install.ps1 +417 -0
  58. package/install.sh +755 -0
  59. package/mcp_server.py +585 -0
  60. package/package.json +94 -0
  61. package/requirements-core.txt +24 -0
  62. package/requirements.txt +10 -0
  63. package/scripts/postinstall.js +126 -0
  64. package/scripts/preuninstall.js +57 -0
  65. package/skills/slm-build-graph/SKILL.md +423 -0
  66. package/skills/slm-list-recent/SKILL.md +348 -0
  67. package/skills/slm-recall/SKILL.md +325 -0
  68. package/skills/slm-remember/SKILL.md +194 -0
  69. package/skills/slm-status/SKILL.md +363 -0
  70. package/skills/slm-switch-profile/SKILL.md +442 -0
  71. package/src/__pycache__/cache_manager.cpython-312.pyc +0 -0
  72. package/src/__pycache__/embedding_engine.cpython-312.pyc +0 -0
  73. package/src/__pycache__/graph_engine.cpython-312.pyc +0 -0
  74. package/src/__pycache__/hnsw_index.cpython-312.pyc +0 -0
  75. package/src/__pycache__/hybrid_search.cpython-312.pyc +0 -0
  76. package/src/__pycache__/memory-profiles.cpython-312.pyc +0 -0
  77. package/src/__pycache__/memory-reset.cpython-312.pyc +0 -0
  78. package/src/__pycache__/memory_compression.cpython-312.pyc +0 -0
  79. package/src/__pycache__/memory_store_v2.cpython-312.pyc +0 -0
  80. package/src/__pycache__/migrate_v1_to_v2.cpython-312.pyc +0 -0
  81. package/src/__pycache__/pattern_learner.cpython-312.pyc +0 -0
  82. package/src/__pycache__/query_optimizer.cpython-312.pyc +0 -0
  83. package/src/__pycache__/search_engine_v2.cpython-312.pyc +0 -0
  84. package/src/__pycache__/setup_validator.cpython-312.pyc +0 -0
  85. package/src/__pycache__/tree_manager.cpython-312.pyc +0 -0
  86. package/src/cache_manager.py +520 -0
  87. package/src/embedding_engine.py +671 -0
  88. package/src/graph_engine.py +970 -0
  89. package/src/hnsw_index.py +626 -0
  90. package/src/hybrid_search.py +693 -0
  91. package/src/memory-profiles.py +518 -0
  92. package/src/memory-reset.py +485 -0
  93. package/src/memory_compression.py +999 -0
  94. package/src/memory_store_v2.py +1088 -0
  95. package/src/migrate_v1_to_v2.py +638 -0
  96. package/src/pattern_learner.py +898 -0
  97. package/src/query_optimizer.py +513 -0
  98. package/src/search_engine_v2.py +403 -0
  99. package/src/setup_validator.py +479 -0
  100. package/src/tree_manager.py +720 -0
@@ -0,0 +1,999 @@
+ #!/usr/bin/env python3
+ """
+ SuperLocalMemory V2 - Intelligent Local Memory System
+ Copyright (c) 2026 Varun Pratap Bhardwaj
+ Licensed under MIT License
+
+ Repository: https://github.com/varun369/SuperLocalMemoryV2
+ Author: Varun Pratap Bhardwaj (Solution Architect)
+
+ NOTICE: This software is protected by MIT License.
+ Attribution must be preserved in all copies or derivatives.
+ """
+
+ """
+ Progressive Summarization Compression for SuperLocalMemory
+ Tier-based compression system to maintain 100+ memories efficiently.
+
+ Tier Strategy:
+ - Tier 1 (0-30 days): Full content (no compression)
+ - Tier 2 (30-90 days): Summary + key excerpts (~80% reduction)
+ - Tier 3 (90+ days): Bullet points only (~96% reduction)
+ - Cold Storage (1+ year): Gzipped JSON archives (~98% reduction)
+
+ No external LLM calls - all compression is extractive using local algorithms.
+ """
+
+ import sqlite3
+ import json
+ import gzip
+ import re
+ from datetime import datetime, timedelta
+ from pathlib import Path
+ from typing import List, Dict, Optional, Tuple, Any
+ import hashlib
+
+
+ MEMORY_DIR = Path.home() / ".claude-memory"
+ DB_PATH = MEMORY_DIR / "memory.db"
+ CONFIG_PATH = MEMORY_DIR / "config.json"
+ COLD_STORAGE_PATH = MEMORY_DIR / "cold-storage"
+ LOGS_PATH = MEMORY_DIR / "logs"
+
+
+ class CompressionConfig:
+     """Configuration for compression behavior."""
+
+     def __init__(self):
+         self.config = self._load_config()
+         self.compression_settings = self.config.get('compression', {})
+
+     def _load_config(self) -> Dict[str, Any]:
+         """Load configuration from config.json."""
+         if CONFIG_PATH.exists():
+             with open(CONFIG_PATH, 'r') as f:
+                 return json.load(f)
+         return {}
+
+     def save(self):
+         """Save configuration back to config.json."""
+         with open(CONFIG_PATH, 'w') as f:
+             json.dump(self.config, f, indent=2)
+
+     @property
+     def enabled(self) -> bool:
+         return self.compression_settings.get('enabled', True)
+
+     @property
+     def tier2_threshold_days(self) -> int:
+         return self.compression_settings.get('tier2_threshold_days', 30)
+
+     @property
+     def tier3_threshold_days(self) -> int:
+         return self.compression_settings.get('tier3_threshold_days', 90)
+
+     @property
+     def cold_storage_threshold_days(self) -> int:
+         return self.compression_settings.get('cold_storage_threshold_days', 365)
+
+     @property
+     def preserve_high_importance(self) -> bool:
+         return self.compression_settings.get('preserve_high_importance', True)
+
+     @property
+     def preserve_recently_accessed(self) -> bool:
+         return self.compression_settings.get('preserve_recently_accessed', True)
+
+     def initialize_defaults(self):
+         """Initialize compression settings in config if not present."""
+         if 'compression' not in self.config:
+             self.config['compression'] = {
+                 'enabled': True,
+                 'tier2_threshold_days': 30,
+                 'tier3_threshold_days': 90,
+                 'cold_storage_threshold_days': 365,
+                 'preserve_high_importance': True,
+                 'preserve_recently_accessed': True
+             }
+             self.save()
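A minimal usage sketch of `CompressionConfig`, assuming `~/.claude-memory/` already exists so `save()` can write `config.json`:

```python
# Usage sketch (assumption: ~/.claude-memory/ exists and is writable).
config = CompressionConfig()
config.initialize_defaults()                # writes the defaults shown above, once
print(config.enabled)                       # True unless disabled in config.json
print(config.tier2_threshold_days)          # 30 by default
print(config.cold_storage_threshold_days)   # 365 by default
```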
+
+
+ class TierClassifier:
+     """Classify memories into compression tiers based on age and access patterns."""
+
+     def __init__(self, db_path: Path = DB_PATH):
+         self.db_path = db_path
+         self.config = CompressionConfig()
+         self._ensure_schema()
+
+     def _ensure_schema(self):
+         """Add tier and access tracking columns if not present."""
+         conn = sqlite3.connect(self.db_path)
+         cursor = conn.cursor()
+
+         # Check if tier column exists
+         cursor.execute("PRAGMA table_info(memories)")
+         columns = [row[1] for row in cursor.fetchall()]
+
+         if 'tier' not in columns:
+             cursor.execute('ALTER TABLE memories ADD COLUMN tier INTEGER DEFAULT 1')
+             cursor.execute('CREATE INDEX IF NOT EXISTS idx_tier ON memories(tier)')
+
+         if 'last_accessed' not in columns:
+             cursor.execute('ALTER TABLE memories ADD COLUMN last_accessed TIMESTAMP')
+
+         if 'access_count' not in columns:
+             cursor.execute('ALTER TABLE memories ADD COLUMN access_count INTEGER DEFAULT 0')
+
+         # Create memory_archive table if not exists
+         cursor.execute('''
+             CREATE TABLE IF NOT EXISTS memory_archive (
+                 id INTEGER PRIMARY KEY AUTOINCREMENT,
+                 memory_id INTEGER UNIQUE NOT NULL,
+                 full_content TEXT NOT NULL,
+                 archived_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+                 FOREIGN KEY (memory_id) REFERENCES memories(id) ON DELETE CASCADE
+             )
+         ''')
+         cursor.execute('CREATE INDEX IF NOT EXISTS idx_archive_memory ON memory_archive(memory_id)')
+
+         conn.commit()
+         conn.close()
+
+     def classify_memories(self) -> List[Tuple[int, int]]:
+         """
+         Classify all memories into tiers based on age and access.
+
+         Returns:
+             List of (tier, memory_id) tuples
+         """
+         if not self.config.enabled:
+             return []
+
+         now = datetime.now()
+         conn = sqlite3.connect(self.db_path)
+         cursor = conn.cursor()
+
+         # Get all memories with access tracking
+         cursor.execute('''
+             SELECT id, created_at, last_accessed, access_count, importance, tier
+             FROM memories
+         ''')
+         memories = cursor.fetchall()
+
+         tier_updates = []
+
+         for memory_id, created_at, last_accessed, access_count, importance, current_tier in memories:
+             created = datetime.fromisoformat(created_at)
+             age_days = (now - created).days
+
+             # Override: High-importance memories stay in Tier 1
+             if self.config.preserve_high_importance and importance and importance >= 8:
+                 tier = 1
+             # Recently accessed stays in Tier 1
+             elif self.config.preserve_recently_accessed and last_accessed:
+                 last_access = datetime.fromisoformat(last_accessed)
+                 if (now - last_access).days < 7:
+                     tier = 1
+                 else:
+                     tier = self._classify_by_age(age_days)
+             # Age-based classification
+             else:
+                 tier = self._classify_by_age(age_days)
+
+             # Only update if tier changed
+             if tier != current_tier:
+                 tier_updates.append((tier, memory_id))
+
+         # Update tier field
+         if tier_updates:
+             cursor.executemany('''
+                 UPDATE memories SET tier = ? WHERE id = ?
+             ''', tier_updates)
+             conn.commit()
+
+         conn.close()
+         return tier_updates
+
+     def _classify_by_age(self, age_days: int) -> int:
+         """Classify memory tier based on age."""
+         if age_days < self.config.tier2_threshold_days:
+             return 1  # Recent
+         elif age_days < self.config.tier3_threshold_days:
+             return 2  # Active
+         else:
+             return 3  # Archived
+
+     def get_tier_stats(self) -> Dict[str, int]:
+         """Get count of memories in each tier."""
+         conn = sqlite3.connect(self.db_path)
+         cursor = conn.cursor()
+
+         cursor.execute('''
+             SELECT tier, COUNT(*) FROM memories GROUP BY tier
+         ''')
+         stats = dict(cursor.fetchall())
+         conn.close()
+
+         return {
+             'tier1': stats.get(1, 0),
+             'tier2': stats.get(2, 0),
+             'tier3': stats.get(3, 0)
+         }
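Putting the classifier to work is a two-call affair; a sketch, assuming the default `~/.claude-memory/memory.db` already contains a `memories` table:

```python
# Sketch: reclassify all memories, then inspect the tier distribution.
classifier = TierClassifier()
changed = classifier.classify_memories()   # [(new_tier, memory_id), ...]
print(f"{len(changed)} memories changed tier")
print(classifier.get_tier_stats())         # {'tier1': n1, 'tier2': n2, 'tier3': n3}
```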
+
+
+ class Tier2Compressor:
+     """Compress memories to summary + key excerpts (Tier 2)."""
+
+     def __init__(self, db_path: Path = DB_PATH):
+         self.db_path = db_path
+
+     def compress_to_tier2(self, memory_id: int) -> bool:
+         """
+         Compress memory to summary + excerpts.
+
+         Args:
+             memory_id: ID of memory to compress
+
+         Returns:
+             True if compression succeeded, False otherwise
+         """
+         conn = sqlite3.connect(self.db_path)
+         cursor = conn.cursor()
+
+         # Get full content
+         cursor.execute('''
+             SELECT content, summary, tier FROM memories WHERE id = ?
+         ''', (memory_id,))
+         result = cursor.fetchone()
+
+         if not result:
+             conn.close()
+             return False
+
+         content, existing_summary, current_tier = result
+
+         # Skip if already compressed or in wrong tier
+         if current_tier != 2:
+             conn.close()
+             return False
+
+         # Check if already archived (don't re-compress)
+         cursor.execute('''
+             SELECT full_content FROM memory_archive WHERE memory_id = ?
+         ''', (memory_id,))
+         if cursor.fetchone():
+             conn.close()
+             return True  # Already compressed
+
+         # Try to parse as JSON (might already be compressed)
+         try:
+             parsed = json.loads(content)
+             if isinstance(parsed, dict) and 'summary' in parsed:
+                 conn.close()
+                 return True  # Already compressed
+         except (json.JSONDecodeError, TypeError):
+             pass  # Not compressed yet
+
+         # Generate/enhance summary if needed
+         if not existing_summary or len(existing_summary) < 100:
+             summary = self._generate_summary(content)
+         else:
+             summary = existing_summary
+
+         # Extract key excerpts (important sentences, code blocks, lists)
+         excerpts = self._extract_key_excerpts(content)
+
+         # Store compressed version
+         compressed_content = {
+             'summary': summary,
+             'excerpts': excerpts,
+             'original_length': len(content),
+             'compressed_at': datetime.now().isoformat()
+         }
+
+         # Move full content to archive table
+         cursor.execute('''
+             INSERT INTO memory_archive (memory_id, full_content, archived_at)
+             VALUES (?, ?, CURRENT_TIMESTAMP)
+         ''', (memory_id, content))
+
+         # Update memory with compressed version
+         cursor.execute('''
+             UPDATE memories
+             SET content = ?, tier = 2, updated_at = CURRENT_TIMESTAMP
+             WHERE id = ?
+         ''', (json.dumps(compressed_content), memory_id))
+
+         conn.commit()
+         conn.close()
+         return True
+
+     def _generate_summary(self, content: str, max_length: int = 300) -> str:
+         """
+         Generate extractive summary from content.
+         Uses sentence scoring based on heuristics (no external LLM).
+
+         Args:
+             content: Full content text
+             max_length: Maximum summary length in characters
+
+         Returns:
+             Extracted summary
+         """
+         # Split into sentences
+         sentences = re.split(r'[.!?]+', content)
+
+         # Score sentences by importance (simple heuristic)
+         scored_sentences = []
+
+         for i, sent in enumerate(sentences):
+             sent = sent.strip()
+             if len(sent) < 10:
+                 continue
+
+             score = 0
+
+             # Boost if contains tech terms
+             tech_terms = ['api', 'database', 'auth', 'component', 'function',
+                           'class', 'method', 'variable', 'error', 'bug', 'fix',
+                           'implement', 'refactor', 'test', 'deploy']
+             score += sum(1 for term in tech_terms if term in sent.lower())
+
+             # Boost if at start or end (thesis/conclusion)
+             if i == 0 or i == len(sentences) - 1:
+                 score += 2
+
+             # Boost if contains numbers/specifics
+             if re.search(r'\d+', sent):
+                 score += 1
+
+             # Boost if contains important keywords
+             important_keywords = ['important', 'critical', 'note', 'remember',
+                                   'key', 'main', 'primary', 'must', 'should']
+             score += sum(2 for kw in important_keywords if kw in sent.lower())
+
+             scored_sentences.append((score, sent))
+
+         # Take top sentences up to max_length
+         scored_sentences.sort(reverse=True, key=lambda x: x[0])
+
+         summary_parts = []
+         current_length = 0
+
+         for score, sent in scored_sentences:
+             if current_length + len(sent) > max_length:
+                 break
+
+             summary_parts.append(sent)
+             current_length += len(sent)
+
+         if not summary_parts:
+             # Fallback: take first sentence
+             return sentences[0][:max_length] if sentences else content[:max_length]
+
+         return '. '.join(summary_parts) + '.'
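A quick demo of the scorer: tech terms, digits, and emphasis keywords boost a sentence, so with a tight length budget only the high-scoring one survives. `Tier2Compressor()` is constructed against the default DB path here but `_generate_summary` never opens the database.

```python
# Demo sketch of the extractive scorer above (sample text is hypothetical).
text = ("We met on Tuesday. Remember: the auth API returned error 401 "
        "after deploy. Lunch was pleasant.")
print(Tier2Compressor()._generate_summary(text, max_length=60))
# -> "Remember: the auth API returned error 401 after deploy."
```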
376
+
377
+ def _extract_key_excerpts(self, content: str, max_excerpts: int = 3) -> List[str]:
378
+ """
379
+ Extract key excerpts (code blocks, lists, important paragraphs).
380
+
381
+ Args:
382
+ content: Full content text
383
+ max_excerpts: Maximum number of excerpts to extract
384
+
385
+ Returns:
386
+ List of excerpt strings
387
+ """
388
+ excerpts = []
389
+
390
+ # Extract code blocks (markdown or indented)
391
+ code_blocks = re.findall(r'```[\s\S]*?```', content)
392
+ excerpts.extend(code_blocks[:2]) # Max 2 code blocks
393
+
394
+ # Extract bullet lists
395
+ list_pattern = r'(?:^|\n)(?:[-*•]|\d+\.)\s+.+(?:\n(?:[-*•]|\d+\.)\s+.+)*'
396
+ lists = re.findall(list_pattern, content, re.MULTILINE)
397
+ if lists and len(excerpts) < max_excerpts:
398
+ excerpts.extend(lists[:1]) # Max 1 list
399
+
400
+ # Extract paragraphs with important keywords if we need more
401
+ if len(excerpts) < max_excerpts:
402
+ paragraphs = content.split('\n\n')
403
+ important_keywords = ['important', 'critical', 'note', 'remember', 'key']
404
+
405
+ for para in paragraphs:
406
+ if len(excerpts) >= max_excerpts:
407
+ break
408
+
409
+ if any(kw in para.lower() for kw in important_keywords):
410
+ # Truncate long paragraphs
411
+ if len(para) > 200:
412
+ para = para[:197] + '...'
413
+ excerpts.append(para)
414
+
415
+ # Truncate if too many
416
+ return excerpts[:max_excerpts]
417
+
418
+ def compress_all_tier2(self) -> int:
419
+ """Compress all memories that are in Tier 2."""
420
+ conn = sqlite3.connect(self.db_path)
421
+ cursor = conn.cursor()
422
+
423
+ cursor.execute('SELECT id FROM memories WHERE tier = 2')
424
+ memory_ids = [row[0] for row in cursor.fetchall()]
425
+ conn.close()
426
+
427
+ compressed_count = 0
428
+ for memory_id in memory_ids:
429
+ if self.compress_to_tier2(memory_id):
430
+ compressed_count += 1
431
+
432
+ return compressed_count
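After `compress_to_tier2()`, `memories.content` holds a JSON document whose keys come from `compressed_content` above (values here are hypothetical), and the verbatim original is parked in `memory_archive`:

```python
# Shape sketch of a Tier 2 record (values hypothetical):
#   {"summary": "...", "excerpts": ["..."], "original_length": 5234,
#    "compressed_at": "2026-01-15T10:22:03"}
# Batch form, as used by the orchestrator below:
compressor = Tier2Compressor()
print(f"compressed {compressor.compress_all_tier2()} Tier 2 memories")
```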
433
+
434
+
435
+ class Tier3Compressor:
436
+ """Compress memories to bullet points only (Tier 3)."""
437
+
438
+ def __init__(self, db_path: Path = DB_PATH):
439
+ self.db_path = db_path
440
+
441
+ def compress_to_tier3(self, memory_id: int) -> bool:
442
+ """
443
+ Compress memory to bullet points only.
444
+
445
+ Args:
446
+ memory_id: ID of memory to compress
447
+
448
+ Returns:
449
+ True if compression succeeded, False otherwise
450
+ """
451
+ conn = sqlite3.connect(self.db_path)
452
+ cursor = conn.cursor()
453
+
454
+ # Get Tier 2 compressed content
455
+ cursor.execute('''
456
+ SELECT content, tier FROM memories WHERE id = ?
457
+ ''', (memory_id,))
458
+ result = cursor.fetchone()
459
+
460
+ if not result:
461
+ conn.close()
462
+ return False
463
+
464
+ content, current_tier = result
465
+
466
+ # Skip if in wrong tier
467
+ if current_tier != 3:
468
+ conn.close()
469
+ return False
470
+
471
+ # Try to parse as Tier 2 compressed content
472
+ try:
473
+ compressed_content = json.loads(content)
474
+
475
+ # Check if already Tier 3
476
+ if isinstance(compressed_content, dict) and 'bullets' in compressed_content:
477
+ conn.close()
478
+ return True # Already Tier 3
479
+
480
+ # Get summary from Tier 2
481
+ if isinstance(compressed_content, dict) and 'summary' in compressed_content:
482
+ summary = compressed_content.get('summary', '')
483
+ tier2_archived_at = compressed_content.get('compressed_at')
484
+ original_length = compressed_content.get('original_length', 0)
485
+ else:
486
+ # Not Tier 2 format, treat as plain text
487
+ summary = content
488
+ tier2_archived_at = None
489
+ original_length = len(content)
490
+
491
+ except (json.JSONDecodeError, TypeError):
492
+ # Not JSON, treat as plain text
493
+ summary = content
494
+ tier2_archived_at = None
495
+ original_length = len(content)
496
+
497
+ # Convert summary to bullet points (max 5)
498
+ bullet_points = self._summarize_to_bullets(summary)
499
+
500
+ # Ultra-compressed version
501
+ ultra_compressed = {
502
+ 'bullets': bullet_points,
503
+ 'tier2_archived_at': tier2_archived_at,
504
+ 'original_length': original_length,
505
+ 'compressed_to_tier3_at': datetime.now().isoformat()
506
+ }
507
+
508
+ # Update memory
509
+ cursor.execute('''
510
+ UPDATE memories
511
+ SET content = ?, tier = 3, updated_at = CURRENT_TIMESTAMP
512
+ WHERE id = ?
513
+ ''', (json.dumps(ultra_compressed), memory_id))
514
+
515
+ conn.commit()
516
+ conn.close()
517
+ return True
518
+
519
+ def _summarize_to_bullets(self, summary: str, max_bullets: int = 5) -> List[str]:
520
+ """
521
+ Convert summary to bullet points.
522
+
523
+ Args:
524
+ summary: Summary text
525
+ max_bullets: Maximum number of bullets
526
+
527
+ Returns:
528
+ List of bullet point strings
529
+ """
530
+ # Split into sentences
531
+ sentences = re.split(r'[.!?]+', summary)
532
+
533
+ bullets = []
534
+
535
+ for sent in sentences:
536
+ sent = sent.strip()
537
+
538
+ if len(sent) < 10:
539
+ continue
540
+
541
+ # Truncate long sentences
542
+ if len(sent) > 80:
543
+ sent = sent[:77] + '...'
544
+
545
+ bullets.append(sent)
546
+
547
+ if len(bullets) >= max_bullets:
548
+ break
549
+
550
+ return bullets if bullets else ['[No summary available]']
551
+
552
+ def compress_all_tier3(self) -> int:
553
+ """Compress all memories that are in Tier 3."""
554
+ conn = sqlite3.connect(self.db_path)
555
+ cursor = conn.cursor()
556
+
557
+ cursor.execute('SELECT id FROM memories WHERE tier = 3')
558
+ memory_ids = [row[0] for row in cursor.fetchall()]
559
+ conn.close()
560
+
561
+ compressed_count = 0
562
+ for memory_id in memory_ids:
563
+ if self.compress_to_tier3(memory_id):
564
+ compressed_count += 1
565
+
566
+ return compressed_count
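The resulting Tier 3 record keeps at most five bullets of up to 80 characters each; a shape sketch with hypothetical values, keys taken from `ultra_compressed` above:

```python
# Shape sketch of a Tier 3 record in memories.content (values hypothetical):
#   {"bullets": ["Auth API returned 401 after deploy", "..."],
#    "tier2_archived_at": "2026-01-15T10:22:03",
#    "original_length": 5234,
#    "compressed_to_tier3_at": "2026-04-20T08:00:00"}
print(f"compressed {Tier3Compressor().compress_all_tier3()} Tier 3 memories")
```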
567
+
568
+
569
+ class ColdStorageManager:
570
+ """Manage cold storage archives for very old memories."""
571
+
572
+ def __init__(self, db_path: Path = DB_PATH, storage_path: Path = COLD_STORAGE_PATH):
573
+ self.db_path = db_path
574
+ self.storage_path = storage_path
575
+ self.storage_path.mkdir(exist_ok=True)
576
+ self.config = CompressionConfig()
577
+
578
+ def move_to_cold_storage(self, memory_ids: List[int]) -> int:
579
+ """
580
+ Move archived memories to gzipped JSON file.
581
+
582
+ Args:
583
+ memory_ids: List of memory IDs to archive
584
+
585
+ Returns:
586
+ Number of memories archived
587
+ """
588
+ if not memory_ids:
589
+ return 0
590
+
591
+ conn = sqlite3.connect(self.db_path)
592
+ cursor = conn.cursor()
593
+
594
+ # Build placeholders for SQL query
595
+ placeholders = ','.join('?' * len(memory_ids))
596
+
597
+ # Get memories from archive table
598
+ cursor.execute(f'''
599
+ SELECT m.id, m.content, m.summary, m.tags, m.project_name,
600
+ m.created_at, a.full_content
601
+ FROM memories m
602
+ LEFT JOIN memory_archive a ON m.id = a.memory_id
603
+ WHERE m.id IN ({placeholders})
604
+ ''', memory_ids)
605
+
606
+ memories = cursor.fetchall()
607
+
608
+ if not memories:
609
+ conn.close()
610
+ return 0
611
+
612
+ # Build JSON export
613
+ export_data = []
614
+
615
+ for memory in memories:
616
+ mem_id, content, summary, tags, project_name, created_at, full_content = memory
617
+
618
+ export_data.append({
619
+ 'id': mem_id,
620
+ 'tier3_content': self._safe_json_load(content),
621
+ 'summary': summary,
622
+ 'tags': self._safe_json_load(tags) if tags else [],
623
+ 'project': project_name,
624
+ 'created_at': created_at,
625
+ 'full_content': full_content # May be None if not archived
626
+ })
627
+
628
+ # Write to gzipped file
629
+ filename = f"archive-{datetime.now().strftime('%Y-%m')}.json.gz"
630
+ filepath = self.storage_path / filename
631
+
632
+ # If file exists, append to it
633
+ existing_data = []
634
+ if filepath.exists():
635
+ try:
636
+ with gzip.open(filepath, 'rt', encoding='utf-8') as f:
637
+ existing_data = json.load(f)
638
+ except Exception:
639
+ pass # File might be corrupted, start fresh
640
+
641
+ # Merge with existing data (avoid duplicates)
642
+ existing_ids = {item['id'] for item in existing_data}
643
+ for item in export_data:
644
+ if item['id'] not in existing_ids:
645
+ existing_data.append(item)
646
+
647
+ # Write combined data
648
+ with gzip.open(filepath, 'wt', encoding='utf-8') as f:
649
+ json.dump(existing_data, f, indent=2)
650
+
651
+ # Delete from archive table (keep Tier 3 version in main table)
652
+ cursor.executemany('DELETE FROM memory_archive WHERE memory_id = ?',
653
+ [(mid,) for mid in memory_ids])
654
+
655
+ conn.commit()
656
+ conn.close()
657
+
658
+ return len(export_data)
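Because archives are plain gzipped JSON, they can be inspected without this module. A sketch; the filename is hypothetical, following the `archive-YYYY-MM.json.gz` pattern above:

```python
# Sketch: inspect a cold-storage archive directly (filename hypothetical).
import gzip
import json
from pathlib import Path

archive = Path.home() / ".claude-memory" / "cold-storage" / "archive-2026-01.json.gz"
with gzip.open(archive, "rt", encoding="utf-8") as f:
    records = json.load(f)
print(f"{len(records)} archived memories")
print(records[0]["summary"], records[0]["created_at"])
```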
+
+     def _safe_json_load(self, data: str) -> Any:
+         """Safely load JSON data."""
+         try:
+             return json.loads(data)
+         except (json.JSONDecodeError, TypeError):
+             return data
+
+     def restore_from_cold_storage(self, memory_id: int) -> Optional[str]:
+         """
+         Restore full content from cold storage archive.
+
+         Args:
+             memory_id: ID of memory to restore
+
+         Returns:
+             Full content if found, None otherwise
+         """
+         # Search all archive files
+         for archive_file in self.storage_path.glob('archive-*.json.gz'):
+             try:
+                 with gzip.open(archive_file, 'rt', encoding='utf-8') as f:
+                     data = json.load(f)
+
+                 for memory in data:
+                     if memory['id'] == memory_id:
+                         full_content = memory.get('full_content')
+
+                         if full_content:
+                             # Restore to archive table
+                             conn = sqlite3.connect(self.db_path)
+                             cursor = conn.cursor()
+
+                             cursor.execute('''
+                                 INSERT OR REPLACE INTO memory_archive
+                                 (memory_id, full_content, archived_at)
+                                 VALUES (?, ?, CURRENT_TIMESTAMP)
+                             ''', (memory_id, full_content))
+
+                             conn.commit()
+                             conn.close()
+
+                             return full_content
+             except Exception as e:
+                 print(f"Error reading archive {archive_file}: {e}")
+                 continue
+
+         return None
+
+     def get_cold_storage_candidates(self) -> List[int]:
+         """Get memory IDs that are candidates for cold storage."""
+         threshold_date = datetime.now() - timedelta(days=self.config.cold_storage_threshold_days)
+
+         conn = sqlite3.connect(self.db_path)
+         cursor = conn.cursor()
+
+         cursor.execute('''
+             SELECT id FROM memories
+             WHERE tier = 3
+             AND created_at < ?
+             AND importance < 8
+         ''', (threshold_date.isoformat(),))
+
+         memory_ids = [row[0] for row in cursor.fetchall()]
+         conn.close()
+
+         return memory_ids
+
+     def get_cold_storage_stats(self) -> Dict[str, Any]:
+         """Get statistics about cold storage."""
+         stats = {
+             'archive_count': 0,
+             'total_memories': 0,
+             'total_size_bytes': 0,
+             'archives': []
+         }
+
+         for archive_file in self.storage_path.glob('archive-*.json.gz'):
+             try:
+                 size = archive_file.stat().st_size
+
+                 with gzip.open(archive_file, 'rt', encoding='utf-8') as f:
+                     data = json.load(f)
+                     memory_count = len(data)
+
+                 stats['archive_count'] += 1
+                 stats['total_memories'] += memory_count
+                 stats['total_size_bytes'] += size
+
+                 stats['archives'].append({
+                     'filename': archive_file.name,
+                     'memory_count': memory_count,
+                     'size_bytes': size,
+                     'size_mb': round(size / 1024 / 1024, 2)
+                 })
+             except Exception:
+                 continue
+
+         return stats
+
+
+ class CompressionOrchestrator:
+     """Main orchestrator for compression operations."""
+
+     def __init__(self, db_path: Path = DB_PATH):
+         self.db_path = db_path
+         self.config = CompressionConfig()
+         self.classifier = TierClassifier(db_path)
+         self.tier2_compressor = Tier2Compressor(db_path)
+         self.tier3_compressor = Tier3Compressor(db_path)
+         self.cold_storage = ColdStorageManager(db_path)
+
+     def run_full_compression(self) -> Dict[str, Any]:
+         """
+         Run full compression cycle: classify, compress, and archive.
+
+         Returns:
+             Statistics about compression operation
+         """
+         if not self.config.enabled:
+             return {'status': 'disabled', 'message': 'Compression is disabled in config'}
+
+         stats = {
+             'started_at': datetime.now().isoformat(),
+             'tier_updates': 0,
+             'tier2_compressed': 0,
+             'tier3_compressed': 0,
+             'cold_stored': 0,
+             'errors': []
+         }
+
+         try:
+             # Step 1: Classify memories into tiers
+             tier_updates = self.classifier.classify_memories()
+             stats['tier_updates'] = len(tier_updates)
+
+             # Step 2: Compress Tier 2 memories
+             stats['tier2_compressed'] = self.tier2_compressor.compress_all_tier2()
+
+             # Step 3: Compress Tier 3 memories
+             stats['tier3_compressed'] = self.tier3_compressor.compress_all_tier3()
+
+             # Step 4: Move old memories to cold storage
+             candidates = self.cold_storage.get_cold_storage_candidates()
+             if candidates:
+                 stats['cold_stored'] = self.cold_storage.move_to_cold_storage(candidates)
+
+             # Get final tier stats
+             stats['tier_stats'] = self.classifier.get_tier_stats()
+
+             # Calculate space savings
+             stats['space_savings'] = self._calculate_space_savings()
+
+         except Exception as e:
+             stats['errors'].append(str(e))
+
+         stats['completed_at'] = datetime.now().isoformat()
+         return stats
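A one-shot maintenance pass, equivalent to the `compress` CLI command defined at the bottom of this file:

```python
# Sketch: run the full cycle once and report the headline numbers.
result = CompressionOrchestrator().run_full_compression()
if result.get("status") == "disabled":
    print(result["message"])
else:
    print(f"tier updates: {result['tier_updates']}, "
          f"tier2: {result['tier2_compressed']}, "
          f"tier3: {result['tier3_compressed']}, "
          f"cold-stored: {result['cold_stored']}")
```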
+
+     def _calculate_space_savings(self) -> Dict[str, Any]:
+         """Calculate estimated space savings from compression."""
+         conn = sqlite3.connect(self.db_path)
+         cursor = conn.cursor()
+
+         # Get size of compressed content
+         cursor.execute('''
+             SELECT
+                 tier,
+                 COUNT(*) as count,
+                 SUM(LENGTH(content)) as total_size
+             FROM memories
+             GROUP BY tier
+         ''')
+
+         tier_sizes = {}
+         for tier, count, total_size in cursor.fetchall():
+             tier_sizes[tier] = {
+                 'count': count,
+                 'size_bytes': total_size or 0
+             }
+
+         # Get size of archived content
+         cursor.execute('''
+             SELECT
+                 COUNT(*) as count,
+                 SUM(LENGTH(full_content)) as total_size
+             FROM memory_archive
+         ''')
+         archive_count, archive_size = cursor.fetchone()
+
+         conn.close()
+
+         # Estimate original size if all were Tier 1
+         tier1_avg = tier_sizes.get(1, {}).get('size_bytes', 50000) / max(tier_sizes.get(1, {}).get('count', 1), 1)
+         total_memories = sum(t.get('count', 0) for t in tier_sizes.values())
+         estimated_original = int(tier1_avg * total_memories)
+
+         current_size = sum(t.get('size_bytes', 0) for t in tier_sizes.values())
+
+         return {
+             'estimated_original_bytes': estimated_original,
+             'current_size_bytes': current_size,
+             'savings_bytes': estimated_original - current_size,
+             'savings_percent': round((1 - current_size / max(estimated_original, 1)) * 100, 1),
+             'tier_breakdown': tier_sizes,
+             'archive_count': archive_count or 0,
+             'archive_size_bytes': archive_size or 0
+         }
+
+
+ # CLI Interface
+ if __name__ == "__main__":
+     import sys
+
+     if len(sys.argv) < 2:
+         print("Progressive Summarization Compression for SuperLocalMemory\n")
+         print("Usage:")
+         print("  python memory_compression.py classify      # Classify memories into tiers")
+         print("  python memory_compression.py compress      # Run full compression cycle")
+         print("  python memory_compression.py stats         # Show compression statistics")
+         print("  python memory_compression.py tier2 <id>    # Compress specific memory to Tier 2")
+         print("  python memory_compression.py tier3 <id>    # Compress specific memory to Tier 3")
+         print("  python memory_compression.py cold-storage  # Move old memories to cold storage")
+         print("  python memory_compression.py restore <id>  # Restore memory from cold storage")
+         print("  python memory_compression.py init-config   # Initialize compression config")
+         sys.exit(0)
+
+     command = sys.argv[1]
+     orchestrator = CompressionOrchestrator()
+
+     if command == "classify":
+         classifier = TierClassifier()
+         updates = classifier.classify_memories()
+         print(f"Classified {len(updates)} memories")
+
+         stats = classifier.get_tier_stats()
+         print("\nTier breakdown:")
+         print(f"  Tier 1 (Full content): {stats['tier1']} memories")
+         print(f"  Tier 2 (Summary+excerpts): {stats['tier2']} memories")
+         print(f"  Tier 3 (Bullets only): {stats['tier3']} memories")
+
+     elif command == "compress":
+         print("Running full compression cycle...")
+         stats = orchestrator.run_full_compression()
+
+         print("\nCompression Results:")
+         print(f"  Tier updates: {stats['tier_updates']}")
+         print(f"  Tier 2 compressed: {stats['tier2_compressed']}")
+         print(f"  Tier 3 compressed: {stats['tier3_compressed']}")
+         print(f"  Moved to cold storage: {stats['cold_stored']}")
+
+         if 'space_savings' in stats:
+             savings = stats['space_savings']
+             print("\nSpace Savings:")
+             print(f"  Original size: {savings['estimated_original_bytes']:,} bytes")
+             print(f"  Current size: {savings['current_size_bytes']:,} bytes")
+             print(f"  Savings: {savings['savings_bytes']:,} bytes ({savings['savings_percent']}%)")
+
+         if stats.get('errors'):
+             print(f"\nErrors: {stats['errors']}")
+
+     elif command == "stats":
+         classifier = TierClassifier()
+         tier_stats = classifier.get_tier_stats()
+
+         cold_storage = ColdStorageManager()
+         cold_stats = cold_storage.get_cold_storage_stats()
+
+         savings = orchestrator._calculate_space_savings()
+
+         print("Compression Statistics\n")
+         print("Tier Breakdown:")
+         print(f"  Tier 1 (Full content): {tier_stats['tier1']} memories")
+         print(f"  Tier 2 (Summary+excerpts): {tier_stats['tier2']} memories")
+         print(f"  Tier 3 (Bullets only): {tier_stats['tier3']} memories")
+
+         print("\nCold Storage:")
+         print(f"  Archive files: {cold_stats['archive_count']}")
+         print(f"  Total memories: {cold_stats['total_memories']}")
+         print(f"  Total size: {cold_stats['total_size_bytes']:,} bytes")
+
+         print("\nSpace Savings:")
+         print(f"  Estimated original: {savings['estimated_original_bytes']:,} bytes")
+         print(f"  Current size: {savings['current_size_bytes']:,} bytes")
+         print(f"  Savings: {savings['savings_bytes']:,} bytes ({savings['savings_percent']}%)")
+
+     elif command == "tier2" and len(sys.argv) >= 3:
+         try:
+             memory_id = int(sys.argv[2])
+             compressor = Tier2Compressor()
+             if compressor.compress_to_tier2(memory_id):
+                 print(f"Memory #{memory_id} compressed to Tier 2")
+             else:
+                 print(f"Failed to compress memory #{memory_id}")
+         except ValueError:
+             print("Error: Memory ID must be a number")
+
+     elif command == "tier3" and len(sys.argv) >= 3:
+         try:
+             memory_id = int(sys.argv[2])
+             compressor = Tier3Compressor()
+             if compressor.compress_to_tier3(memory_id):
+                 print(f"Memory #{memory_id} compressed to Tier 3")
+             else:
+                 print(f"Failed to compress memory #{memory_id}")
+         except ValueError:
+             print("Error: Memory ID must be a number")
+
+     elif command == "cold-storage":
+         cold_storage = ColdStorageManager()
+         candidates = cold_storage.get_cold_storage_candidates()
+
+         if not candidates:
+             print("No memories ready for cold storage")
+         else:
+             print(f"Moving {len(candidates)} memories to cold storage...")
+             count = cold_storage.move_to_cold_storage(candidates)
+             print(f"Archived {count} memories")
+
+     elif command == "restore" and len(sys.argv) >= 3:
+         try:
+             memory_id = int(sys.argv[2])
+             cold_storage = ColdStorageManager()
+             content = cold_storage.restore_from_cold_storage(memory_id)
+
+             if content:
+                 print(f"Memory #{memory_id} restored from cold storage")
+             else:
+                 print(f"Memory #{memory_id} not found in cold storage")
+         except ValueError:
+             print("Error: Memory ID must be a number")
+
+     elif command == "init-config":
+         config = CompressionConfig()
+         config.initialize_defaults()
+         print("Compression configuration initialized")
+         print(json.dumps(config.config['compression'], indent=2))
+
+     else:
+         print(f"Unknown command: {command}")
+         sys.exit(1)
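The CLI runs one cycle per invocation; keeping tiers current means invoking it periodically. A minimal sketch of a long-running driver using the module programmatically; the weekly interval is an assumption, not something the package prescribes:

```python
# Sketch of a periodic driver for the compression cycle above
# (interval is a hypothetical choice).
import time

def compression_loop(interval_days: int = 7) -> None:
    while True:
        stats = CompressionOrchestrator().run_full_compression()
        print(f"compression cycle finished: {stats.get('tier_stats')}")
        time.sleep(interval_days * 86400)
```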