shiro-memory 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,136 @@
1
+ """
2
+ Shiro Memory v2.0 — Enhanced persistent memory for OpenClaw agents.
3
+
4
+ v2.0 Features:
5
+ - 10-layer search engine (replaces v1.0 BM25)
6
+ - LLM enrichment with search keywords and summaries
7
+ - Enhanced entry fields: agent_id, content_hash, enriched_summary, search_keywords
8
+ - Pipeline bridge integration for seamless enrichment workflow
9
+ - Preserved all of Shiro's original namespace/session logic
10
+
11
+ Forked from Shiro's personal memory system with selective upgrades from antaris-suite.
12
+
13
+ Usage:
14
+ from antaris_memory import MemorySystem
15
+
16
+ mem = MemorySystem("./workspace")
17
+ mem.load()
18
+ mem.ingest("Key decision made", source="meeting", category="strategic")
19
+ results = mem.search("decision")
20
+ mem.save()
21
+ """
22
+
23
+ __version__ = "2.0.0"
24
+
25
+ # Core
26
+ from antaris_memory.core_v4 import MemorySystemV4 as MemorySystem
27
+ from antaris_memory.entry import MemoryEntry
28
+ from antaris_memory.decay import DecayEngine
29
+ from antaris_memory.sentiment import SentimentTagger
30
+ from antaris_memory.temporal import TemporalEngine
31
+ from antaris_memory.confidence import ConfidenceEngine
32
+ from antaris_memory.compression import CompressionEngine
33
+ from antaris_memory.forgetting import ForgettingEngine
34
+ from antaris_memory.consolidation import ConsolidationEngine
35
+ from antaris_memory.gating import InputGate
36
+ from antaris_memory.synthesis import KnowledgeSynthesizer
37
+
38
+ # Multi-agent
39
+ from antaris_memory.shared import SharedMemoryPool, AgentPermission
40
+
41
+ # Storage
42
+ from antaris_memory.migration import MigrationManager, Migration
43
+ from antaris_memory.indexing import IndexManager, SearchIndex, TagIndex, DateIndex
44
+
45
+ # Concurrency
46
+ from antaris_memory.locking import FileLock, LockTimeout
47
+ from antaris_memory.versioning import VersionTracker, ConflictError
48
+
49
+ # Search
50
+ from antaris_memory.search import SearchEngine, SearchResult
51
+
52
+ # Context Packets (v1.1)
53
+ from antaris_memory.context_packet import ContextPacket, ContextPacketBuilder
54
+
55
+ # Recovery (v3.3.1)
56
+ from antaris_memory.recovery import RecoveryConfig, RecoveryManager
57
+
58
+ # Memory Types + Namespace Isolation (Sprint 2 + Sprint 8)
59
+ from antaris_memory.memory_types import MEMORY_TYPE_CONFIGS, get_type_config
60
+ from antaris_memory.namespace import NamespacedMemory, NamespaceManager
61
+
62
+ # Sprint 3 — Semantic utilities
63
+ from antaris_memory.utils import cosine_similarity
64
+
65
+ # v3.2 — Instrumentation
66
+ from antaris_memory.instrumentation import (
67
+ SearchContext,
68
+ extract_memory_references,
69
+ anonymize_query,
70
+ build_usage_signal,
71
+ )
72
+
73
+ # Backward compatibility - import legacy core if needed
74
+ try:
75
+ from antaris_memory.core import MemorySystem as LegacyMemorySystem
76
+ except ImportError:
77
+ LegacyMemorySystem = None
78
+
79
# Public API of the package: the names exported by ``from antaris_memory import *``.
# Every entry must correspond to a name imported at the top of this module.
__all__ = [
    "MemorySystem",
    "MemoryEntry",

    # Core engines
    "DecayEngine",
    "SentimentTagger",
    "TemporalEngine",
    "ConfidenceEngine",
    "CompressionEngine",
    "ForgettingEngine",
    "ConsolidationEngine",
    "InputGate",
    "KnowledgeSynthesizer",

    # Multi-agent (v0.3)
    "SharedMemoryPool",
    "AgentPermission",

    # Production features (v0.4)
    "MigrationManager",
    "Migration",
    "IndexManager",
    "SearchIndex",
    "TagIndex",
    "DateIndex",

    # Concurrency (v0.5)
    "FileLock",
    "LockTimeout",
    "VersionTracker",
    "ConflictError",

    # Search (v1.0)
    "SearchEngine",
    "SearchResult",

    # Context Packets (v1.1)
    "ContextPacket",
    "ContextPacketBuilder",

    # Recovery (v3.3.1) — imported above but previously missing from the export list
    "RecoveryConfig",
    "RecoveryManager",

    # Sprint 2 — Memory Types
    "MEMORY_TYPE_CONFIGS",
    "get_type_config",

    # Sprint 8 — Namespace Isolation
    "NamespacedMemory",
    "NamespaceManager",

    # Sprint 3 — Hybrid Semantic Search
    "cosine_similarity",

    # v3.2 — Instrumentation
    "SearchContext",
    "extract_memory_references",
    "anonymize_query",
    "build_usage_signal",
]
@@ -0,0 +1,484 @@
1
+ """
2
+ Structured audit logging for antaris-memory v3.9.0.
3
+
4
+ Append-only JSON Lines (.jsonl) format audit trail for all memory operations.
5
+ Thread-safe via file locking. Auto-rotation when files exceed size limit.
6
+ PII anonymization via HMAC-SHA256 with a per-instance or persistent salt.
7
+
8
+ Example usage:
9
+ audit = AuditLogger("/path/to/workspace")
10
+
11
+ # Log memory operations
12
+ audit.log("ingest", {"id": "mem123", "source": "chat", "category": "technical"})
13
+ audit.log("search", {"query": "Python tips", "results_count": 5, "duration_ms": 23})
14
+ audit.log("recall", {"memory_id": "mem123", "context_id": "session_456"})
15
+
16
+ # Query audit trail
17
+ recent_recalls = audit.query(event_type="recall", since=time.time() - 3600)
18
+ stats = audit.stats()
19
+ """
20
+
21
+ import hashlib
22
+ import hmac
23
+ import json
24
+ import os
25
+ import re
26
+ import stat
27
+ import time
28
+ import threading
29
+ from pathlib import Path
30
+ from typing import Dict, List, Optional
31
+
32
+ try:
33
+ import fcntl
34
+ FCNTL_AVAILABLE = True
35
+ except ImportError:
36
+ FCNTL_AVAILABLE = False
37
+
38
+ try:
39
+ import msvcrt
40
+ MSVCRT_AVAILABLE = True
41
+ except ImportError:
42
+ MSVCRT_AVAILABLE = False
43
+
44
+ # Salt must be exactly 32 lowercase hex chars (128 bits from os.urandom(16).hex())
45
+ _SALT_RE = re.compile(r"^[0-9a-f]{32}$")
46
+
47
+
48
+ def _safe_chmod_600(path: Path) -> None:
49
+ """Set file permissions to 0o600 (owner read+write only).
50
+
51
+ Best-effort — silently ignores failures on Windows and restricted filesystems.
52
+ """
53
+ try:
54
+ os.chmod(path, stat.S_IRUSR | stat.S_IWUSR)
55
+ except Exception:
56
+ pass # Windows / restricted FS: best-effort only
57
+
58
+
59
def _read_or_create_salt(workspace: Path, initial_salt: str) -> str:
    """Load a persistent salt from .audit.salt, creating it if absent or corrupt.

    Validates that the stored salt is exactly 32 lowercase hex chars. If the file
    is missing, unreadable, or contains invalid data, writes initial_salt as the
    new salt and returns it.

    The file is created with mode 0o600 from the start (rather than being
    written with default permissions and chmod'ed afterwards) so the secret is
    never briefly readable by other users.

    Args:
        workspace: Directory containing the .audit.salt file.
        initial_salt: Fresh salt to use if the file is absent or corrupt.

    Returns:
        The stored salt when it is valid, otherwise initial_salt (persisted
        when possible, in-memory only when the file cannot be written).
    """
    workspace.mkdir(parents=True, exist_ok=True)
    salt_file = workspace / ".audit.salt"

    if salt_file.exists():
        try:
            stored = salt_file.read_text(encoding="utf-8").strip()
        except Exception:
            stored = ""
        if _SALT_RE.match(stored):
            # Re-tighten permissions in case the file was copied or restored
            # with a laxer mode.
            _safe_chmod_600(salt_file)
            return stored
        # Corrupt or invalid salt — fall through to regenerate

    # Write initial_salt (either first run or regeneration after corruption).
    try:
        fd = os.open(salt_file, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
        with os.fdopen(fd, "w", encoding="utf-8") as fh:
            fh.write(initial_salt)
    except Exception:
        # If we can't write the salt file, return the in-memory salt without persisting
        return initial_salt

    # os.open's mode only applies to newly created files; a pre-existing
    # (corrupt) file keeps its old mode, so tighten it explicitly as well.
    _safe_chmod_600(salt_file)
    return initial_salt
93
+
94
+
95
class AuditLogger:
    """Append-only structured audit log for memory operations.

    Writes one JSON object per line to `<workspace>/.audit.jsonl`.
    Writes are serialized with an in-process lock plus an OS file lock where
    available. The file is rotated when it exceeds max_size_mb.

    PII anonymization uses HMAC-SHA256 with a per-instance salt (128 bits of
    entropy from os.urandom(16)). In persistent mode the salt is saved to
    .audit.salt (chmod 0o600). Validated on load — corrupted salt files are
    regenerated automatically.

    Events logged:
    - ingest: memory stored (id, source, category, timestamp)
    - search: query executed (query, results_count, duration_ms)
    - recall: memory retrieved and used (memory_id, context_id)
    - decay: memory score decayed (memory_id, old_score, new_score)
    - delete: memory removed (memory_id, reason)
    - reinforce: memory reinforced (memory_id, boost)
    - share: memory shared to pool (memory_id, pool_id)
    """

    def __init__(
        self,
        workspace: str,
        max_size_mb: float = 10.0,
        max_files: int = 5,
        salt_mode: str = "ephemeral",
    ):
        """Initialize audit logger.

        Args:
            workspace: Directory path where audit logs will be stored
            max_size_mb: Maximum size of log file before rotation (default 10MB)
            max_files: Maximum number of rotated files to keep (default 5)
            salt_mode: Salt persistence mode — "ephemeral" (default, new salt each run)
                or "persistent" (salt stored in .audit.salt, reused across runs)
        """
        self.workspace = Path(workspace)
        self.workspace.mkdir(parents=True, exist_ok=True)

        self.audit_file = self.workspace / ".audit.jsonl"
        self.max_size_bytes = int(max_size_mb * 1024 * 1024)
        self.max_files = max_files
        self._salt_mode = salt_mode

        # Serializes log/query/stats/rotate within this process.
        self._lock = threading.Lock()

        # Per-instance salt for anonymization (prevents rainbow-table attacks).
        # os.urandom(16) = 128 bits of entropy — plenty for a MAC key.
        self._salt = os.urandom(16).hex()
        if salt_mode == "persistent":
            # Reuse the stored salt when valid; otherwise persist the fresh one.
            self._salt = _read_or_create_salt(self.workspace, self._salt)

        # PII fields that should be hashed for anonymization
        self._pii_fields = {"content", "query", "text", "source"}

    def rotate_salt(self) -> None:
        """Generate a new salt, invalidating previous hashes.

        In persistent mode the new salt overwrites .audit.salt. The file is
        written directly: going through the load-or-create helper would return
        the still-valid old salt and silently undo the rotation. If the file
        cannot be written, the new salt is kept in memory only.
        """
        new_salt = os.urandom(16).hex()
        if self._salt_mode == "persistent":
            self._persist_salt(new_salt)
        self._salt = new_salt

    def _persist_salt(self, salt: str) -> bool:
        """Overwrite .audit.salt with *salt* (mode 0o600). Returns True on success."""
        salt_file = self.workspace / ".audit.salt"
        try:
            fd = os.open(salt_file, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
            with os.fdopen(fd, "w", encoding="utf-8") as fh:
                fh.write(salt)
        except OSError:
            return False
        try:
            # The mode passed to os.open only applies to newly created files.
            os.chmod(salt_file, stat.S_IRUSR | stat.S_IWUSR)
        except OSError:
            pass  # Windows / restricted FS: best-effort only
        return True

    def log(self, event_type: str, data: dict) -> None:
        """Append a single audit event. Thread-safe.

        Args:
            event_type: Type of event (ingest, search, recall, etc.)
            data: Event-specific data dictionary (must be JSON-serializable)
        """
        entry = {
            "ts": time.time(),
            "event": event_type,
            "data": data,
        }

        with self._lock:
            # Check if rotation is needed before writing
            if self.audit_file.exists() and self.audit_file.stat().st_size > self.max_size_bytes:
                self._rotate_logs()

            self._write_entry(entry)

    @staticmethod
    def _parse_line(line: str) -> Optional[dict]:
        """Parse one log line; return None for blank, malformed or non-object lines."""
        line = line.strip()
        if not line:
            return None
        try:
            entry = json.loads(line)
        except json.JSONDecodeError:
            return None
        # Only JSON objects are valid audit entries; anything else would break
        # the `.get(...)` lookups performed by callers.
        return entry if isinstance(entry, dict) else None

    def _tail_lines(self, n: int) -> List[str]:
        """Read the last n non-blank lines of the audit file via backward seek.

        Bytes are accumulated in fixed-size chunks from the end of the file and
        split on newlines. Each line is decoded individually, so a multibyte
        UTF-8 sequence straddling a chunk boundary is never corrupted;
        errors='replace' is only used for lines that fail strict decoding.

        If reading stops before the start of the file, the first fragment may be
        a truncated line and is discarded rather than returned.
        """
        if not self.audit_file.exists() or n <= 0:
            return []

        chunk_size = 8192
        buf = b""

        with open(self.audit_file, "rb") as f:
            f.seek(0, 2)
            remaining = f.tell()
            # n + 1 newlines guarantee n complete lines even when the first
            # fragment is partial and the file ends with a newline.
            while remaining > 0 and buf.count(b"\n") <= n:
                read_size = min(chunk_size, remaining)
                remaining -= read_size
                f.seek(remaining)
                buf = f.read(read_size) + buf

        pieces = buf.split(b"\n")
        if remaining > 0:
            # Stopped mid-file: the leading fragment may be cut in the middle
            # of a line.
            pieces = pieces[1:]

        out: List[str] = []
        for bline in pieces:
            if not bline.strip():
                continue
            try:
                out.append(bline.decode("utf-8"))  # strict first
            except UnicodeDecodeError:
                out.append(bline.decode("utf-8", errors="replace"))  # fallback per-line
        return out[-n:]

    def _scan_forward(
        self,
        event_type: Optional[str],
        since: Optional[float],
        limit: int,
    ) -> List[dict]:
        """Scan the whole current file and return the newest matches, newest first.

        Filtering is done on the parsed entry (never on the raw text), so an
        "event" key nested inside the data payload cannot cause a false match.
        Caller must hold self._lock. Raises OSError if the file cannot be read.
        """
        from collections import deque

        ring: deque = deque(maxlen=limit)
        with open(self.audit_file, "r", encoding="utf-8") as f:
            for line in f:
                entry = self._parse_line(line)
                if entry is None:
                    continue
                if event_type and entry.get("event") != event_type:
                    continue
                if since is not None:
                    ts = entry.get("ts", 0)
                    if not isinstance(ts, (int, float)) or ts < since:
                        continue
                ring.append(entry)
        return list(reversed(ring))

    def query(
        self,
        event_type: Optional[str] = None,
        since: Optional[float] = None,
        limit: int = 100,
    ) -> List[dict]:
        """Query recent audit events. Reads from current file only.

        Fast path (since=None): tail-reads the last limit*3 lines. If event_type is
        specified and the fast path returns fewer than limit results, falls back to a
        full forward scan — this handles rare event types that may be older than the
        tail window.

        Slow path (since=<timestamp>): full forward scan with time filter.

        Args:
            event_type: Filter by event type (optional)
            since: Unix timestamp - only return events at or after this time (optional)
            limit: Maximum number of events to return (default 100); values
                <= 0 yield an empty list

        Returns:
            List of matching audit entries, newest first
        """
        if limit <= 0 or not self.audit_file.exists():
            return []

        with self._lock:
            if since is not None:
                try:
                    return self._scan_forward(event_type, since, limit)
                except OSError:
                    return []

            results: List[dict] = []
            for line in reversed(self._tail_lines(limit * 3)):  # newest first
                entry = self._parse_line(line)
                if entry is None:
                    continue
                if event_type and entry.get("event") != event_type:
                    continue
                results.append(entry)
                if len(results) >= limit:
                    break

            # Fallback for rare event types older than the tail window.
            if event_type and len(results) < limit:
                try:
                    return self._scan_forward(event_type, None, limit)
                except OSError:
                    return results  # fast-path partial results on I/O error

            return results

    def rotate(self) -> None:
        """Force a log rotation now, regardless of the current file size.

        Renames current file to .audit.1.jsonl, shifts others; a new current file
        is created on the next write.
        """
        with self._lock:
            self._rotate_logs()

    def stats(self) -> dict:
        """Return audit statistics (counts per event type, file size, etc).

        Returns:
            Dictionary with total_entries, file_size_bytes, event_counts,
            oldest_entry_ts and newest_entry_ts for the current log file
        """
        if not self.audit_file.exists():
            return {
                "total_entries": 0,
                "file_size_bytes": 0,
                "event_counts": {},
                "oldest_entry_ts": None,
                "newest_entry_ts": None,
            }

        event_counts: Dict[str, int] = {}
        total_entries = 0
        oldest_ts = None
        newest_ts = None

        with self._lock:
            try:
                file_size = self.audit_file.stat().st_size

                with open(self.audit_file, "r", encoding="utf-8") as f:
                    for line in f:
                        entry = self._parse_line(line)
                        if entry is None:
                            continue
                        total_entries += 1

                        event_type = entry.get("event", "unknown")
                        event_counts[event_type] = event_counts.get(event_type, 0) + 1

                        # Only numeric timestamps take part in the range, so a
                        # malformed "ts" cannot break the comparisons.
                        ts = entry.get("ts")
                        if isinstance(ts, (int, float)):
                            if oldest_ts is None or ts < oldest_ts:
                                oldest_ts = ts
                            if newest_ts is None or ts > newest_ts:
                                newest_ts = ts
            except IOError:
                file_size = 0

        return {
            "total_entries": total_entries,
            "file_size_bytes": file_size,
            "event_counts": event_counts,
            "oldest_entry_ts": oldest_ts,
            "newest_entry_ts": newest_ts,
        }

    def anonymize_entry(self, entry: dict) -> dict:
        """Strip PII from an audit entry for safe export.

        Args:
            entry: Original audit entry dictionary (must be JSON-serializable)

        Returns:
            New entry with PII fields replaced by HMAC-SHA256 hashes
            (32 hex chars / 128 bits); the input is not modified
        """
        # JSON round-trip gives a deep copy made only of dict/list/scalars.
        anonymized = json.loads(json.dumps(entry))
        self._anonymize_dict(anonymized)
        return anonymized

    def _hash_value(self, value: str) -> str:
        """Return the truncated HMAC-SHA256 tag of *value* keyed with the salt."""
        digest = hmac.new(
            key=bytes.fromhex(self._salt),
            msg=value.encode("utf-8"),
            digestmod=hashlib.sha256,
        ).hexdigest()
        return f"hmac:{digest[:32]}"  # 32 hex = 128 bits

    def _anonymize_dict(self, obj: dict) -> None:
        """Recursively hash PII fields in a dictionary, in place.

        String values whose key (case-insensitively) is a PII field are replaced
        by their HMAC. Nested dicts and lists are traversed so PII inside a list
        of records is hashed too.
        """
        for key, value in obj.items():
            if isinstance(value, dict):
                self._anonymize_dict(value)
            elif isinstance(value, list):
                self._anonymize_list(value, key)
            elif isinstance(value, str) and key.lower() in self._pii_fields:
                obj[key] = self._hash_value(value)

    def _anonymize_list(self, items: list, parent_key: str) -> None:
        """Anonymize a list in place; bare strings are hashed if parent_key is PII."""
        hash_strings = parent_key.lower() in self._pii_fields
        for idx, item in enumerate(items):
            if isinstance(item, dict):
                self._anonymize_dict(item)
            elif isinstance(item, list):
                self._anonymize_list(item, parent_key)
            elif hash_strings and isinstance(item, str):
                items[idx] = self._hash_value(item)

    def _write_entry(self, entry: dict) -> None:
        """Append a single entry to the log file, holding an OS file lock if available."""
        json_line = json.dumps(entry, ensure_ascii=False) + "\n"

        with open(self.audit_file, "a", encoding="utf-8") as f:
            if FCNTL_AVAILABLE:
                # Unix/Linux advisory lock on the whole file
                fcntl.flock(f.fileno(), fcntl.LOCK_EX)
                try:
                    f.write(json_line)
                    f.flush()
                finally:
                    fcntl.flock(f.fileno(), fcntl.LOCK_UN)
            elif MSVCRT_AVAILABLE:
                # msvcrt.locking locks the byte range starting at the *current*
                # file position. Writing moves that position, so remember where
                # the lock was taken and seek back before unlocking; otherwise
                # the unlock targets a different, never-locked region.
                fd = f.fileno()
                lock_pos = os.lseek(fd, 0, os.SEEK_CUR)
                msvcrt.locking(fd, msvcrt.LK_LOCK, 1)
                try:
                    f.write(json_line)
                    f.flush()
                finally:
                    os.lseek(fd, lock_pos, os.SEEK_SET)
                    msvcrt.locking(fd, msvcrt.LK_UNLCK, 1)
            else:
                # No locking available - just write (may have race conditions)
                f.write(json_line)
                f.flush()

    def _rotate_logs(self) -> None:
        """Internal log rotation implementation. Caller must hold self._lock.

        Shifts .audit.{i}.jsonl to .audit.{i+1}.jsonl (overwriting the oldest)
        and moves the current file to .audit.1.jsonl.

        Wrapped in try/except OSError to handle Windows scenarios where
        antivirus/indexers hold files open, preventing rename operations.
        On failure, rotation is silently skipped and retried on next append.
        """
        if not self.audit_file.exists():
            return

        try:
            # Shift existing rotated files, highest index first so nothing is
            # overwritten before it has been moved.
            for i in range(self.max_files - 1, 0, -1):
                old_file = self.workspace / f".audit.{i}.jsonl"
                if old_file.exists():
                    # Path.replace overwrites an existing destination.
                    old_file.replace(self.workspace / f".audit.{i + 1}.jsonl")

            # Move current file to .1; a new current file is created on next write.
            self.audit_file.replace(self.workspace / ".audit.1.jsonl")
        except OSError:
            # Rotation failed (e.g. file locked by another process on Windows).
            # Skip this cycle — rotation will retry on next log append.
            pass
@@ -0,0 +1,65 @@
1
+ """Claim 20: Memory compression & summarization."""
2
+
3
+ import re
4
+ from datetime import datetime, timedelta
5
+ from pathlib import Path
6
+ from typing import Dict, List
7
+
8
+
9
class CompressionEngine:
    """Compress old memories into condensed summaries."""

    @staticmethod
    def compress_file(file_path: str) -> Dict:
        """Read a memory file and produce a compressed summary.

        The summary keeps markdown headers (lines starting with "#") plus the
        de-duplicated bullet lines ("- ...") and lines containing one of the
        highlight markers. Duplicates are detected case-insensitively, and
        points of 10 characters or fewer are dropped.

        Args:
            file_path: Path of the UTF-8 text file to summarize.

        Returns:
            A summary dict (source, original_lines, compressed_lines,
            compression_ratio, headers, key_points capped at 30, compressed_at),
            or {"error": ...} when the file cannot be read or decoded.
        """
        try:
            # Explicit UTF-8: the marker list contains emoji, which the platform
            # default encoding (e.g. cp1252 on Windows) may fail to decode.
            content = Path(file_path).read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError):
            # OSError covers missing files as well as directories and
            # permission problems.
            return {"error": f"Cannot read: {file_path}"}

        lines = content.strip().split("\n")
        headers = [line for line in lines if line.startswith("#")]
        bullets = [line.strip() for line in lines if line.strip().startswith("- ")]
        markers = ["✅", "🎯", "💰", "🚀", "Decision:", "Key:", "Result:"]
        key_lines = [
            line.strip() for line in lines
            if any(marker in line for marker in markers)
        ]

        # De-duplicate on a normalized form (lowercase, bullet prefix removed)
        # while keeping the first original spelling, in order.
        seen = set()
        unique = []
        for point in bullets + key_lines:
            norm = point.lower().strip("- ").strip()
            if norm not in seen and len(norm) > 10:
                seen.add(norm)
                unique.append(point)

        compressed_lines = len(headers) + len(unique)
        return {
            "source": file_path,
            "original_lines": len(lines),
            "compressed_lines": compressed_lines,
            "compression_ratio": round(
                1 - compressed_lines / max(len(lines), 1), 2
            ),
            "headers": headers,
            "key_points": unique[:30],
            "compressed_at": datetime.now().isoformat(),
        }

    @staticmethod
    def compress_old_files(memory_dir: str, days_old: int = 7) -> List[Dict]:
        """Compress daily memory files older than *days_old*.

        Considers ``*.md`` files in *memory_dir* whose name contains a
        ``YYYY-MM-DD`` date. Files without a date, or with a date-like token
        that is not a real calendar date, are skipped.

        Args:
            memory_dir: Directory holding the daily memory files.
            days_old: Minimum age in days for a file to be compressed.

        Returns:
            One compress_file() result per qualifying file, in filename order;
            an empty list if the directory does not exist.
        """
        path = Path(memory_dir)
        if not path.exists():
            return []

        cutoff = datetime.now() - timedelta(days=days_old)
        results = []
        for f in sorted(path.glob("*.md")):
            match = re.search(r"(\d{4}-\d{2}-\d{2})", f.name)
            if not match:
                continue
            try:
                file_date = datetime.strptime(match.group(1), "%Y-%m-%d")
            except ValueError:
                # Digits matched the pattern but are not a valid date
                # (e.g. 2024-13-45); one bad filename must not abort the scan.
                continue
            if file_date < cutoff:
                results.append(CompressionEngine.compress_file(str(f)))
        return results