PyPI - mnemosyne-memory - Versions diffs - 2.2__tar.gz → 2.3__tar.gz - Mend

mnemosyne-memory 2.2tar.gz → 2.3tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (77) hide show

{mnemosyne_memory-2.2 → mnemosyne_memory-2.3}/CHANGELOG.md RENAMED Viewed

@@ -5,6 +5,35 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
 and this project adheres to [Simple Versioning](https://github.com/AxDSan/mnemosyne) (MAJOR.MINOR).
+## [2.3] — 2026-05-05
+### Added
+**Tiered Episodic Degradation — long-term recall without unbounded growth**
+- Three degradation tiers: Tier 1 (0-30d, full detail), Tier 2 (30-180d, LLM-compressed), Tier 3 (180d+, entity-extracted signal)
+- Automatic tier promotion during `sleep()` — no manual maintenance
+- Tier multipliers in recall scoring: cold memories need 4x stronger semantic match
+- Configurable via `MNEMOSYNE_TIER2_DAYS`, `MNEMOSYNE_TIER3_DAYS`, `MNEMOSYNE_TIER*_WEIGHT`
+- Mnemosyne can now truthfully claim "remembers what you told it a year ago"
+**Smart Compression — entity-aware tier 2→3 extraction**
+- `_extract_key_signal()` scores sentences by entity density (proper nouns, acronyms, security terms, tech stack, urgency)
+- Preserves facts buried anywhere in a long memory, not just the first sentence
+- Configurable: `MNEMOSYNE_SMART_COMPRESS=1` (default on), `MNEMOSYNE_TIER3_MAX_CHARS=300`
+**Memory Confidence — veracity signal for every memory**
+- New `veracity` field: `stated`, `inferred`, `tool`, `imported`, `unknown`
+- `remember(veracity="stated")` — set confidence at write time
+- `recall(veracity="stated")` — filter by confidence level
+- Recall applies veracity multiplier to scores (defaults: stated=1.0x, unknown=0.8x, inferred=0.7x, imported=0.6x, tool=0.5x)
+- `get_contaminated()` — surface non-stated memories for review
+- Configurable weights via `MNEMOSYNE_*_WEIGHT` env vars
+### Fixed
+- `local_llm.summarize()` → `summarize_memories()` — would crash on LLM degradation path
+- SQLite connection conflicts in batch degradation tests
+- Removed hallucinated Phase 2 from roadmap
 ## [2.2] — 2026-05-02
 ### Added

{mnemosyne_memory-2.2 → mnemosyne_memory-2.3}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mnemosyne-memory
-Version: 2.2
+Version: 2.3
 Summary: The Zero-Dependency, Sub-Millisecond AI Memory System
 Home-page: https://github.com/AxDSan/mnemosyne
 Author: Abdias J
@@ -27,11 +27,13 @@ Description-Content-Type: text/markdown
 License-File: LICENSE
 Provides-Extra: llm
 Requires-Dist: ctransformers>=0.2.27; extra == "llm"
+Requires-Dist: llama-cpp-python>=0.2.0; extra == "llm"
 Requires-Dist: huggingface-hub>=0.20; extra == "llm"
 Provides-Extra: embeddings
 Requires-Dist: fastembed>=0.3.0; extra == "embeddings"
 Provides-Extra: all
 Requires-Dist: ctransformers>=0.2.27; extra == "all"
+Requires-Dist: llama-cpp-python>=0.2.0; extra == "all"
 Requires-Dist: huggingface-hub>=0.20; extra == "all"
 Requires-Dist: fastembed>=0.3.0; extra == "all"
 Provides-Extra: dev
@@ -50,7 +52,7 @@ Dynamic: requires-python
 > Native, zero-cloud memory for AI agents. SQLite-backed. Sub-millisecond. Fully private.
 [![Python](https://img.shields.io/badge/Python-3.9+-blue.svg)](https://python.org)
-[![PyPI](https://img.shields.io/pypi/v/mnemosyne-memory.svg?v=2.2)](https://pypi.org/project/mnemosyne-memory/)
+[![PyPI](https://img.shields.io/pypi/v/mnemosyne-memory.svg?v=2.3)](https://pypi.org/project/mnemosyne-memory/)
 [![SQLite](https://img.shields.io/badge/SQLite-3.35+-green.svg)](https://sqlite.org/codeofethics.html)
 [![License](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE)
 [![CI](https://github.com/AxDSan/mnemosyne/actions/workflows/ci.yml/badge.svg)](https://github.com/AxDSan/mnemosyne/actions/workflows/ci.yml)

{mnemosyne_memory-2.2 → mnemosyne_memory-2.3}/README.md RENAMED Viewed

@@ -5,7 +5,7 @@
 > Native, zero-cloud memory for AI agents. SQLite-backed. Sub-millisecond. Fully private.
 [![Python](https://img.shields.io/badge/Python-3.9+-blue.svg)](https://python.org)
-[![PyPI](https://img.shields.io/pypi/v/mnemosyne-memory.svg?v=2.2)](https://pypi.org/project/mnemosyne-memory/)
+[![PyPI](https://img.shields.io/pypi/v/mnemosyne-memory.svg?v=2.3)](https://pypi.org/project/mnemosyne-memory/)
 [![SQLite](https://img.shields.io/badge/SQLite-3.35+-green.svg)](https://sqlite.org/codeofethics.html)
 [![License](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE)
 [![CI](https://github.com/AxDSan/mnemosyne/actions/workflows/ci.yml/badge.svg)](https://github.com/AxDSan/mnemosyne/actions/workflows/ci.yml)

{mnemosyne_memory-2.2 → mnemosyne_memory-2.3}/mnemosyne/__init__.py RENAMED Viewed

@@ -10,7 +10,7 @@ Example:
     >>> results = recall("user preferences")
 """
-__version__ = "2.2"
+__version__ = "2.3"
 __author__ = "Abdias J"
 __license__ = "MIT"

{mnemosyne_memory-2.2 → mnemosyne_memory-2.3}/mnemosyne/core/beam.py RENAMED Viewed

@@ -54,6 +54,23 @@ SLEEP_BATCH_SIZE = int(os.environ.get("MNEMOSYNE_SLEEP_BATCH", "5000"))
 SCRATCHPAD_MAX_ITEMS = int(os.environ.get("MNEMOSYNE_SP_MAX", "1000"))
 RECENCY_HALFLIFE_HOURS = float(os.environ.get("MNEMOSYNE_RECENCY_HALFLIFE", "168"))  # 1 week default
+# Tiered episodic degradation
+# Age thresholds (days) used as the tier 1→2 and tier 2→3 cutoffs.
+TIER2_DAYS = int(os.environ.get("MNEMOSYNE_TIER2_DAYS", "30"))
+TIER3_DAYS = int(os.environ.get("MNEMOSYNE_TIER3_DAYS", "180"))
+# Per-tier multipliers applied to episodic recall scores.
+TIER1_WEIGHT = float(os.environ.get("MNEMOSYNE_TIER1_WEIGHT", "1.0"))
+TIER2_WEIGHT = float(os.environ.get("MNEMOSYNE_TIER2_WEIGHT", "0.5"))
+TIER3_WEIGHT = float(os.environ.get("MNEMOSYNE_TIER3_WEIGHT", "0.25"))
+# Max tier-1 rows degraded per run (the tier 2→3 pass uses half of this).
+DEGRADE_BATCH_SIZE = int(os.environ.get("MNEMOSYNE_DEGRADE_BATCH", "100"))
+# On unless explicitly disabled. The value is normalised first so that
+# "False", "NO" or " 0 " also turn the feature off.
+SMART_COMPRESS = os.environ.get("MNEMOSYNE_SMART_COMPRESS", "1").strip().lower() not in ("0", "false", "no")
+# Character budget for tier-3 content (before the " [...]" marker).
+TIER3_MAX_CHARS = int(os.environ.get("MNEMOSYNE_TIER3_MAX_CHARS", "300"))
+# Veracity weighting (memory confidence)
+# Multipliers keyed by a memory's veracity label; applied during recall scoring.
+STATED_WEIGHT = float(os.environ.get("MNEMOSYNE_STATED_WEIGHT", "1.0"))
+INFERRED_WEIGHT = float(os.environ.get("MNEMOSYNE_INFERRED_WEIGHT", "0.7"))
+TOOL_WEIGHT = float(os.environ.get("MNEMOSYNE_TOOL_WEIGHT", "0.5"))
+IMPORTED_WEIGHT = float(os.environ.get("MNEMOSYNE_IMPORTED_WEIGHT", "0.6"))
+UNKNOWN_WEIGHT = float(os.environ.get("MNEMOSYNE_UNKNOWN_WEIGHT", "0.8"))
 # Vector compression: float32 | int8 | bit
 VEC_TYPE = os.environ.get("MNEMOSYNE_VEC_TYPE", "int8").lower()
 if VEC_TYPE not in ("float32", "int8", "bit"):
@@ -125,6 +142,7 @@ def init_beam(db_path: Path = None):
             session_id TEXT DEFAULT 'default',
             importance REAL DEFAULT 0.5,
             metadata_json TEXT,
+            veracity TEXT DEFAULT 'unknown',
             created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
         )
     """)
@@ -144,6 +162,7 @@ def init_beam(db_path: Path = None):
             importance REAL DEFAULT 0.5,
             metadata_json TEXT,
             summary_of TEXT DEFAULT '',
+            veracity TEXT DEFAULT 'unknown',
             created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
         )
     """)
@@ -151,6 +170,27 @@ def init_beam(db_path: Path = None):
     cursor.execute("CREATE INDEX IF NOT EXISTS idx_em_timestamp ON episodic_memory(timestamp)")
     cursor.execute("CREATE INDEX IF NOT EXISTS idx_em_source ON episodic_memory(source)")
+    # --- Tiered degradation migration (v2.3) ---
+    try:
+        cursor.execute("ALTER TABLE episodic_memory ADD COLUMN tier INTEGER DEFAULT 1")
+    except sqlite3.OperationalError:
+        pass  # Column already exists
+    try:
+        cursor.execute("ALTER TABLE episodic_memory ADD COLUMN degraded_at TEXT")
+    except sqlite3.OperationalError:
+        pass
+    cursor.execute("CREATE INDEX IF NOT EXISTS idx_em_tier ON episodic_memory(tier)")
+    # --- Veracity migration (v2.3) ---
+    try:
+        cursor.execute("ALTER TABLE working_memory ADD COLUMN veracity TEXT DEFAULT 'unknown'")
+    except sqlite3.OperationalError:
+        pass
+    try:
+        cursor.execute("ALTER TABLE episodic_memory ADD COLUMN veracity TEXT DEFAULT 'unknown'")
+    except sqlite3.OperationalError:
+        pass
     # --- SCRATCHPAD ---
     cursor.execute("""
         CREATE TABLE IF NOT EXISTS scratchpad (
@@ -691,7 +731,8 @@ class BeamMemory:
                  valid_until: str = None, scope: str = "session",
                  memory_id: str = None,
                  extract_entities: bool = False,
-                 extract: bool = False) -> str:
+                 extract: bool = False,
+                 veracity: str = "unknown") -> str:
         """Store into working_memory. Deduplicates exact content matches.
         When called from the legacy-compatible Mnemosyne.remember() path,
@@ -710,6 +751,7 @@ class BeamMemory:
             extract_entities: If True, extract and store entity mentions as triples
             extract: If True, extract structured facts from content using LLM
                 and store as triples. Default False.
+            veracity: Confidence level — 'stated', 'inferred', 'tool', 'imported', 'unknown'
         """
         # --- Deduplication: exact match ---
         existing_id = self._find_duplicate(content)
@@ -737,11 +779,11 @@ class BeamMemory:
         cursor.execute("""
             INSERT INTO working_memory
             (id, content, source, timestamp, session_id, importance, metadata_json, valid_until, scope,
-             author_id, author_type, channel_id)
-            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+             author_id, author_type, channel_id, veracity)
+            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
         """, (memory_id, content, source, timestamp, self.session_id, importance,
               json.dumps(metadata or {}), valid_until, scope,
-              self.author_id, self.author_type, self.channel_id))
+              self.author_id, self.author_type, self.channel_id, veracity))
         self.conn.commit()
         self._trim_working_memory()
@@ -978,6 +1020,7 @@ class BeamMemory:
                author_id: Optional[str] = None,
                author_type: Optional[str] = None,
                channel_id: Optional[str] = None,
+               veracity: Optional[str] = None,
                temporal_weight: float = 0.0,
                query_time: Optional[Any] = None,
                temporal_halflife: Optional[float] = None,
@@ -1077,6 +1120,9 @@ class BeamMemory:
             # Topic stored in source field for now (pending dedicated topic column)
             wm_where_clauses.append("source = ?")
             wm_params.append(topic)
+        if veracity:
+            wm_where_clauses.append("veracity = ?")
+            wm_params.append(veracity)
         if author_id:
             wm_where_clauses.append("author_id = ?")
             wm_params.append(author_id)
@@ -1093,7 +1139,7 @@ class BeamMemory:
             placeholders = ",".join("?" * len(wm_ids))
             cursor = self.conn.cursor()
             cursor.execute(f"""
-                SELECT id, content, source, timestamp, importance, recall_count, last_recalled, valid_until, superseded_by, scope, author_id, author_type, channel_id
+                SELECT id, content, source, timestamp, importance, recall_count, last_recalled, valid_until, superseded_by, scope, author_id, author_type, channel_id, veracity
                 FROM working_memory
                 WHERE id IN ({placeholders})
                   AND {wm_where}
@@ -1103,7 +1149,7 @@ class BeamMemory:
             # Fallback: fetch recent items and score in Python (old path)
             cursor = self.conn.cursor()
             cursor.execute(f"""
-                SELECT id, content, source, timestamp, importance, recall_count, last_recalled, valid_until, superseded_by, scope, author_id, author_type, channel_id
+                SELECT id, content, source, timestamp, importance, recall_count, last_recalled, valid_until, superseded_by, scope, author_id, author_type, channel_id, veracity
                 FROM working_memory
                 WHERE {wm_where}
                 ORDER BY timestamp DESC
@@ -1169,6 +1215,7 @@ class BeamMemory:
                     "author_id": row["author_id"] if "author_id" in row.keys() else None,
                     "author_type": row["author_type"] if "author_type" in row.keys() else None,
                     "channel_id": row["channel_id"] if "channel_id" in row.keys() else None,
+                    "veracity": row["veracity"] if "veracity" in row.keys() else "unknown",
                     "valid_until": row["valid_until"] if "valid_until" in row.keys() else None,
                     "superseded_by": row["superseded_by"] if "superseded_by" in row.keys() else None
                 })
@@ -1180,7 +1227,7 @@ class BeamMemory:
             placeholders = ",".join("?" * len(entity_memory_ids))
             cursor = self.conn.cursor()
             cursor.execute(f"""
-                SELECT id, content, source, timestamp, importance, recall_count, last_recalled, valid_until, superseded_by, scope, author_id, author_type, channel_id
+                SELECT id, content, source, timestamp, importance, recall_count, last_recalled, valid_until, superseded_by, scope, author_id, author_type, channel_id, veracity
                 FROM working_memory
                 WHERE id IN ({placeholders})
                   AND {wm_where}
@@ -1222,6 +1269,7 @@ class BeamMemory:
                         "author_id": row["author_id"] if "author_id" in row.keys() else None,
                         "author_type": row["author_type"] if "author_type" in row.keys() else None,
                         "channel_id": row["channel_id"] if "channel_id" in row.keys() else None,
+                        "veracity": row["veracity"] if "veracity" in row.keys() else "unknown",
                         "valid_until": row["valid_until"] if "valid_until" in row.keys() else None,
                         "superseded_by": row["superseded_by"] if "superseded_by" in row.keys() else None,
                         "entity_match": True
@@ -1240,7 +1288,7 @@ class BeamMemory:
                 em_entity_params = [*tuple(entity_memory_ids), self.session_id]
             em_entity_params.extend([datetime.now().isoformat()])
             cursor.execute(f"""
-                SELECT id, content, source, timestamp, importance, recall_count, last_recalled, valid_until, superseded_by, scope, author_id, author_type, channel_id
+                SELECT id, content, source, timestamp, importance, recall_count, last_recalled, valid_until, superseded_by, scope, author_id, author_type, channel_id, veracity
                 FROM episodic_memory
                 WHERE id IN ({em_placeholders})
                   AND {em_entity_scope}
@@ -1282,6 +1330,7 @@ class BeamMemory:
                         "author_id": row["author_id"] if "author_id" in row.keys() else None,
                         "author_type": row["author_type"] if "author_type" in row.keys() else None,
                         "channel_id": row["channel_id"] if "channel_id" in row.keys() else None,
+                        "veracity": row["veracity"] if "veracity" in row.keys() else "unknown",
                         "valid_until": row["valid_until"] if "valid_until" in row.keys() else None,
                         "superseded_by": row["superseded_by"] if "superseded_by" in row.keys() else None,
                         "entity_match": True
@@ -1294,7 +1343,7 @@ class BeamMemory:
             cursor = self.conn.cursor()
             # Check working_memory for fact matches
             cursor.execute(f"""
-                SELECT id, content, source, timestamp, importance, recall_count, last_recalled, valid_until, superseded_by, scope, author_id, author_type, channel_id
+                SELECT id, content, source, timestamp, importance, recall_count, last_recalled, valid_until, superseded_by, scope, author_id, author_type, channel_id, veracity
                 FROM working_memory
                 WHERE id IN ({placeholders})
                   AND {wm_where}
@@ -1334,6 +1383,7 @@ class BeamMemory:
                         "author_id": row["author_id"] if "author_id" in row.keys() else None,
                         "author_type": row["author_type"] if "author_type" in row.keys() else None,
                         "channel_id": row["channel_id"] if "channel_id" in row.keys() else None,
+                        "veracity": row["veracity"] if "veracity" in row.keys() else "unknown",
                         "valid_until": row["valid_until"] if "valid_until" in row.keys() else None,
                         "superseded_by": row["superseded_by"] if "superseded_by" in row.keys() else None,
                         "fact_match": True
@@ -1351,7 +1401,7 @@ class BeamMemory:
                 fact_em_params = [*tuple(fact_memory_ids), self.session_id]
             fact_em_params.extend([datetime.now().isoformat()])
             cursor.execute(f"""
-                SELECT id, content, source, timestamp, importance, recall_count, last_recalled, valid_until, superseded_by, scope, author_id, author_type, channel_id
+                SELECT id, content, source, timestamp, importance, recall_count, last_recalled, valid_until, superseded_by, scope, author_id, author_type, channel_id, veracity
                 FROM episodic_memory
                 WHERE id IN ({placeholders})
                   AND {fact_em_scope}
@@ -1393,6 +1443,7 @@ class BeamMemory:
                         "author_id": row["author_id"] if "author_id" in row.keys() else None,
                         "author_type": row["author_type"] if "author_type" in row.keys() else None,
                         "channel_id": row["channel_id"] if "channel_id" in row.keys() else None,
+                        "veracity": row["veracity"] if "veracity" in row.keys() else "unknown",
                         "valid_until": row["valid_until"] if "valid_until" in row.keys() else None,
                         "superseded_by": row["superseded_by"] if "superseded_by" in row.keys() else None,
                         "fact_match": True
@@ -1457,6 +1508,9 @@ class BeamMemory:
         if topic:
             em_where_clauses.append("source = ?")
             em_params.append(topic)
+        if veracity:
+            em_where_clauses.append("veracity = ?")
+            em_params.append(veracity)
         if author_id:
             em_where_clauses.append("author_id = ?")
             em_params.append(author_id)
@@ -1565,10 +1619,34 @@ class BeamMemory:
                         "author_id": row["author_id"] if "author_id" in row.keys() else None,
                         "author_type": row["author_type"] if "author_type" in row.keys() else None,
                         "channel_id": row["channel_id"] if "channel_id" in row.keys() else None,
+                        "veracity": row["veracity"] if "veracity" in row.keys() else "unknown",
                         "valid_until": row["valid_until"] if "valid_until" in row.keys() else None,
                         "superseded_by": row["superseded_by"] if "superseded_by" in row.keys() else None
                     })
+        # --- Tiered degradation weighting: apply tier multiplier to episodic scores ---
+        weight_map = {1: TIER1_WEIGHT, 2: TIER2_WEIGHT, 3: TIER3_WEIGHT}
+        veracity_map = {"stated": STATED_WEIGHT, "inferred": INFERRED_WEIGHT,
+                        "tool": TOOL_WEIGHT, "imported": IMPORTED_WEIGHT,
+                        "unknown": UNKNOWN_WEIGHT}
+        em_ids_for_tier = [r["id"] for r in results if r.get("tier") == "episodic"]
+        if em_ids_for_tier:
+            placeholders = ",".join("?" * len(em_ids_for_tier))
+            tier_rows = cursor.execute(
+                f"SELECT id, tier, veracity FROM episodic_memory WHERE id IN ({placeholders})",
+                em_ids_for_tier
+            ).fetchall()
+            tier_lookup = {r["id"]: (r["tier"] or 1) for r in tier_rows}
+            veracity_lookup = {r["id"]: (r["veracity"] or "unknown") for r in tier_rows}
+            for r in results:
+                if r.get("tier") == "episodic":
+                    ep_tier = tier_lookup.get(r["id"], 1)
+                    ep_veracity = veracity_lookup.get(r["id"], "unknown")
+                    r["degradation_tier"] = ep_tier
+                    r["veracity"] = ep_veracity
+                    r["score"] *= weight_map.get(ep_tier, 1.0)
+                    r["score"] *= veracity_map.get(ep_veracity, UNKNOWN_WEIGHT)
         results.sort(key=lambda x: x["score"], reverse=True)
         final_results = results[:top_k]
@@ -1670,6 +1748,172 @@ class BeamMemory:
         self.conn.execute("DELETE FROM scratchpad WHERE session_id = ?", (self.session_id,))
         self.conn.commit()
+    # ------------------------------------------------------------------
+    # Tiered Episodic Degradation
+    # ------------------------------------------------------------------
+    def _extract_key_signal(self, content: str, max_chars: int = 300) -> str:
+        """Extract the highest-signal sentences from content for tier 3 compression.
+        Scores each sentence by entity/keyword density (proper nouns, technical
+        terms, preference indicators) and keeps the best-scoring sentences that
+        fit in the character budget. Kept sentences are returned in their
+        original document order so the compressed text still reads coherently.
+        Falls back to first-N-chars if content has no clear sentence boundaries
+        or no single sentence fits the budget.
+        Args:
+            content: Text to compress.
+            max_chars: Budget for the selected sentences. The " [...]" marker
+                appended to shortened output is not counted, so the result can
+                be up to six characters longer than this.
+        Returns:
+            ``content`` unchanged when it already fits, otherwise the selected
+            sentences (or a prefix) followed by " [...]".
+        """
+        import re
+        if len(content) <= max_chars:
+            return content
+        # Split into sentences
+        sentences = re.split(r'(?<=[.!?])\s+', content)
+        if len(sentences) <= 1:
+            # No sentence boundaries — take first max_chars
+            return content[:max_chars] + " [...]"
+        # Scoring patterns
+        signal_patterns = [
+            (r'\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)+\b', 3),     # Proper nouns: "GitHub Actions", "Docker Compose"
+            (r'\b[A-Z]{2,}\b', 3),                            # Acronyms: "XKCD", "CI/CD", "API", "AWS"
+            (r'\b(Docker|Kubernetes|AWS|GCP|Azure|Terraform|Python|Rust|Go|TypeScript|React|Next\.?js|Node\.?js|SQLite|Postgres|Redis|nginx|systemd|Linux|macOS|Windows)\b', 4),
+            (r'\b(prefers?|uses?|likes?|loves?|hates?|dislikes?|wants?|needs?)\b', 2),  # Preference indicators
+            (r'\b(password|token|secret|key|credential|auth|encrypt|decrypt|private)\b', 3),  # Security terms
+            (r'\b(production|staging|deploy|database|backup|migration)\b', 2),  # Infra terms
+            (r'\b(critical|urgent|important|breaking|incident|outage|down)\b', 3),  # Urgency
+            (r'\b(always|never|every|must|should)\b', 1),  # Emphasis words
+            (r'\b(\d{1,3}\.\d{1,3}\.\d{1,3})\b', 3),  # Version numbers
+            (r'\b(https?://|www\.|[a-z]+\.[a-z]{2,})\b', 2),  # URLs / domains
+            (r'["\'].*?["\']', 1),  # Quoted strings
+        ]
+        scored = []
+        for position, sentence in enumerate(sentences):
+            if not sentence.strip():
+                continue
+            score = 0
+            # Bonus for shorter sentences (signal density)
+            if len(sentence) < 120:
+                score += 1
+            for pattern, weight in signal_patterns:
+                score += len(re.findall(pattern, sentence)) * weight
+            scored.append((score, position, sentence))
+        # Highest score first; the sort is stable, so ties keep document order
+        scored.sort(key=lambda x: x[0], reverse=True)
+        kept = []
+        used = 0
+        for _, position, sentence in scored:
+            # The joining space is only needed between sentences, not before the first
+            cost = len(sentence) + (1 if kept else 0)
+            if used + cost > max_chars:
+                # Skip rather than stop: a shorter, lower-scored sentence may still fit
+                continue
+            kept.append((position, sentence))
+            used += cost
+        if not kept:
+            return content[:max_chars] + " [...]"
+        # Restore original order so the extract is not a score-shuffled jumble
+        kept.sort()
+        compressed = " ".join(sentence for _, sentence in kept)
+        if len(content) > len(compressed):
+            compressed += " [...]"
+        return compressed
+    def degrade_episodic(self, dry_run: bool = False) -> Dict:
+        """Degrade old episodic memories through tier 1→2→3 compression.
+        Tier 1 (0-TIER2_DAYS): Full detail, 1.0x recall weight
+        Tier 2 (TIER2_DAYS-TIER3_DAYS): LLM-summarized, 0.5x weight
+        Tier 3 (TIER3_DAYS+): Text extraction compressed, 0.25x weight
+        (weights shown are the defaults of the TIER*_WEIGHT settings).
+        Candidates are selected before any update, so a row moves at most one
+        tier per call. Processing is best-effort per row: a failing row is
+        left untouched and reported, and the remaining rows still run.
+        Args:
+            dry_run: If True, only count the candidates; nothing is written.
+        Returns:
+            Dict with "status" ("dry_run" or "degraded"), the counters
+            "tier1_to_tier2" and "tier2_to_tier3", and "errors" — a list of
+            {"id", "transition", "error"} entries for rows that failed.
+        """
+        cursor = self.conn.cursor()
+        now = datetime.now()
+        results = {"status": "dry_run" if dry_run else "degraded",
+                   "tier1_to_tier2": 0, "tier2_to_tier3": 0,
+                   "errors": []}
+        # --- Find candidates for degradation ---
+        # NOTE(review): the cutoffs are isoformat() strings ('T' separator) and
+        # are compared as text against created_at; rows written by the schema's
+        # CURRENT_TIMESTAMP default use a space separator, which may shift the
+        # boundary by up to a day — TODO confirm how created_at is populated.
+        tier2_cutoff = (now - timedelta(days=TIER2_DAYS)).isoformat()
+        tier3_cutoff = (now - timedelta(days=TIER3_DAYS)).isoformat()
+        # Tier 1 → Tier 2: old enough, still at tier 1
+        cursor.execute("""
+            SELECT id, content, importance FROM episodic_memory
+            WHERE tier = 1 AND created_at < ?
+            ORDER BY created_at ASC LIMIT ?
+        """, (tier2_cutoff, DEGRADE_BATCH_SIZE))
+        tier1_rows = cursor.fetchall()
+        # Tier 2 → Tier 3: very old, at tier 2
+        cursor.execute("""
+            SELECT id, content FROM episodic_memory
+            WHERE tier = 2 AND created_at < ?
+            ORDER BY created_at ASC LIMIT ?
+        """, (tier3_cutoff, DEGRADE_BATCH_SIZE // 2))
+        tier2_rows = cursor.fetchall()
+        if dry_run:
+            results["tier1_to_tier2"] = len(tier1_rows)
+            results["tier2_to_tier3"] = len(tier2_rows)
+            return results
+        degraded_at = now.isoformat()
+        # --- Degrade tier 1 → tier 2: LLM summarization ---
+        from mnemosyne.core import local_llm
+        for row in tier1_rows:
+            try:
+                compressed = row["content"]
+                if local_llm.llm_available() and len(row["content"]) > 300:
+                    summary = local_llm.summarize_memories([row["content"]])
+                    if summary:
+                        compressed = summary[:400]
+                # NOTE(review): without an LLM summary, content longer than 800
+                # chars is cut here with no " [...]" marker — confirm intended.
+                cursor.execute(
+                    "UPDATE episodic_memory SET content = ?, tier = 2, degraded_at = ? WHERE id = ?",
+                    (compressed[:800], degraded_at, row["id"])
+                )
+                results["tier1_to_tier2"] += 1
+            except Exception as exc:
+                # Best-effort: keep going, but report the failure instead of hiding it
+                results["errors"].append({"id": row["id"],
+                                          "transition": "tier1_to_tier2",
+                                          "error": repr(exc)})
+        # --- Degrade tier 2 → tier 3: smart extraction (keep key entities) ---
+        for row in tier2_rows:
+            try:
+                content = row["content"]
+                if SMART_COMPRESS and len(content) > TIER3_MAX_CHARS:
+                    compressed = self._extract_key_signal(content, max_chars=TIER3_MAX_CHARS)
+                else:
+                    compressed = content[:TIER3_MAX_CHARS]
+                    if len(content) > TIER3_MAX_CHARS:
+                        compressed += " [...]"
+                cursor.execute(
+                    "UPDATE episodic_memory SET content = ?, tier = 3, degraded_at = ? WHERE id = ?",
+                    (compressed, degraded_at, row["id"])
+                )
+                results["tier2_to_tier3"] += 1
+            except Exception as exc:
+                results["errors"].append({"id": row["id"],
+                                          "transition": "tier2_to_tier3",
+                                          "error": repr(exc)})
+        self.conn.commit()
+        return results
+    def get_contaminated(self, limit: int = 50, min_importance: float = 0.0) -> List[Dict]:
+        """List episodic memories that were not explicitly stated, for review.
+        A memory counts as potentially contaminated when its veracity is one
+        of 'inferred', 'tool', 'imported' or 'unknown'. Results are ordered by
+        importance (highest first), then by creation time (newest first).
+        Args:
+            limit: Max memories to return
+            min_importance: Only return memories with importance >= this
+        Returns:
+            One dict per matching episodic_memory row, keyed by column name.
+        """
+        review_query = """
+            SELECT id, content, source, veracity, tier, importance,
+                   created_at, degraded_at, session_id
+            FROM episodic_memory
+            WHERE veracity IN ('inferred', 'tool', 'imported', 'unknown')
+              AND importance >= ?
+            ORDER BY importance DESC, created_at DESC
+            LIMIT ?
+        """
+        flagged = self.conn.cursor()
+        flagged.execute(review_query, (min_importance, limit))
+        suspects = []
+        for record in flagged.fetchall():
+            # Convert each row into a plain dict for the caller
+            suspects.append(dict(record))
+        return suspects
     # ------------------------------------------------------------------
     # Consolidation / Sleep
     # ------------------------------------------------------------------
@@ -1785,13 +2029,17 @@ class BeamMemory:
             """, (self.session_id, len(consolidated_ids), f"{summaries_created} summaries ({method}) from {len(consolidated_ids)} items"))
             self.conn.commit()
+        # Run tiered degradation after consolidation
+        degrade_result = self.degrade_episodic(dry_run=dry_run)
         return {
             "status": "dry_run" if dry_run else "consolidated",
             "items_consolidated": len(consolidated_ids),
             "summaries_created": summaries_created,
             "llm_used": llm_used_count,
             "method": method,
-            "consolidated_ids": consolidated_ids
+            "consolidated_ids": consolidated_ids,
+            "degradation": degrade_result
         }
     def sleep_all_sessions(self, dry_run: bool = False) -> Dict:
@@ -1855,6 +2103,9 @@ class BeamMemory:
             except Exception as exc:
                 errors.append({"session_id": session_id, "error": repr(exc)})
+        # Run tiered degradation after all-sessions consolidation
+        degrade_result = self.degrade_episodic(dry_run=dry_run)
         return {
             "status": "dry_run" if dry_run else ("consolidated" if items_consolidated else "no_op"),
             "sessions_scanned": len(session_rows),
@@ -1865,6 +2116,7 @@ class BeamMemory:
             "errors": len(errors),
             "error_details": errors,
             "session_results": session_results,
+            "degradation": degrade_result
         }
     def get_consolidation_log(self, limit: int = 10) -> List[Dict]:

mnemosyne-memory 2.2__tar.gz → 2.3__tar.gz

mnemosyne-memory 2.2tar.gz → 2.3tar.gz