npm - get-claudia - Versions diffs - 1.55.15 → 1.55.16 - Mend

get-claudia 1.55.15 → 1.55.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

package/CHANGELOG.md +13 -0
package/memory-daemon/claudia_memory/daemon/scheduler.py +3 -1
package/memory-daemon/claudia_memory/extraction/entity_extractor.py +32 -46
package/memory-daemon/claudia_memory/services/guards.py +2 -2
package/memory-daemon/claudia_memory/services/remember.py +40 -0
package/package.json +1 -1

package/CHANGELOG.md CHANGED Viewed

@@ -2,6 +2,19 @@
 All notable changes to Claudia will be documented in this file.
+## 1.55.16 (2026-03-18)
+### Reliability Fixes
+Five fixes for issues surfaced from daemon logs. All are backward-compatible, with no schema changes.
+- **Overnight jobs now fire after sleep** -- APScheduler's `BackgroundScheduler` now has `misfire_grace_time=14400` (4 hours) and `coalesce=True`. Previously, the default 1-second grace time meant every scheduled job (decay, backup, consolidation, vault sync) was silently skipped when a Mac slept through the 2am-3:15am window. Now jobs fire immediately on wake if missed within the last 4 hours, with multiple missed runs collapsed into one execution.
+- **Reduced log noise from summary memories** -- The content length warning threshold was raised from 500 to 800 chars. Legitimate summary-type memories (typically 550-850 chars) no longer trigger "Long content" warnings unless they exceed 800 chars. Hard truncation at 1000 chars is unchanged.
+- **Fuzzy entity dedup on write** -- `_ensure_entity()` and `_find_or_create_entity()` now perform a fuzzy pre-check (`SequenceMatcher` ratio > 0.90) before creating new entities. Name variants like "Kris Krisko" vs "Kris Krisco" (ratio ~0.91) match the existing entity instead of creating a duplicate. Only entities of the same type are compared, and deleted entities are skipped.
+- **Expanded STOP_WORDS** -- Added ~55 common English words that spaCy misidentifies as entities ("drawn", "overall", "recently", "several", etc.). Prevents ghost entities from cluttering the graph.
+- **Person entities require 2+ words** -- Regex-extracted person entities must have at least two words (e.g., "First Last"). Single-word extractions like "Metal" or "Drawn" are rejected. spaCy-identified entities are unaffected.
+- 637 tests pass, 0 regressions, 22 new tests across 4 new test files.
 ## 1.55.15 (2026-03-18)
 - **Fix mixed-timezone datetime crash** -- The memory daemon could crash with `can't subtract offset-naive and offset-aware datetimes` when recall or consolidation queries hit records with timezone suffixes (e.g., `+00:00` from email or transcript timestamps). Added a shared `parse_naive()` utility that strips timezone info on parse, applied across 14 locations in 5 files (recall.py, consolidate.py, server.py, vault_sync.py, canvas_generator.py). Replaces the older `[:19]` string truncation workaround. 615 tests pass.

package/memory-daemon/claudia_memory/daemon/scheduler.py CHANGED Viewed

@@ -27,7 +27,9 @@ class MemoryScheduler:
     """Manages scheduled memory maintenance tasks"""
     def __init__(self):
-        self.scheduler = BackgroundScheduler()
+        self.scheduler = BackgroundScheduler(
+            job_defaults={"misfire_grace_time": 14400, "coalesce": True}
+        )
         self.config = get_config()
         self._started = False

package/memory-daemon/claudia_memory/extraction/entity_extractor.py CHANGED Viewed

@@ -188,50 +188,33 @@ class EntityExtractor:
     # Common non-entity words to filter out
     STOP_WORDS = {
-        "monday",
-        "tuesday",
-        "wednesday",
-        "thursday",
-        "friday",
-        "saturday",
-        "sunday",
-        "january",
-        "february",
-        "march",
-        "april",
-        "may",
-        "june",
-        "july",
-        "august",
-        "september",
-        "october",
-        "november",
-        "december",
-        "today",
-        "tomorrow",
-        "yesterday",
-        "morning",
-        "afternoon",
-        "evening",
-        "night",
-        "the",
-        "this",
-        "that",
-        "these",
-        "those",
-        "here",
-        "there",
-        "where",
-        "when",
-        "what",
-        "which",
-        "who",
-        "how",
-        "just",
-        "only",
-        "also",
-        "even",
-        "still",
+        # Days and months
+        "monday", "tuesday", "wednesday", "thursday", "friday",
+        "saturday", "sunday",
+        "january", "february", "march", "april", "may", "june",
+        "july", "august", "september", "october", "november", "december",
+        # Temporal
+        "today", "tomorrow", "yesterday",
+        "morning", "afternoon", "evening", "night",
+        # Pronouns and determiners
+        "the", "this", "that", "these", "those",
+        "here", "there", "where", "when", "what", "which", "who", "how",
+        # Adverbs
+        "just", "only", "also", "even", "still",
+        "recently", "nearly", "almost", "already", "rather",
+        "somewhat", "perhaps", "quite", "likely", "enough",
+        # Quantifiers and adjectives
+        "several", "various", "another", "certain",
+        "much", "many", "some", "most", "both",
+        "each", "every", "other", "such", "same",
+        "new", "old", "big", "long", "last", "next",
+        "good", "well", "nice", "overall", "drawn",
+        # Common verbs (past tense / short forms spaCy misidentifies)
+        "done", "made", "said", "went", "got",
+        "set", "put", "run", "let", "get",
+        # Common nouns too generic to be entities
+        "work", "part", "plan", "team", "data",
+        "note", "time", "home", "call", "open",
     }
     def __init__(self):
@@ -296,12 +279,15 @@ class EntityExtractor:
         """Extract entities using regex patterns"""
         entities = []
-        # Extract persons
+        # Extract persons (require at least 2 words to avoid ghost entities)
         for pattern in self.PERSON_PATTERNS:
             for match in re.finditer(pattern, text):
                 name = match.group(1) if match.lastindex else match.group(0)
                 canonical = self.canonical_name(name)
-                if canonical and len(canonical) > 1 and canonical not in self.STOP_WORDS:
+                if (canonical
+                        and len(canonical) > 1
+                        and canonical not in self.STOP_WORDS
+                        and len(canonical.split()) >= 2):
                     entities.append(
                         ExtractedEntity(
                             name=name,

package/memory-daemon/claudia_memory/services/guards.py CHANGED Viewed

@@ -42,7 +42,7 @@ def validate_memory(
     Validate a memory before storage.
     Checks:
-    - Content length (warn >500, truncate >1000)
+    - Content length (warn >800, truncate >1000)
     - Commitment deadline detection via regex
     - Importance clamped to [0, 1]
     """
@@ -52,7 +52,7 @@ def validate_memory(
     if len(content) > 1000:
         result.warnings.append(f"Content truncated from {len(content)} to 1000 characters")
         result.adjustments["content"] = content[:1000]
-    elif len(content) > 500:
+    elif len(content) > 800:
         result.warnings.append(f"Long content ({len(content)} chars) -- consider breaking into multiple memories")
     # Importance clamping

package/memory-daemon/claudia_memory/services/remember.py CHANGED Viewed

@@ -1697,6 +1697,11 @@ class RememberService:
         if alias_match:
             return alias_match["entity_id"]
+        # Fuzzy pre-check: find near-matches of the same type
+        fuzzy_match = self._fuzzy_find_entity(extracted.canonical_name, extracted.type)
+        if fuzzy_match:
+            return fuzzy_match
         # Create new entity
         return self.remember_entity(
             name=extracted.name,
@@ -1725,9 +1730,44 @@ class RememberService:
         if alias_match:
             return alias_match["entity_id"]
+        # Fuzzy pre-check: find near-matches of the same type
+        fuzzy_match = self._fuzzy_find_entity(canonical, entity_type)
+        if fuzzy_match:
+            return fuzzy_match
         # Create new
         return self.remember_entity(name=name, entity_type=entity_type)
+    def _fuzzy_find_entity(self, canonical: str, entity_type: str) -> Optional[int]:
+        """Find a near-match entity of the same type using fuzzy string matching.
+        Queries entities of the given type and returns the ID of the best match
+        if similarity > 0.90 (SequenceMatcher ratio). Returns None if no match.
+        """
+        from difflib import SequenceMatcher
+        candidates = self.db.execute(
+            "SELECT id, canonical_name FROM entities WHERE type = ? AND deleted_at IS NULL",
+            (entity_type,),
+            fetch=True,
+        ) or []
+        best_id = None
+        best_ratio = 0.0
+        for row in candidates:
+            ratio = SequenceMatcher(None, canonical, row["canonical_name"]).ratio()
+            if ratio > 0.90 and ratio > best_ratio:
+                best_ratio = ratio
+                best_id = row["id"]
+        if best_id is not None:
+            logger.info(
+                f"Fuzzy entity match: '{canonical}' matched existing entity id={best_id} "
+                f"(type={entity_type}, similarity={best_ratio:.2f})"
+            )
+        return best_id
     def _get_or_create_episode(self, source: Optional[str] = None) -> int:
         """Get current episode or create a new one"""
         # For now, create a new episode each time

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "get-claudia",
-  "version": "1.55.15",
+  "version": "1.55.16",
   "description": "An AI assistant who learns how you work.",
   "keywords": [
     "claudia",