get-claudia 1.55.15 → 1.55.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,19 @@
2
2
 
3
3
  All notable changes to Claudia will be documented in this file.
4
4
 
5
+ ## 1.55.16 (2026-03-18)
6
+
7
+ ### Reliability Fixes
8
+
9
+ Five fixes for issues surfaced from daemon logs. All backward-compatible, no schema changes.
10
+
11
+ - **Overnight jobs now fire after sleep** -- APScheduler's `BackgroundScheduler` now has `misfire_grace_time=14400` (4 hours) and `coalesce=True`. Previously, the default 1-second grace time meant every scheduled job (decay, backup, consolidation, vault sync) was silently skipped when a Mac slept through the 2am-3:15am window. Now jobs fire immediately on wake if missed within the last 4 hours, with multiple missed runs collapsed into one execution.
12
+ - **Reduced log noise from summary memories** -- The content length warning threshold was raised from 500 to 800 chars. Legitimate summary-type memories (550-850 chars) no longer trigger "Long content" warnings. Hard truncation at 1000 chars is unchanged.
13
+ - **Fuzzy entity dedup on write** -- `_ensure_entity()` and `_find_or_create_entity()` now perform a fuzzy pre-check (SequenceMatcher ratio > 0.90) before creating new entities. Name variants like "Kris Krisko" vs "Kris Krisco" (ratio ~0.91) match the existing entity instead of creating a duplicate. Only entities of the same type are compared, and deleted entities are skipped.
14
+ - **Expanded STOP_WORDS** -- Added ~55 common English words that spaCy misidentifies as entities ("drawn", "overall", "recently", "several", etc.). Prevents ghost entities from cluttering the graph.
15
+ - **Person entities require 2+ words** -- Regex-extracted person entities must have at least two words (e.g., "First Last"). Single-word extractions like "Metal" or "Drawn" are rejected. spaCy-identified entities are unaffected.
16
+ - 637 tests pass, 0 regressions, 22 new tests across 4 new test files.
17
+
5
18
  ## 1.55.15 (2026-03-18)
6
19
 
7
20
  - **Fix mixed-timezone datetime crash** -- The memory daemon could crash with `can't subtract offset-naive and offset-aware datetimes` when recall or consolidation queries hit records with timezone suffixes (e.g., `+00:00` from email or transcript timestamps). Added a shared `parse_naive()` utility that strips timezone info on parse, applied across 14 locations in 5 files (recall.py, consolidate.py, server.py, vault_sync.py, canvas_generator.py). Replaces the older `[:19]` string truncation workaround. 615 tests pass.
@@ -27,7 +27,9 @@ class MemoryScheduler:
27
27
  """Manages scheduled memory maintenance tasks"""
28
28
 
29
29
  def __init__(self):
30
- self.scheduler = BackgroundScheduler()
30
+ self.scheduler = BackgroundScheduler(
31
+ job_defaults={"misfire_grace_time": 14400, "coalesce": True}
32
+ )
31
33
  self.config = get_config()
32
34
  self._started = False
33
35
 
@@ -188,50 +188,33 @@ class EntityExtractor:
188
188
 
189
189
  # Common non-entity words to filter out
190
190
  STOP_WORDS = {
191
- "monday",
192
- "tuesday",
193
- "wednesday",
194
- "thursday",
195
- "friday",
196
- "saturday",
197
- "sunday",
198
- "january",
199
- "february",
200
- "march",
201
- "april",
202
- "may",
203
- "june",
204
- "july",
205
- "august",
206
- "september",
207
- "october",
208
- "november",
209
- "december",
210
- "today",
211
- "tomorrow",
212
- "yesterday",
213
- "morning",
214
- "afternoon",
215
- "evening",
216
- "night",
217
- "the",
218
- "this",
219
- "that",
220
- "these",
221
- "those",
222
- "here",
223
- "there",
224
- "where",
225
- "when",
226
- "what",
227
- "which",
228
- "who",
229
- "how",
230
- "just",
231
- "only",
232
- "also",
233
- "even",
234
- "still",
191
+ # Days and months
192
+ "monday", "tuesday", "wednesday", "thursday", "friday",
193
+ "saturday", "sunday",
194
+ "january", "february", "march", "april", "may", "june",
195
+ "july", "august", "september", "october", "november", "december",
196
+ # Temporal
197
+ "today", "tomorrow", "yesterday",
198
+ "morning", "afternoon", "evening", "night",
199
+ # Pronouns and determiners
200
+ "the", "this", "that", "these", "those",
201
+ "here", "there", "where", "when", "what", "which", "who", "how",
202
+ # Adverbs
203
+ "just", "only", "also", "even", "still",
204
+ "recently", "nearly", "almost", "already", "rather",
205
+ "somewhat", "perhaps", "quite", "likely", "enough",
206
+ # Quantifiers and adjectives
207
+ "several", "various", "another", "certain",
208
+ "much", "many", "some", "most", "both",
209
+ "each", "every", "other", "such", "same",
210
+ "new", "old", "big", "long", "last", "next",
211
+ "good", "well", "nice", "overall", "drawn",
212
+ # Common verbs (past tense / short forms spaCy misidentifies)
213
+ "done", "made", "said", "went", "got",
214
+ "set", "put", "run", "let", "get",
215
+ # Common nouns too generic to be entities
216
+ "work", "part", "plan", "team", "data",
217
+ "note", "time", "home", "call", "open",
235
218
  }
236
219
 
237
220
  def __init__(self):
@@ -296,12 +279,15 @@ class EntityExtractor:
296
279
  """Extract entities using regex patterns"""
297
280
  entities = []
298
281
 
299
- # Extract persons
282
+ # Extract persons (require at least 2 words to avoid ghost entities)
300
283
  for pattern in self.PERSON_PATTERNS:
301
284
  for match in re.finditer(pattern, text):
302
285
  name = match.group(1) if match.lastindex else match.group(0)
303
286
  canonical = self.canonical_name(name)
304
- if canonical and len(canonical) > 1 and canonical not in self.STOP_WORDS:
287
+ if (canonical
288
+ and len(canonical) > 1
289
+ and canonical not in self.STOP_WORDS
290
+ and len(canonical.split()) >= 2):
305
291
  entities.append(
306
292
  ExtractedEntity(
307
293
  name=name,
@@ -42,7 +42,7 @@ def validate_memory(
42
42
  Validate a memory before storage.
43
43
 
44
44
  Checks:
45
- - Content length (warn >500, truncate >1000)
45
+ - Content length (warn >800, truncate >1000)
46
46
  - Commitment deadline detection via regex
47
47
  - Importance clamped to [0, 1]
48
48
  """
@@ -52,7 +52,7 @@ def validate_memory(
52
52
  if len(content) > 1000:
53
53
  result.warnings.append(f"Content truncated from {len(content)} to 1000 characters")
54
54
  result.adjustments["content"] = content[:1000]
55
- elif len(content) > 500:
55
+ elif len(content) > 800:
56
56
  result.warnings.append(f"Long content ({len(content)} chars) -- consider breaking into multiple memories")
57
57
 
58
58
  # Importance clamping
@@ -1697,6 +1697,11 @@ class RememberService:
1697
1697
  if alias_match:
1698
1698
  return alias_match["entity_id"]
1699
1699
 
1700
+ # Fuzzy pre-check: find near-matches of the same type
1701
+ fuzzy_match = self._fuzzy_find_entity(extracted.canonical_name, extracted.type)
1702
+ if fuzzy_match:
1703
+ return fuzzy_match
1704
+
1700
1705
  # Create new entity
1701
1706
  return self.remember_entity(
1702
1707
  name=extracted.name,
@@ -1725,9 +1730,44 @@ class RememberService:
1725
1730
  if alias_match:
1726
1731
  return alias_match["entity_id"]
1727
1732
 
1733
+ # Fuzzy pre-check: find near-matches of the same type
1734
+ fuzzy_match = self._fuzzy_find_entity(canonical, entity_type)
1735
+ if fuzzy_match:
1736
+ return fuzzy_match
1737
+
1728
1738
  # Create new
1729
1739
  return self.remember_entity(name=name, entity_type=entity_type)
1730
1740
 
1741
+ def _fuzzy_find_entity(self, canonical: str, entity_type: str) -> Optional[int]:
1742
+ """Find a near-match entity of the same type using fuzzy string matching.
1743
+
1744
+ Queries entities of the given type and returns the ID of the best match
1745
+ if similarity > 0.90 (SequenceMatcher ratio). Returns None if no match.
1746
+ """
1747
+ from difflib import SequenceMatcher
1748
+
1749
+ candidates = self.db.execute(
1750
+ "SELECT id, canonical_name FROM entities WHERE type = ? AND deleted_at IS NULL",
1751
+ (entity_type,),
1752
+ fetch=True,
1753
+ ) or []
1754
+
1755
+ best_id = None
1756
+ best_ratio = 0.0
1757
+ for row in candidates:
1758
+ ratio = SequenceMatcher(None, canonical, row["canonical_name"]).ratio()
1759
+ if ratio > 0.90 and ratio > best_ratio:
1760
+ best_ratio = ratio
1761
+ best_id = row["id"]
1762
+
1763
+ if best_id is not None:
1764
+ logger.info(
1765
+ f"Fuzzy entity match: '{canonical}' matched existing entity id={best_id} "
1766
+ f"(type={entity_type}, similarity={best_ratio:.2f})"
1767
+ )
1768
+
1769
+ return best_id
1770
+
1731
1771
  def _get_or_create_episode(self, source: Optional[str] = None) -> int:
1732
1772
  """Get current episode or create a new one"""
1733
1773
  # For now, create a new episode each time
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "get-claudia",
3
- "version": "1.55.15",
3
+ "version": "1.55.16",
4
4
  "description": "An AI assistant who learns how you work.",
5
5
  "keywords": [
6
6
  "claudia",