get-claudia 1.55.15 → 1.55.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +13 -0
- package/memory-daemon/claudia_memory/daemon/scheduler.py +3 -1
- package/memory-daemon/claudia_memory/extraction/entity_extractor.py +32 -46
- package/memory-daemon/claudia_memory/services/guards.py +2 -2
- package/memory-daemon/claudia_memory/services/remember.py +40 -0
- package/package.json +1 -1
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,19 @@
|
|
|
2
2
|
|
|
3
3
|
All notable changes to Claudia will be documented in this file.
|
|
4
4
|
|
|
5
|
+
## 1.55.16 (2026-03-18)
|
|
6
|
+
|
|
7
|
+
### Reliability Fixes
|
|
8
|
+
|
|
9
|
+
Fixes in three areas surfaced from daemon logs: missed overnight jobs, log noise, and duplicate or ghost entities. All backward-compatible, no schema changes.
|
|
10
|
+
|
|
11
|
+
- **Overnight jobs now fire after sleep** -- APScheduler's `BackgroundScheduler` now has `misfire_grace_time=14400` (4 hours) and `coalesce=True`. Previously, the default 1-second grace time meant every scheduled job (decay, backup, consolidation, vault sync) was silently skipped when a Mac slept through the 2am-3:15am window. Now jobs fire immediately on wake if missed within the last 4 hours, with multiple missed runs collapsed into one execution.
|
|
12
|
+
- **Reduced log noise from summary memories** -- The content length warning threshold was raised from 500 to 800 chars. Legitimate summary-type memories of up to 800 chars no longer trigger "Long content" warnings; longer content still warns. Hard truncation at 1000 chars is unchanged.
|
|
13
|
+
- **Fuzzy entity dedup on write** -- `_ensure_entity()` and `_find_or_create_entity()` now perform a fuzzy pre-check (SequenceMatcher ratio > 0.90) before creating new entities. Name variants like "Kris Krisko" vs "Kris Krisco" (ratio ~0.92) match the existing entity instead of creating a duplicate. The check only compares entities of the same type and skips deleted entities.
|
|
14
|
+
- **Expanded STOP_WORDS** -- Added ~55 common English words that spaCy misidentifies as entities ("drawn", "overall", "recently", "several", etc.). Prevents ghost entities from cluttering the graph.
|
|
15
|
+
- **Person entities require 2+ words** -- Regex-extracted person entities must have at least two words (e.g., "First Last"). Single-word extractions like "Metal" or "Drawn" are rejected. spaCy-identified entities are unaffected.
|
|
16
|
+
- 637 tests pass, 0 regressions, 22 new tests across 4 new test files.
|
|
17
|
+
|
|
5
18
|
## 1.55.15 (2026-03-18)
|
|
6
19
|
|
|
7
20
|
- **Fix mixed-timezone datetime crash** -- The memory daemon could crash with `can't subtract offset-naive and offset-aware datetimes` when recall or consolidation queries hit records with timezone suffixes (e.g., `+00:00` from email or transcript timestamps). Added a shared `parse_naive()` utility that strips timezone info on parse, applied across 14 locations in 5 files (recall.py, consolidate.py, server.py, vault_sync.py, canvas_generator.py). Replaces the older `[:19]` string truncation workaround. 615 tests pass.
|
|
@@ -27,7 +27,9 @@ class MemoryScheduler:
|
|
|
27
27
|
"""Manages scheduled memory maintenance tasks"""
|
|
28
28
|
|
|
29
29
|
def __init__(self):
|
|
30
|
-
self.scheduler = BackgroundScheduler(
|
|
30
|
+
self.scheduler = BackgroundScheduler(
|
|
31
|
+
job_defaults={"misfire_grace_time": 14400, "coalesce": True}
|
|
32
|
+
)
|
|
31
33
|
self.config = get_config()
|
|
32
34
|
self._started = False
|
|
33
35
|
|
|
@@ -188,50 +188,33 @@ class EntityExtractor:
|
|
|
188
188
|
|
|
189
189
|
# Common non-entity words to filter out
|
|
190
190
|
STOP_WORDS = {
|
|
191
|
-
|
|
192
|
-
"tuesday",
|
|
193
|
-
"
|
|
194
|
-
"
|
|
195
|
-
"
|
|
196
|
-
|
|
197
|
-
"
|
|
198
|
-
"
|
|
199
|
-
|
|
200
|
-
"
|
|
201
|
-
"
|
|
202
|
-
|
|
203
|
-
"
|
|
204
|
-
"
|
|
205
|
-
"
|
|
206
|
-
|
|
207
|
-
"
|
|
208
|
-
"
|
|
209
|
-
"
|
|
210
|
-
"
|
|
211
|
-
"
|
|
212
|
-
|
|
213
|
-
"
|
|
214
|
-
"
|
|
215
|
-
|
|
216
|
-
"
|
|
217
|
-
"
|
|
218
|
-
"this",
|
|
219
|
-
"that",
|
|
220
|
-
"these",
|
|
221
|
-
"those",
|
|
222
|
-
"here",
|
|
223
|
-
"there",
|
|
224
|
-
"where",
|
|
225
|
-
"when",
|
|
226
|
-
"what",
|
|
227
|
-
"which",
|
|
228
|
-
"who",
|
|
229
|
-
"how",
|
|
230
|
-
"just",
|
|
231
|
-
"only",
|
|
232
|
-
"also",
|
|
233
|
-
"even",
|
|
234
|
-
"still",
|
|
191
|
+
# Days and months
|
|
192
|
+
"monday", "tuesday", "wednesday", "thursday", "friday",
|
|
193
|
+
"saturday", "sunday",
|
|
194
|
+
"january", "february", "march", "april", "may", "june",
|
|
195
|
+
"july", "august", "september", "october", "november", "december",
|
|
196
|
+
# Temporal
|
|
197
|
+
"today", "tomorrow", "yesterday",
|
|
198
|
+
"morning", "afternoon", "evening", "night",
|
|
199
|
+
# Pronouns and determiners
|
|
200
|
+
"the", "this", "that", "these", "those",
|
|
201
|
+
"here", "there", "where", "when", "what", "which", "who", "how",
|
|
202
|
+
# Adverbs
|
|
203
|
+
"just", "only", "also", "even", "still",
|
|
204
|
+
"recently", "nearly", "almost", "already", "rather",
|
|
205
|
+
"somewhat", "perhaps", "quite", "likely", "enough",
|
|
206
|
+
# Quantifiers and adjectives
|
|
207
|
+
"several", "various", "another", "certain",
|
|
208
|
+
"much", "many", "some", "most", "both",
|
|
209
|
+
"each", "every", "other", "such", "same",
|
|
210
|
+
"new", "old", "big", "long", "last", "next",
|
|
211
|
+
"good", "well", "nice", "overall", "drawn",
|
|
212
|
+
# Common verbs (past tense / short forms spaCy misidentifies)
|
|
213
|
+
"done", "made", "said", "went", "got",
|
|
214
|
+
"set", "put", "run", "let", "get",
|
|
215
|
+
# Common nouns too generic to be entities
|
|
216
|
+
"work", "part", "plan", "team", "data",
|
|
217
|
+
"note", "time", "home", "call", "open",
|
|
235
218
|
}
|
|
236
219
|
|
|
237
220
|
def __init__(self):
|
|
@@ -296,12 +279,15 @@ class EntityExtractor:
|
|
|
296
279
|
"""Extract entities using regex patterns"""
|
|
297
280
|
entities = []
|
|
298
281
|
|
|
299
|
-
# Extract persons
|
|
282
|
+
# Extract persons (require at least 2 words to avoid ghost entities)
|
|
300
283
|
for pattern in self.PERSON_PATTERNS:
|
|
301
284
|
for match in re.finditer(pattern, text):
|
|
302
285
|
name = match.group(1) if match.lastindex else match.group(0)
|
|
303
286
|
canonical = self.canonical_name(name)
|
|
304
|
-
if
|
|
287
|
+
if (canonical
|
|
288
|
+
and len(canonical) > 1
|
|
289
|
+
and canonical not in self.STOP_WORDS
|
|
290
|
+
and len(canonical.split()) >= 2):
|
|
305
291
|
entities.append(
|
|
306
292
|
ExtractedEntity(
|
|
307
293
|
name=name,
|
|
@@ -42,7 +42,7 @@ def validate_memory(
|
|
|
42
42
|
Validate a memory before storage.
|
|
43
43
|
|
|
44
44
|
Checks:
|
|
45
|
-
- Content length (warn >
|
|
45
|
+
- Content length (warn >800, truncate >1000)
|
|
46
46
|
- Commitment deadline detection via regex
|
|
47
47
|
- Importance clamped to [0, 1]
|
|
48
48
|
"""
|
|
@@ -52,7 +52,7 @@ def validate_memory(
|
|
|
52
52
|
if len(content) > 1000:
|
|
53
53
|
result.warnings.append(f"Content truncated from {len(content)} to 1000 characters")
|
|
54
54
|
result.adjustments["content"] = content[:1000]
|
|
55
|
-
elif len(content) >
|
|
55
|
+
elif len(content) > 800:
|
|
56
56
|
result.warnings.append(f"Long content ({len(content)} chars) -- consider breaking into multiple memories")
|
|
57
57
|
|
|
58
58
|
# Importance clamping
|
|
@@ -1697,6 +1697,11 @@ class RememberService:
|
|
|
1697
1697
|
if alias_match:
|
|
1698
1698
|
return alias_match["entity_id"]
|
|
1699
1699
|
|
|
1700
|
+
# Fuzzy pre-check: find near-matches of the same type
|
|
1701
|
+
fuzzy_match = self._fuzzy_find_entity(extracted.canonical_name, extracted.type)
|
|
1702
|
+
if fuzzy_match:
|
|
1703
|
+
return fuzzy_match
|
|
1704
|
+
|
|
1700
1705
|
# Create new entity
|
|
1701
1706
|
return self.remember_entity(
|
|
1702
1707
|
name=extracted.name,
|
|
@@ -1725,9 +1730,44 @@ class RememberService:
|
|
|
1725
1730
|
if alias_match:
|
|
1726
1731
|
return alias_match["entity_id"]
|
|
1727
1732
|
|
|
1733
|
+
# Fuzzy pre-check: find near-matches of the same type
|
|
1734
|
+
fuzzy_match = self._fuzzy_find_entity(canonical, entity_type)
|
|
1735
|
+
if fuzzy_match:
|
|
1736
|
+
return fuzzy_match
|
|
1737
|
+
|
|
1728
1738
|
# Create new
|
|
1729
1739
|
return self.remember_entity(name=name, entity_type=entity_type)
|
|
1730
1740
|
|
|
1741
|
+
def _fuzzy_find_entity(self, canonical: str, entity_type: str) -> Optional[int]:
|
|
1742
|
+
"""Find a near-match entity of the same type using fuzzy string matching.
|
|
1743
|
+
|
|
1744
|
+
Queries entities of the given type and returns the ID of the best match
|
|
1745
|
+
if similarity > 0.90 (SequenceMatcher ratio). Returns None if no match.
|
|
1746
|
+
"""
|
|
1747
|
+
from difflib import SequenceMatcher
|
|
1748
|
+
|
|
1749
|
+
candidates = self.db.execute(
|
|
1750
|
+
"SELECT id, canonical_name FROM entities WHERE type = ? AND deleted_at IS NULL",
|
|
1751
|
+
(entity_type,),
|
|
1752
|
+
fetch=True,
|
|
1753
|
+
) or []
|
|
1754
|
+
|
|
1755
|
+
best_id = None
|
|
1756
|
+
best_ratio = 0.0
|
|
1757
|
+
for row in candidates:
|
|
1758
|
+
ratio = SequenceMatcher(None, canonical, row["canonical_name"]).ratio()
|
|
1759
|
+
if ratio > 0.90 and ratio > best_ratio:
|
|
1760
|
+
best_ratio = ratio
|
|
1761
|
+
best_id = row["id"]
|
|
1762
|
+
|
|
1763
|
+
if best_id is not None:
|
|
1764
|
+
logger.info(
|
|
1765
|
+
f"Fuzzy entity match: '{canonical}' matched existing entity id={best_id} "
|
|
1766
|
+
f"(type={entity_type}, similarity={best_ratio:.2f})"
|
|
1767
|
+
)
|
|
1768
|
+
|
|
1769
|
+
return best_id
|
|
1770
|
+
|
|
1731
1771
|
def _get_or_create_episode(self, source: Optional[str] = None) -> int:
|
|
1732
1772
|
"""Get current episode or create a new one"""
|
|
1733
1773
|
# For now, create a new episode each time
|