get-claudia 1.9.0 → 1.9.2
This diff compares publicly available package versions as released to their public registry. It is provided for informational purposes only.
- package/memory-daemon/claudia_memory/config.py +8 -0
- package/memory-daemon/claudia_memory/daemon/scheduler.py +19 -0
- package/memory-daemon/claudia_memory/database.py +37 -0
- package/memory-daemon/claudia_memory/mcp/server.py +33 -0
- package/memory-daemon/claudia_memory/schema.sql +7 -0
- package/memory-daemon/claudia_memory/services/consolidate.py +241 -16
- package/memory-daemon/claudia_memory/services/guards.py +131 -0
- package/memory-daemon/claudia_memory/services/remember.py +31 -0
- package/memory-daemon/claudia_memory/services/verify.py +279 -0
- package/memory-daemon/tests/test_guards.py +75 -0
- package/memory-daemon/tests/test_merge.py +158 -0
- package/memory-daemon/tests/test_prediction_feedback.py +109 -0
- package/memory-daemon/tests/test_verify.py +172 -0
- package/package.json +1 -1
- package/template-v2/.claude/commands/research.md +134 -0
- package/template-v2/.claude/skills/concierge.md +246 -0
- package/template-v2/.claude/skills/connector-discovery.md +12 -2
- package/template-v2/.mcp.json.example +14 -0
- package/template-v2/CLAUDE.md +1 -0
package/memory-daemon/claudia_memory/config.py

@@ -40,6 +40,14 @@ class MemoryConfig:
     recency_weight: float = 0.10  # Weight for recency
     fts_weight: float = 0.15  # Weight for FTS5 full-text search match
 
+    # Memory merging
+    similarity_merge_threshold: float = 0.92  # Cosine similarity threshold for merging
+    enable_memory_merging: bool = True  # Toggle memory merging during consolidation
+
+    # Verification
+    verify_interval_minutes: int = 60  # How often to run background verification
+    verify_batch_size: int = 20  # Max memories to verify per run
+
     # Health check
     health_port: int = 3848
 
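The new settings are plain dataclass fields, so a deployment can override them at construction time. A minimal sketch, assuming the remaining `MemoryConfig` fields carry defaults like the visible ones do:

    from claudia_memory.config import MemoryConfig

    # Hypothetical overrides: stricter merging, less frequent verification.
    config = MemoryConfig(
        similarity_merge_threshold=0.95,  # merge only near-identical memories
        verify_interval_minutes=180,      # every 3 hours instead of hourly
    )
    assert config.enable_memory_merging  # merging stays on by default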
package/memory-daemon/claudia_memory/daemon/scheduler.py

@@ -19,6 +19,7 @@ from ..services.consolidate import (
     run_decay,
     run_full_consolidation,
 )
+from ..services.verify import run_verification
 
 logger = logging.getLogger(__name__)
 

@@ -73,6 +74,15 @@ class MemoryScheduler:
             replace_existing=True,
         )
 
+        # Periodic: Memory verification
+        self.scheduler.add_job(
+            self._run_memory_verification,
+            IntervalTrigger(minutes=self.config.verify_interval_minutes),
+            id="memory_verification",
+            name="Background memory verification",
+            replace_existing=True,
+        )
+
         self.scheduler.start()
         self._started = True
         logger.info("Memory scheduler started")

@@ -136,6 +146,15 @@ class MemoryScheduler:
         except Exception as e:
             logger.exception("Error in prediction generation")
 
+    def _run_memory_verification(self) -> None:
+        """Run background memory verification"""
+        try:
+            logger.debug("Running memory verification")
+            result = run_verification()
+            logger.debug(f"Memory verification complete: {result}")
+        except Exception as e:
+            logger.exception("Error in memory verification")
+
 
 # Global scheduler instance
 _scheduler: Optional[MemoryScheduler] = None
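The verification job follows APScheduler's standard interval pattern, and the wrapper method swallows exceptions so one failed run never kills the scheduler. A self-contained sketch of the same shape (the scheduler and trigger are real APScheduler classes; the job body is a stand-in):

    from apscheduler.schedulers.background import BackgroundScheduler
    from apscheduler.triggers.interval import IntervalTrigger

    def verify_job() -> None:
        # Stand-in for _run_memory_verification; in the real job,
        # exceptions are caught so the scheduler keeps running.
        print("verification tick")

    scheduler = BackgroundScheduler()
    scheduler.add_job(
        verify_job,
        IntervalTrigger(minutes=60),  # mirrors verify_interval_minutes
        id="memory_verification",
        replace_existing=True,        # idempotent across restarts
    )
    scheduler.start()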
package/memory-daemon/claudia_memory/database.py

@@ -303,6 +303,43 @@ class Database:
                 logger.warning(f"Migration 4 (FTS5) failed: {e}. FTS5 may not be available.")
                 # FTS5 is optional; the system degrades gracefully without it
 
+        if current_version < 5:
+            # Migration 5: Add verification columns to memories, pattern_name to predictions
+            migration_stmts = [
+                "ALTER TABLE memories ADD COLUMN verified_at TEXT",
+                "ALTER TABLE memories ADD COLUMN verification_status TEXT DEFAULT 'pending'",
+                "ALTER TABLE predictions ADD COLUMN prediction_pattern_name TEXT",
+            ]
+            for stmt in migration_stmts:
+                try:
+                    conn.execute(stmt)
+                except sqlite3.OperationalError as e:
+                    if "duplicate column" not in str(e).lower():
+                        logger.warning(f"Migration 5 statement failed: {e}")
+
+            # Index for verification queries
+            try:
+                conn.execute(
+                    "CREATE INDEX IF NOT EXISTS idx_memories_verification ON memories(verification_status)"
+                )
+            except sqlite3.OperationalError as e:
+                logger.warning(f"Migration 5 index failed: {e}")
+
+            # Grandfather existing memories as verified
+            try:
+                conn.execute(
+                    """UPDATE memories SET verification_status = 'verified', verified_at = datetime('now')
+                       WHERE verification_status = 'pending' OR verification_status IS NULL"""
+                )
+            except sqlite3.OperationalError as e:
+                logger.warning(f"Migration 5 grandfather failed: {e}")
+
+            conn.execute(
+                "INSERT OR IGNORE INTO schema_migrations (version, description) VALUES (5, 'Add verification columns to memories, prediction_pattern_name to predictions')"
+            )
+            conn.commit()
+            logger.info("Applied migration 5: memory verification and prediction feedback")
+
     def execute(
         self, sql: str, params: Tuple = (), fetch: bool = False
     ) -> Optional[List[sqlite3.Row]]:
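Migration 5 is idempotent by error-message convention rather than schema introspection: re-running `ALTER TABLE ... ADD COLUMN` on an existing column raises an OperationalError whose message contains "duplicate column", which the code treats as already-applied. A standalone sketch of that pattern:

    import sqlite3

    conn = sqlite3.connect(":memory:")
    conn.execute("CREATE TABLE memories (id INTEGER PRIMARY KEY)")

    for _ in range(2):  # the second pass is a no-op, not a failure
        try:
            conn.execute("ALTER TABLE memories ADD COLUMN verified_at TEXT")
        except sqlite3.OperationalError as e:
            if "duplicate column" not in str(e).lower():
                raise  # a genuine failure, not re-application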
package/memory-daemon/claudia_memory/mcp/server.py

@@ -570,6 +570,24 @@ async def list_tools() -> ListToolsResult:
                 "properties": {},
             },
         ),
+        Tool(
+            name="memory.prediction_feedback",
+            description="Provide feedback on a prediction -- mark whether the user acted on it. This trains future prediction priority.",
+            inputSchema={
+                "type": "object",
+                "properties": {
+                    "prediction_id": {
+                        "type": "integer",
+                        "description": "The prediction ID to provide feedback for",
+                    },
+                    "acted_on": {
+                        "type": "boolean",
+                        "description": "Whether the user acted on this prediction",
+                    },
+                },
+                "required": ["prediction_id", "acted_on"],
+            },
+        ),
         Tool(
             name="cognitive.ingest",
             description=(

@@ -1021,6 +1039,21 @@ async def call_tool(name: str, arguments: Dict[str, Any]) -> CallToolResult:
             ]
         )
 
+    elif name == "memory.prediction_feedback":
+        svc = get_consolidate_service()
+        svc.mark_prediction_acted_on(
+            prediction_id=arguments["prediction_id"],
+            acted_on=arguments["acted_on"],
+        )
+        return CallToolResult(
+            content=[
+                TextContent(
+                    type="text",
+                    text=json.dumps({"success": True, "prediction_id": arguments["prediction_id"]}),
+                )
+            ]
+        )
+
     elif name == "memory.trace":
         result = trace_memory(memory_id=arguments["memory_id"])
         return CallToolResult(
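End to end, the new tool takes a prediction id and a boolean and echoes a success payload. A hypothetical in-process call; a real MCP client would send this over the protocol instead of invoking the handler directly:

    import asyncio

    result = asyncio.run(
        call_tool("memory.prediction_feedback", {"prediction_id": 42, "acted_on": True})
    )
    # Expected payload in result.content[0].text:
    # {"success": true, "prediction_id": 42}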
package/memory-daemon/claudia_memory/schema.sql

@@ -58,6 +58,8 @@ CREATE TABLE IF NOT EXISTS memories (
     updated_at TEXT DEFAULT (datetime('now')),
     last_accessed_at TEXT,  -- For rehearsal-based importance boost
     access_count INTEGER DEFAULT 0,
+    verified_at TEXT,  -- When this memory was verified
+    verification_status TEXT DEFAULT 'pending',  -- pending, verified, flagged, contradicts
     metadata TEXT  -- JSON blob for flexible attributes
 );
 

@@ -65,6 +67,7 @@ CREATE INDEX IF NOT EXISTS idx_memories_type ON memories(type);
 CREATE INDEX IF NOT EXISTS idx_memories_importance ON memories(importance DESC);
 CREATE INDEX IF NOT EXISTS idx_memories_created ON memories(created_at DESC);
 CREATE INDEX IF NOT EXISTS idx_memories_hash ON memories(content_hash);
+CREATE INDEX IF NOT EXISTS idx_memories_verification ON memories(verification_status);
 
 -- Junction table linking memories to entities
 CREATE TABLE IF NOT EXISTS memory_entities (

@@ -171,6 +174,7 @@ CREATE TABLE IF NOT EXISTS predictions (
     is_acted_on INTEGER DEFAULT 0,  -- Whether user acted on this
     created_at TEXT DEFAULT (datetime('now')),
     shown_at TEXT,
+    prediction_pattern_name TEXT,  -- Links to pattern for feedback loop
     metadata TEXT
 );
 

@@ -256,3 +260,6 @@ VALUES (3, 'Add source_context to memories, is_archived to turn_buffer for episo
 -- NOTE: FTS5 full-text search (migration v4) is created by database.py migration code
 -- rather than here, because CREATE TRIGGER statements contain internal semicolons
 -- that the schema.sql line-based parser cannot handle.
+
+INSERT OR IGNORE INTO schema_migrations (version, description)
+VALUES (5, 'Add verification columns to memories, prediction_pattern_name to predictions');
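The new `idx_memories_verification` index exists to keep the verifier's batch scan cheap. The query below is an assumption about its shape (verify.py is not shown in this diff), illustrating the kind of lookup the index serves:

    import sqlite3

    conn = sqlite3.connect("claudia_memory.db")  # hypothetical path
    rows = conn.execute(
        "SELECT id, content FROM memories "
        "WHERE verification_status = 'pending' "  # served by the new index
        "ORDER BY created_at LIMIT ?",
        (20,),  # verify_batch_size
    ).fetchall()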
package/memory-daemon/claudia_memory/services/consolidate.py

@@ -18,6 +18,18 @@ from ..database import get_db
 logger = logging.getLogger(__name__)
 
 
+def _cosine_similarity(a: list, b: list) -> float:
+    """Pure Python cosine similarity between two vectors."""
+    if len(a) != len(b) or len(a) == 0:
+        return 0.0
+    dot = sum(x * y for x, y in zip(a, b))
+    norm_a = sum(x * x for x in a) ** 0.5
+    norm_b = sum(x * x for x in b) ** 0.5
+    if norm_a == 0 or norm_b == 0:
+        return 0.0
+    return dot / (norm_a * norm_b)
+
+
 @dataclass
 class DetectedPattern:
     """A pattern detected in the user's behavior or data"""
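A quick sanity check of the helper's behavior: parallel unit vectors score 1.0, orthogonal ones 0.0, and the guard clause returns 0.0 for mismatched lengths:

    assert abs(_cosine_similarity([1.0, 0.0], [1.0, 0.0]) - 1.0) < 1e-9
    assert _cosine_similarity([1.0, 0.0], [0.0, 1.0]) == 0.0
    assert _cosine_similarity([1.0], [1.0, 0.0]) == 0.0  # length mismatch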
@@ -38,6 +50,7 @@ class Prediction:
     priority: float
     expires_at: Optional[datetime]
     metadata: Optional[Dict] = None
+    pattern_name: Optional[str] = None
 
 
 class ConsolidateService:

@@ -409,13 +422,15 @@
         )
 
         for pattern in cooling_patterns:
+            feedback = self._get_pattern_feedback("suggestion", pattern["name"])
             predictions.append(
                 Prediction(
                     content=pattern["description"],
                     prediction_type="suggestion",
-                    priority=pattern["confidence"],
+                    priority=pattern["confidence"] * feedback,
                     expires_at=datetime.utcnow() + timedelta(days=7),
                     metadata={"pattern_id": pattern["id"]},
+                    pattern_name=pattern["name"],
                 )
             )
 

@@ -438,13 +453,16 @@
             days_old = (datetime.utcnow() - created).days
 
             if days_old > 3:
+                pattern_name = f"commitment_reminder_{commitment['id']}"
+                feedback = self._get_pattern_feedback("reminder", pattern_name)
                 predictions.append(
                     Prediction(
                         content=f"Commitment from {days_old} days ago: {commitment['content'][:100]}",
                         prediction_type="reminder",
-                        priority=min(1.0, 0.5 + days_old / 14),
+                        priority=min(1.0, 0.5 + days_old / 14) * feedback,
                         expires_at=datetime.utcnow() + timedelta(days=2),
                         metadata={"memory_id": commitment["id"]},
+                        pattern_name=pattern_name,
                     )
                 )
 

@@ -464,13 +482,15 @@
 
         for pattern in patterns:
             if pattern["pattern_type"] == "behavioral":
+                feedback = self._get_pattern_feedback("insight", pattern["name"])
                 predictions.append(
                     Prediction(
                         content=f"Pattern noticed: {pattern['description']}",
                         prediction_type="insight",
-                        priority=pattern["confidence"] * 0.8,
+                        priority=pattern["confidence"] * 0.8 * feedback,
                         expires_at=datetime.utcnow() + timedelta(days=14),
                         metadata={"pattern_id": pattern["id"]},
+                        pattern_name=pattern["name"],
                     )
                 )
 
@@ -478,19 +498,19 @@
 
     def _store_prediction(self, prediction: Prediction) -> int:
         """Store a prediction in the database"""
-
-        "
-
-
-
-
-
-
-
-
-
-
-        )
+        data = {
+            "content": prediction.content,
+            "prediction_type": prediction.prediction_type,
+            "priority": prediction.priority,
+            "expires_at": prediction.expires_at.isoformat() if prediction.expires_at else None,
+            "is_shown": 0,
+            "is_acted_on": 0,
+            "created_at": datetime.utcnow().isoformat(),
+            "metadata": json.dumps(prediction.metadata) if prediction.metadata else None,
+        }
+        if prediction.pattern_name:
+            data["prediction_pattern_name"] = prediction.pattern_name
+        return self.db.insert("predictions", data)
 
     def get_predictions(
         self,
@@ -539,6 +559,208 @@ class ConsolidateService:
             (prediction_id,),
         )
 
+    def mark_prediction_acted_on(self, prediction_id: int, acted_on: bool) -> None:
+        """Mark a prediction as acted on (or not) by the user"""
+        self.db.update(
+            "predictions",
+            {"is_acted_on": 1 if acted_on else 0},
+            "id = ?",
+            (prediction_id,),
+        )
+
+    def _get_pattern_feedback(self, prediction_type: str, pattern_name: str) -> float:
+        """
+        Return a priority multiplier based on past user engagement with similar predictions.
+
+        Rules:
+        - <5 shown predictions for this type: return 1.0 (insufficient data)
+        - act_ratio < 0.1: return 0.5 (user ignores these, halve priority)
+        - act_ratio > 0.5: return 1.25 (user values these, boost priority)
+        - Otherwise: return 1.0 (neutral)
+        """
+        try:
+            rows = self.db.execute(
+                """
+                SELECT COUNT(*) as total,
+                       SUM(CASE WHEN is_acted_on = 1 THEN 1 ELSE 0 END) as acted
+                FROM predictions
+                WHERE prediction_type = ? AND is_shown = 1
+                """,
+                (prediction_type,),
+                fetch=True,
+            )
+            if not rows:
+                return 1.0
+
+            total = rows[0]["total"] or 0
+            acted = rows[0]["acted"] or 0
+
+            if total < 5:
+                return 1.0
+
+            act_ratio = acted / total
+            if act_ratio < 0.1:
+                return 0.5
+            elif act_ratio > 0.5:
+                return 1.25
+            return 1.0
+        except Exception as e:
+            logger.debug(f"Feedback lookup failed: {e}")
+            return 1.0
+
+    def merge_similar_memories(self) -> int:
+        """
+        Merge semantically similar memories during consolidation.
+        Uses existing stored embeddings -- no new Ollama calls.
+
+        Returns:
+            Count of merged memory pairs
+        """
+        if not self.config.enable_memory_merging:
+            return 0
+
+        threshold = self.config.similarity_merge_threshold
+        merged_count = 0
+
+        try:
+            # Find entities with 5+ linked memories (high-memory entities first)
+            entity_rows = self.db.execute(
+                """
+                SELECT me.entity_id, COUNT(DISTINCT me.memory_id) as mem_count
+                FROM memory_entities me
+                GROUP BY me.entity_id
+                HAVING mem_count >= 5
+                ORDER BY mem_count DESC
+                LIMIT 50
+                """,
+                fetch=True,
+            ) or []
+
+            for entity_row in entity_rows:
+                entity_id = entity_row["entity_id"]
+
+                # Load memory IDs and embeddings for this entity
+                mem_rows = self.db.execute(
+                    """
+                    SELECT me.memory_id, m.importance, m.access_count,
+                           emb.embedding
+                    FROM memory_entities me
+                    JOIN memories m ON me.memory_id = m.id
+                    LEFT JOIN memory_embeddings emb ON m.id = emb.memory_id
+                    WHERE me.entity_id = ?
+                      AND m.importance > 0.01
+                    ORDER BY m.importance DESC
+                    """,
+                    (entity_id,),
+                    fetch=True,
+                ) or []
+
+                # Parse embeddings
+                memories_with_emb = []
+                for row in mem_rows:
+                    if row["embedding"]:
+                        try:
+                            emb = json.loads(row["embedding"]) if isinstance(row["embedding"], str) else row["embedding"]
+                            memories_with_emb.append({
+                                "id": row["memory_id"],
+                                "importance": row["importance"],
+                                "access_count": row["access_count"] or 0,
+                                "embedding": emb,
+                            })
+                        except (json.JSONDecodeError, TypeError):
+                            continue
+
+                if len(memories_with_emb) < 2:
+                    continue
+
+                # Pairwise cosine similarity
+                already_merged = set()
+                for i in range(len(memories_with_emb)):
+                    if memories_with_emb[i]["id"] in already_merged:
+                        continue
+                    for j in range(i + 1, len(memories_with_emb)):
+                        if memories_with_emb[j]["id"] in already_merged:
+                            continue
+
+                        sim = _cosine_similarity(
+                            memories_with_emb[i]["embedding"],
+                            memories_with_emb[j]["embedding"],
+                        )
+                        if sim >= threshold:
+                            # Keep the one with higher importance * (1 + access_count)
+                            score_i = memories_with_emb[i]["importance"] * (1 + memories_with_emb[i]["access_count"])
+                            score_j = memories_with_emb[j]["importance"] * (1 + memories_with_emb[j]["access_count"])
+
+                            if score_i >= score_j:
+                                primary_id = memories_with_emb[i]["id"]
+                                duplicate_id = memories_with_emb[j]["id"]
+                            else:
+                                primary_id = memories_with_emb[j]["id"]
+                                duplicate_id = memories_with_emb[i]["id"]
+
+                            self._merge_memory_pair(primary_id, duplicate_id)
+                            already_merged.add(duplicate_id)
+                            merged_count += 1
+
+        except Exception as e:
+            logger.warning(f"Memory merging failed: {e}")
+
+        if merged_count > 0:
+            logger.info(f"Merged {merged_count} near-duplicate memory pairs")
+        return merged_count
+
+    def _merge_memory_pair(self, primary_id: int, duplicate_id: int) -> None:
+        """
+        Merge a duplicate memory into the primary.
+
+        - Transfers entity links from duplicate to primary
+        - Adds merged_from to primary's metadata
+        - Sets duplicate importance to 0.001
+        """
+        # Transfer entity links
+        dup_links = self.db.execute(
+            "SELECT entity_id, relationship FROM memory_entities WHERE memory_id = ?",
+            (duplicate_id,),
+            fetch=True,
+        ) or []
+
+        for link in dup_links:
+            try:
+                self.db.insert(
+                    "memory_entities",
+                    {
+                        "memory_id": primary_id,
+                        "entity_id": link["entity_id"],
+                        "relationship": link["relationship"],
+                    },
+                )
+            except Exception:
+                pass  # Duplicate link, ignore
+
+        # Update primary's metadata with merge info
+        primary = self.db.get_one("memories", where="id = ?", where_params=(primary_id,))
+        if primary:
+            meta = json.loads(primary["metadata"] or "{}")
+            merged_from = meta.get("merged_from", [])
+            merged_from.append(duplicate_id)
+            meta["merged_from"] = merged_from
+            self.db.update(
+                "memories",
+                {"metadata": json.dumps(meta), "updated_at": datetime.utcnow().isoformat()},
+                "id = ?",
+                (primary_id,),
+            )
+
+        # Suppress duplicate (don't delete, just minimize importance)
+        self.db.update(
+            "memories",
+            {"importance": 0.001, "updated_at": datetime.utcnow().isoformat()},
+            "id = ?",
+            (duplicate_id,),
+        )
+
+        logger.debug(f"Merged memory {duplicate_id} into {primary_id}")
+
     def run_full_consolidation(self) -> Dict[str, Any]:
         """
         Run complete consolidation: decay, patterns, predictions.
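Two pieces of arithmetic above are worth making concrete. For feedback: with 12 shown suggestions of which 1 was acted on, act_ratio = 1/12 ≈ 0.083 < 0.1, so the multiplier is 0.5 and a confidence-0.8 pattern is stored at priority 0.4. For merging: the survivor of a similar pair is the memory with the higher importance * (1 + access_count) score. A sketch with illustrative values:

    # Feedback multiplier, per the rules in _get_pattern_feedback
    total, acted = 12, 1
    ratio = acted / total  # ~0.083
    multiplier = 0.5 if ratio < 0.1 else (1.25 if ratio > 0.5 else 1.0)
    print(0.8 * multiplier)  # 0.4

    # Survivor selection, per merge_similar_memories
    def score(m):
        return m["importance"] * (1 + m["access_count"])

    a = {"id": 7, "importance": 0.6, "access_count": 4}  # score 3.0
    b = {"id": 9, "importance": 0.9, "access_count": 1}  # score 1.8
    primary = a if score(a) >= score(b) else b  # memory 7 survives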
@@ -554,6 +776,9 @@
         # Boost accessed memories
         results["boosted"] = self.boost_accessed_memories()
 
+        # Merge near-duplicate memories
+        results["merged"] = self.merge_similar_memories()
+
         # Detect patterns
         patterns = self.detect_patterns()
         results["patterns_detected"] = len(patterns)
package/memory-daemon/claudia_memory/services/guards.py (new file)

@@ -0,0 +1,131 @@
+"""
+Deterministic Guards for Claudia Memory System
+
+Pure-Python validation on memory writes. Zero LLM cost, always on.
+Guards are advisory -- they warn and auto-correct, never block writes.
+"""
+
+import logging
+import re
+from dataclasses import dataclass, field
+from difflib import SequenceMatcher
+from typing import Any, Dict, List, Optional
+
+logger = logging.getLogger(__name__)
+
+# Regex patterns for commitment deadline detection
+DEADLINE_PATTERNS = [
+    re.compile(r"\b(by|before|due|until|deadline)\s+\w+", re.IGNORECASE),
+    re.compile(r"\b\d{1,2}[/-]\d{1,2}([/-]\d{2,4})?\b"),  # Date formats: 1/15, 01-15-2025
+    re.compile(r"\b(january|february|march|april|may|june|july|august|september|october|november|december)\s+\d{1,2}\b", re.IGNORECASE),
+    re.compile(r"\b(monday|tuesday|wednesday|thursday|friday|saturday|sunday)\b", re.IGNORECASE),
+    re.compile(r"\b(tomorrow|tonight|next week|next month|end of (week|month|day|year))\b", re.IGNORECASE),
+    re.compile(r"\bEOD\b|\bEOW\b|\bEOM\b"),
+]
+
+
+@dataclass
+class ValidationResult:
+    """Result of a validation check"""
+    is_valid: bool = True
+    warnings: List[str] = field(default_factory=list)
+    adjustments: Dict[str, Any] = field(default_factory=dict)
+
+
+def validate_memory(
+    content: str,
+    memory_type: str = "fact",
+    importance: float = 1.0,
+    metadata: Optional[Dict] = None,
+) -> ValidationResult:
+    """
+    Validate a memory before storage.
+
+    Checks:
+    - Content length (warn >500, truncate >1000)
+    - Commitment deadline detection via regex
+    - Importance clamped to [0, 1]
+    """
+    result = ValidationResult()
+
+    # Content length checks
+    if len(content) > 1000:
+        result.warnings.append(f"Content truncated from {len(content)} to 1000 characters")
+        result.adjustments["content"] = content[:1000]
+    elif len(content) > 500:
+        result.warnings.append(f"Long content ({len(content)} chars) -- consider breaking into multiple memories")
+
+    # Importance clamping
+    if importance < 0:
+        result.warnings.append(f"Importance {importance} clamped to 0.0")
+        result.adjustments["importance"] = 0.0
+    elif importance > 1:
+        result.warnings.append(f"Importance {importance} clamped to 1.0")
+        result.adjustments["importance"] = 1.0
+
+    # Commitment deadline detection
+    if memory_type == "commitment":
+        has_deadline = any(p.search(content) for p in DEADLINE_PATTERNS)
+        if not has_deadline:
+            result.warnings.append("Commitment has no detected deadline -- consider adding a target date")
+
+    return result
+
+
+def validate_entity(
+    name: str,
+    entity_type: str = "",
+    existing_canonical_names: Optional[List[str]] = None,
+) -> ValidationResult:
+    """
+    Validate an entity before storage.
+
+    Checks:
+    - Type is required (non-empty)
+    - Name is not empty
+    - Near-duplicate name detection via SequenceMatcher (ratio > 0.85)
+    """
+    result = ValidationResult()
+
+    # Name check
+    if not name or not name.strip():
+        result.is_valid = False
+        result.warnings.append("Entity name cannot be empty")
+        return result
+
+    # Type check
+    if not entity_type or not entity_type.strip():
+        result.warnings.append("Entity type is required but was empty -- defaulting to 'person'")
+        result.adjustments["entity_type"] = "person"
+
+    # Near-duplicate detection
+    if existing_canonical_names:
+        canonical = name.strip().lower()
+        for existing in existing_canonical_names:
+            ratio = SequenceMatcher(None, canonical, existing.lower()).ratio()
+            if ratio > 0.85 and canonical != existing.lower():
+                result.warnings.append(
+                    f"Near-duplicate entity name: '{name}' is similar to existing '{existing}' "
+                    f"(similarity: {ratio:.2f})"
+                )
+
+    return result
+
+
+def validate_relationship(strength: float = 1.0) -> ValidationResult:
+    """
+    Validate a relationship before storage.
+
+    Checks:
+    - Strength clamped to [0, 1]
+    """
+    result = ValidationResult()
+
+    if strength < 0:
+        result.warnings.append(f"Relationship strength {strength} clamped to 0.0")
+        result.adjustments["strength"] = 0.0
+    elif strength > 1:
+        result.warnings.append(f"Relationship strength {strength} clamped to 1.0")
+        result.adjustments["strength"] = 1.0
+
+    return result
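Because guards are advisory, a caller applies `adjustments` and surfaces `warnings` instead of rejecting the write. A usage sketch against the functions above:

    result = validate_memory(
        content="Ship the quarterly report",
        memory_type="commitment",
        importance=1.7,
    )
    print(result.is_valid)  # True -- guards warn, they never block
    print(result.warnings)  # missing-deadline warning + importance clamp
    importance = result.adjustments.get("importance", 1.7)  # clamped to 1.0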