get-claudia 1.9.0 → 1.9.2

This diff covers publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as published in their public registry.
@@ -40,6 +40,14 @@ class MemoryConfig:
     recency_weight: float = 0.10  # Weight for recency
     fts_weight: float = 0.15  # Weight for FTS5 full-text search match
 
+    # Memory merging
+    similarity_merge_threshold: float = 0.92  # Cosine similarity threshold for merging
+    enable_memory_merging: bool = True  # Toggle memory merging during consolidation
+
+    # Verification
+    verify_interval_minutes: int = 60  # How often to run background verification
+    verify_batch_size: int = 20  # Max memories to verify per run
+
     # Health check
     health_port: int = 3848
 
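
The two new groups of knobs are plain dataclass fields, so they can be overridden like any other config value. A quick sketch, assuming MemoryConfig is the dataclass shown above and importable from the package's config module (the import path and the dataclasses.replace usage are illustrative, not from this diff):

    from dataclasses import replace

    config = MemoryConfig()  # defaults as in the hunk above
    strict = replace(
        config,
        similarity_merge_threshold=0.97,  # merge only near-identical memories
        verify_interval_minutes=30,       # verify twice as often as the default
    )
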
@@ -19,6 +19,7 @@ from ..services.consolidate import (
     run_decay,
     run_full_consolidation,
 )
+from ..services.verify import run_verification
 
 logger = logging.getLogger(__name__)
 
@@ -73,6 +74,15 @@ class MemoryScheduler:
             replace_existing=True,
         )
 
+        # Periodic: Memory verification
+        self.scheduler.add_job(
+            self._run_memory_verification,
+            IntervalTrigger(minutes=self.config.verify_interval_minutes),
+            id="memory_verification",
+            name="Background memory verification",
+            replace_existing=True,
+        )
+
         self.scheduler.start()
         self._started = True
         logger.info("Memory scheduler started")
@@ -136,6 +146,15 @@ class MemoryScheduler:
         except Exception as e:
             logger.exception("Error in prediction generation")
 
+    def _run_memory_verification(self) -> None:
+        """Run background memory verification"""
+        try:
+            logger.debug("Running memory verification")
+            result = run_verification()
+            logger.debug(f"Memory verification complete: {result}")
+        except Exception as e:
+            logger.exception("Error in memory verification")
+
 
 # Global scheduler instance
 _scheduler: Optional[MemoryScheduler] = None
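
Note that services/verify.py itself is not included in this diff. From the import and call sites above, run_verification() takes no arguments and returns a value the scheduler only logs. A hypothetical stub of that contract (the return keys are invented):

    def run_verification() -> dict:
        # Stand-in for services/verify.py, which this diff omits; any
        # repr-able summary satisfies the scheduler's logging call.
        return {"verified": 0, "flagged": 0}
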
@@ -303,6 +303,43 @@ class Database:
             logger.warning(f"Migration 4 (FTS5) failed: {e}. FTS5 may not be available.")
             # FTS5 is optional; the system degrades gracefully without it
 
+        if current_version < 5:
+            # Migration 5: Add verification columns to memories, pattern_name to predictions
+            migration_stmts = [
+                "ALTER TABLE memories ADD COLUMN verified_at TEXT",
+                "ALTER TABLE memories ADD COLUMN verification_status TEXT DEFAULT 'pending'",
+                "ALTER TABLE predictions ADD COLUMN prediction_pattern_name TEXT",
+            ]
+            for stmt in migration_stmts:
+                try:
+                    conn.execute(stmt)
+                except sqlite3.OperationalError as e:
+                    if "duplicate column" not in str(e).lower():
+                        logger.warning(f"Migration 5 statement failed: {e}")
+
+            # Index for verification queries
+            try:
+                conn.execute(
+                    "CREATE INDEX IF NOT EXISTS idx_memories_verification ON memories(verification_status)"
+                )
+            except sqlite3.OperationalError as e:
+                logger.warning(f"Migration 5 index failed: {e}")
+
+            # Grandfather existing memories as verified
+            try:
+                conn.execute(
+                    """UPDATE memories SET verification_status = 'verified', verified_at = datetime('now')
+                       WHERE verification_status = 'pending' OR verification_status IS NULL"""
+                )
+            except sqlite3.OperationalError as e:
+                logger.warning(f"Migration 5 grandfather failed: {e}")
+
+            conn.execute(
+                "INSERT OR IGNORE INTO schema_migrations (version, description) VALUES (5, 'Add verification columns to memories, prediction_pattern_name to predictions')"
+            )
+            conn.commit()
+            logger.info("Applied migration 5: memory verification and prediction feedback")
+
     def execute(
         self, sql: str, params: Tuple = (), fetch: bool = False
     ) -> Optional[List[sqlite3.Row]]:
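
The per-statement try/except is there because SQLite's ALTER TABLE ... ADD COLUMN has no IF NOT EXISTS form, so re-running the migration raises an OperationalError mentioning "duplicate column". A minimal demonstration of the pattern being relied on:

    import sqlite3

    conn = sqlite3.connect(":memory:")
    conn.execute("CREATE TABLE memories (id INTEGER PRIMARY KEY)")
    conn.execute("ALTER TABLE memories ADD COLUMN verified_at TEXT")
    try:
        conn.execute("ALTER TABLE memories ADD COLUMN verified_at TEXT")  # second run
    except sqlite3.OperationalError as e:
        assert "duplicate column" in str(e).lower()  # what the migration swallows
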
@@ -570,6 +570,24 @@ async def list_tools() -> ListToolsResult:
                 "properties": {},
             },
         ),
+        Tool(
+            name="memory.prediction_feedback",
+            description="Provide feedback on a prediction -- mark whether the user acted on it. This trains future prediction priority.",
+            inputSchema={
+                "type": "object",
+                "properties": {
+                    "prediction_id": {
+                        "type": "integer",
+                        "description": "The prediction ID to provide feedback for",
+                    },
+                    "acted_on": {
+                        "type": "boolean",
+                        "description": "Whether the user acted on this prediction",
+                    },
+                },
+                "required": ["prediction_id", "acted_on"],
+            },
+        ),
         Tool(
             name="cognitive.ingest",
             description=(
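
A call conforming to this schema would carry arguments shaped like the following (the ID is made up):

    arguments = {"prediction_id": 42, "acted_on": True}  # matches the inputSchema above
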
@@ -1021,6 +1039,21 @@ async def call_tool(name: str, arguments: Dict[str, Any]) -> CallToolResult:
             ]
         )
 
+    elif name == "memory.prediction_feedback":
+        svc = get_consolidate_service()
+        svc.mark_prediction_acted_on(
+            prediction_id=arguments["prediction_id"],
+            acted_on=arguments["acted_on"],
+        )
+        return CallToolResult(
+            content=[
+                TextContent(
+                    type="text",
+                    text=json.dumps({"success": True, "prediction_id": arguments["prediction_id"]}),
+                )
+            ]
+        )
+
     elif name == "memory.trace":
         result = trace_memory(memory_id=arguments["memory_id"])
         return CallToolResult(
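
End to end, the new handler acknowledges with a small JSON payload. An illustrative round trip through call_tool (IDs made up, client plumbing omitted):

    result = await call_tool(
        "memory.prediction_feedback",
        {"prediction_id": 42, "acted_on": True},
    )
    # result.content[0].text == '{"success": true, "prediction_id": 42}'
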
@@ -58,6 +58,8 @@ CREATE TABLE IF NOT EXISTS memories (
     updated_at TEXT DEFAULT (datetime('now')),
     last_accessed_at TEXT, -- For rehearsal-based importance boost
     access_count INTEGER DEFAULT 0,
+    verified_at TEXT, -- When this memory was verified
+    verification_status TEXT DEFAULT 'pending', -- pending, verified, flagged, contradicts
     metadata TEXT -- JSON blob for flexible attributes
 );
 
@@ -65,6 +67,7 @@ CREATE INDEX IF NOT EXISTS idx_memories_type ON memories(type);
 CREATE INDEX IF NOT EXISTS idx_memories_importance ON memories(importance DESC);
 CREATE INDEX IF NOT EXISTS idx_memories_created ON memories(created_at DESC);
 CREATE INDEX IF NOT EXISTS idx_memories_hash ON memories(content_hash);
+CREATE INDEX IF NOT EXISTS idx_memories_verification ON memories(verification_status);
 
 -- Junction table linking memories to entities
 CREATE TABLE IF NOT EXISTS memory_entities (
@@ -171,6 +174,7 @@ CREATE TABLE IF NOT EXISTS predictions (
     is_acted_on INTEGER DEFAULT 0, -- Whether user acted on this
     created_at TEXT DEFAULT (datetime('now')),
     shown_at TEXT,
+    prediction_pattern_name TEXT, -- Links to pattern for feedback loop
     metadata TEXT
 );
 
@@ -256,3 +260,6 @@ VALUES (3, 'Add source_context to memories, is_archived to turn_buffer for episo
 -- NOTE: FTS5 full-text search (migration v4) is created by database.py migration code
 -- rather than here, because CREATE TRIGGER statements contain internal semicolons
 -- that the schema.sql line-based parser cannot handle.
+
+INSERT OR IGNORE INTO schema_migrations (version, description)
+VALUES (5, 'Add verification columns to memories, prediction_pattern_name to predictions');
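
Fresh installs pick migration 5 up from schema.sql, while existing databases go through the database.py code above; both paths record version 5, and INSERT OR IGNORE makes the double write harmless. A quick check of that idempotency (table shape simplified):

    import sqlite3

    conn = sqlite3.connect(":memory:")
    conn.execute("CREATE TABLE schema_migrations (version INTEGER PRIMARY KEY, description TEXT)")
    for _ in range(2):  # both code paths may run; the second insert is a no-op
        conn.execute("INSERT OR IGNORE INTO schema_migrations (version, description) VALUES (5, 'demo')")
    assert conn.execute("SELECT COUNT(*) FROM schema_migrations").fetchone()[0] == 1
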
@@ -18,6 +18,18 @@ from ..database import get_db
 logger = logging.getLogger(__name__)
 
 
+def _cosine_similarity(a: list, b: list) -> float:
+    """Pure Python cosine similarity between two vectors."""
+    if len(a) != len(b) or len(a) == 0:
+        return 0.0
+    dot = sum(x * y for x, y in zip(a, b))
+    norm_a = sum(x * x for x in a) ** 0.5
+    norm_b = sum(x * x for x in b) ** 0.5
+    if norm_a == 0 or norm_b == 0:
+        return 0.0
+    return dot / (norm_a * norm_b)
+
+
 @dataclass
 class DetectedPattern:
     """A pattern detected in the user's behavior or data"""
@@ -38,6 +50,7 @@ class Prediction:
     priority: float
     expires_at: Optional[datetime]
     metadata: Optional[Dict] = None
+    pattern_name: Optional[str] = None
 
 
 class ConsolidateService:
@@ -409,13 +422,15 @@ class ConsolidateService:
         )
 
         for pattern in cooling_patterns:
+            feedback = self._get_pattern_feedback("suggestion", pattern["name"])
             predictions.append(
                 Prediction(
                     content=pattern["description"],
                     prediction_type="suggestion",
-                    priority=pattern["confidence"],
+                    priority=pattern["confidence"] * feedback,
                     expires_at=datetime.utcnow() + timedelta(days=7),
                     metadata={"pattern_id": pattern["id"]},
+                    pattern_name=pattern["name"],
                 )
             )
 
@@ -438,13 +453,16 @@ class ConsolidateService:
             days_old = (datetime.utcnow() - created).days
 
             if days_old > 3:
+                pattern_name = f"commitment_reminder_{commitment['id']}"
+                feedback = self._get_pattern_feedback("reminder", pattern_name)
                 predictions.append(
                     Prediction(
                         content=f"Commitment from {days_old} days ago: {commitment['content'][:100]}",
                         prediction_type="reminder",
-                        priority=min(1.0, 0.5 + days_old / 14),
+                        priority=min(1.0, 0.5 + days_old / 14) * feedback,
                         expires_at=datetime.utcnow() + timedelta(days=2),
                         metadata={"memory_id": commitment["id"]},
+                        pattern_name=pattern_name,
                     )
                 )
 
@@ -464,13 +482,15 @@ class ConsolidateService:
 
         for pattern in patterns:
             if pattern["pattern_type"] == "behavioral":
+                feedback = self._get_pattern_feedback("insight", pattern["name"])
                 predictions.append(
                     Prediction(
                         content=f"Pattern noticed: {pattern['description']}",
                         prediction_type="insight",
-                        priority=pattern["confidence"] * 0.8,
+                        priority=pattern["confidence"] * 0.8 * feedback,
                         expires_at=datetime.utcnow() + timedelta(days=14),
                         metadata={"pattern_id": pattern["id"]},
+                        pattern_name=pattern["name"],
                     )
                 )
 
@@ -478,19 +498,19 @@ class ConsolidateService:
 
     def _store_prediction(self, prediction: Prediction) -> int:
         """Store a prediction in the database"""
-        return self.db.insert(
-            "predictions",
-            {
-                "content": prediction.content,
-                "prediction_type": prediction.prediction_type,
-                "priority": prediction.priority,
-                "expires_at": prediction.expires_at.isoformat() if prediction.expires_at else None,
-                "is_shown": 0,
-                "is_acted_on": 0,
-                "created_at": datetime.utcnow().isoformat(),
-                "metadata": json.dumps(prediction.metadata) if prediction.metadata else None,
-            },
-        )
+        data = {
+            "content": prediction.content,
+            "prediction_type": prediction.prediction_type,
+            "priority": prediction.priority,
+            "expires_at": prediction.expires_at.isoformat() if prediction.expires_at else None,
+            "is_shown": 0,
+            "is_acted_on": 0,
+            "created_at": datetime.utcnow().isoformat(),
+            "metadata": json.dumps(prediction.metadata) if prediction.metadata else None,
+        }
+        if prediction.pattern_name:
+            data["prediction_pattern_name"] = prediction.pattern_name
+        return self.db.insert("predictions", data)
 
     def get_predictions(
         self,
@@ -539,6 +559,208 @@ class ConsolidateService:
             (prediction_id,),
         )
 
+    def mark_prediction_acted_on(self, prediction_id: int, acted_on: bool) -> None:
+        """Mark a prediction as acted on (or not) by the user"""
+        self.db.update(
+            "predictions",
+            {"is_acted_on": 1 if acted_on else 0},
+            "id = ?",
+            (prediction_id,),
+        )
+
+    def _get_pattern_feedback(self, prediction_type: str, pattern_name: str) -> float:
+        """
+        Return a priority multiplier based on past user engagement with similar predictions.
+
+        Rules:
+        - <5 shown predictions for this type: return 1.0 (insufficient data)
+        - act_ratio < 0.1: return 0.5 (user ignores these, halve priority)
+        - act_ratio > 0.5: return 1.25 (user values these, boost priority)
+        - Otherwise: return 1.0 (neutral)
+        """
+        try:
+            rows = self.db.execute(
+                """
+                SELECT COUNT(*) as total,
+                       SUM(CASE WHEN is_acted_on = 1 THEN 1 ELSE 0 END) as acted
+                FROM predictions
+                WHERE prediction_type = ? AND is_shown = 1
+                """,
+                (prediction_type,),
+                fetch=True,
+            )
+            if not rows:
+                return 1.0
+
+            total = rows[0]["total"] or 0
+            acted = rows[0]["acted"] or 0
+
+            if total < 5:
+                return 1.0
+
+            act_ratio = acted / total
+            if act_ratio < 0.1:
+                return 0.5
+            elif act_ratio > 0.5:
+                return 1.25
+            return 1.0
+        except Exception as e:
+            logger.debug(f"Feedback lookup failed: {e}")
+            return 1.0
+
+    def merge_similar_memories(self) -> int:
+        """
+        Merge semantically similar memories during consolidation.
+        Uses existing stored embeddings -- no new Ollama calls.
+
+        Returns:
+            Count of merged memory pairs
+        """
+        if not self.config.enable_memory_merging:
+            return 0
+
+        threshold = self.config.similarity_merge_threshold
+        merged_count = 0
+
+        try:
+            # Find entities with 5+ linked memories (high-memory entities first)
+            entity_rows = self.db.execute(
+                """
+                SELECT me.entity_id, COUNT(DISTINCT me.memory_id) as mem_count
+                FROM memory_entities me
+                GROUP BY me.entity_id
+                HAVING mem_count >= 5
+                ORDER BY mem_count DESC
+                LIMIT 50
+                """,
+                fetch=True,
+            ) or []
+
+            for entity_row in entity_rows:
+                entity_id = entity_row["entity_id"]
+
+                # Load memory IDs and embeddings for this entity
+                mem_rows = self.db.execute(
+                    """
+                    SELECT me.memory_id, m.importance, m.access_count,
+                           emb.embedding
+                    FROM memory_entities me
+                    JOIN memories m ON me.memory_id = m.id
+                    LEFT JOIN memory_embeddings emb ON m.id = emb.memory_id
+                    WHERE me.entity_id = ?
+                      AND m.importance > 0.01
+                    ORDER BY m.importance DESC
+                    """,
+                    (entity_id,),
+                    fetch=True,
+                ) or []
+
+                # Parse embeddings
+                memories_with_emb = []
+                for row in mem_rows:
+                    if row["embedding"]:
+                        try:
+                            emb = json.loads(row["embedding"]) if isinstance(row["embedding"], str) else row["embedding"]
+                            memories_with_emb.append({
+                                "id": row["memory_id"],
+                                "importance": row["importance"],
+                                "access_count": row["access_count"] or 0,
+                                "embedding": emb,
+                            })
+                        except (json.JSONDecodeError, TypeError):
+                            continue
+
+                if len(memories_with_emb) < 2:
+                    continue
+
+                # Pairwise cosine similarity
+                already_merged = set()
+                for i in range(len(memories_with_emb)):
+                    if memories_with_emb[i]["id"] in already_merged:
+                        continue
+                    for j in range(i + 1, len(memories_with_emb)):
+                        if memories_with_emb[j]["id"] in already_merged:
+                            continue
+
+                        sim = _cosine_similarity(
+                            memories_with_emb[i]["embedding"],
+                            memories_with_emb[j]["embedding"],
+                        )
+                        if sim >= threshold:
+                            # Keep the one with higher importance * (1 + access_count)
+                            score_i = memories_with_emb[i]["importance"] * (1 + memories_with_emb[i]["access_count"])
+                            score_j = memories_with_emb[j]["importance"] * (1 + memories_with_emb[j]["access_count"])
+
+                            if score_i >= score_j:
+                                primary_id = memories_with_emb[i]["id"]
+                                duplicate_id = memories_with_emb[j]["id"]
+                            else:
+                                primary_id = memories_with_emb[j]["id"]
+                                duplicate_id = memories_with_emb[i]["id"]
+
+                            self._merge_memory_pair(primary_id, duplicate_id)
+                            already_merged.add(duplicate_id)
+                            merged_count += 1
+
+        except Exception as e:
+            logger.warning(f"Memory merging failed: {e}")
+
+        if merged_count > 0:
+            logger.info(f"Merged {merged_count} near-duplicate memory pairs")
+        return merged_count
+
+    def _merge_memory_pair(self, primary_id: int, duplicate_id: int) -> None:
+        """
+        Merge a duplicate memory into the primary.
+
+        - Transfers entity links from duplicate to primary
+        - Adds merged_from to primary's metadata
+        - Sets duplicate importance to 0.001
+        """
+        # Transfer entity links
+        dup_links = self.db.execute(
+            "SELECT entity_id, relationship FROM memory_entities WHERE memory_id = ?",
+            (duplicate_id,),
+            fetch=True,
+        ) or []
+
+        for link in dup_links:
+            try:
+                self.db.insert(
+                    "memory_entities",
+                    {
+                        "memory_id": primary_id,
+                        "entity_id": link["entity_id"],
+                        "relationship": link["relationship"],
+                    },
+                )
+            except Exception:
+                pass  # Duplicate link, ignore
+
+        # Update primary's metadata with merge info
+        primary = self.db.get_one("memories", where="id = ?", where_params=(primary_id,))
+        if primary:
+            meta = json.loads(primary["metadata"] or "{}")
+            merged_from = meta.get("merged_from", [])
+            merged_from.append(duplicate_id)
+            meta["merged_from"] = merged_from
+            self.db.update(
+                "memories",
+                {"metadata": json.dumps(meta), "updated_at": datetime.utcnow().isoformat()},
+                "id = ?",
+                (primary_id,),
+            )
+
+        # Suppress duplicate (don't delete, just minimize importance)
+        self.db.update(
+            "memories",
+            {"importance": 0.001, "updated_at": datetime.utcnow().isoformat()},
+            "id = ?",
+            (duplicate_id,),
+        )
+
+        logger.debug(f"Merged memory {duplicate_id} into {primary_id}")
+
     def run_full_consolidation(self) -> Dict[str, Any]:
         """
         Run complete consolidation: decay, patterns, predictions.
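
To make the new arithmetic concrete, a worked example with made-up numbers:

    # Feedback multiplier: 20 "reminder" predictions shown, only 1 acted on.
    act_ratio = 1 / 20                        # 0.05, below the 0.1 floor -> 0.5x
    priority = min(1.0, 0.5 + 4 / 14) * 0.5   # 4-day-old commitment: ~0.79 -> ~0.39

    # Merge tie-break: importance * (1 + access_count) picks the survivor.
    score_a = 0.8 * (1 + 3)  # importance 0.8, accessed 3 times -> 3.2 (kept)
    score_b = 0.9 * (1 + 0)  # importance 0.9, never accessed -> 0.9 (importance set to 0.001)
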
@@ -554,6 +776,9 @@ class ConsolidateService:
         # Boost accessed memories
         results["boosted"] = self.boost_accessed_memories()
 
+        # Merge near-duplicate memories
+        results["merged"] = self.merge_similar_memories()
+
         # Detect patterns
        patterns = self.detect_patterns()
         results["patterns_detected"] = len(patterns)
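
The consolidation summary therefore gains a merged count alongside the existing keys; illustratively (values invented):

    results = run_full_consolidation()
    # e.g. {..., "boosted": 4, "merged": 2, "patterns_detected": 3}
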
@@ -0,0 +1,131 @@
+"""
+Deterministic Guards for Claudia Memory System
+
+Pure-Python validation on memory writes. Zero LLM cost, always on.
+Guards are advisory -- they warn and auto-correct, never block writes.
+"""
+
+import logging
+import re
+from dataclasses import dataclass, field
+from difflib import SequenceMatcher
+from typing import Any, Dict, List, Optional
+
+logger = logging.getLogger(__name__)
+
+# Regex patterns for commitment deadline detection
+DEADLINE_PATTERNS = [
+    re.compile(r"\b(by|before|due|until|deadline)\s+\w+", re.IGNORECASE),
+    re.compile(r"\b\d{1,2}[/-]\d{1,2}([/-]\d{2,4})?\b"),  # Date formats: 1/15, 01-15-2025
+    re.compile(r"\b(january|february|march|april|may|june|july|august|september|october|november|december)\s+\d{1,2}\b", re.IGNORECASE),
+    re.compile(r"\b(monday|tuesday|wednesday|thursday|friday|saturday|sunday)\b", re.IGNORECASE),
+    re.compile(r"\b(tomorrow|tonight|next week|next month|end of (week|month|day|year))\b", re.IGNORECASE),
+    re.compile(r"\bEOD\b|\bEOW\b|\bEOM\b"),
+]
+
+
+@dataclass
+class ValidationResult:
+    """Result of a validation check"""
+    is_valid: bool = True
+    warnings: List[str] = field(default_factory=list)
+    adjustments: Dict[str, Any] = field(default_factory=dict)
+
+
+def validate_memory(
+    content: str,
+    memory_type: str = "fact",
+    importance: float = 1.0,
+    metadata: Optional[Dict] = None,
+) -> ValidationResult:
+    """
+    Validate a memory before storage.
+
+    Checks:
+    - Content length (warn >500, truncate >1000)
+    - Commitment deadline detection via regex
+    - Importance clamped to [0, 1]
+    """
+    result = ValidationResult()
+
+    # Content length checks
+    if len(content) > 1000:
+        result.warnings.append(f"Content truncated from {len(content)} to 1000 characters")
+        result.adjustments["content"] = content[:1000]
+    elif len(content) > 500:
+        result.warnings.append(f"Long content ({len(content)} chars) -- consider breaking into multiple memories")
+
+    # Importance clamping
+    if importance < 0:
+        result.warnings.append(f"Importance {importance} clamped to 0.0")
+        result.adjustments["importance"] = 0.0
+    elif importance > 1:
+        result.warnings.append(f"Importance {importance} clamped to 1.0")
+        result.adjustments["importance"] = 1.0
+
+    # Commitment deadline detection
+    if memory_type == "commitment":
+        has_deadline = any(p.search(content) for p in DEADLINE_PATTERNS)
+        if not has_deadline:
+            result.warnings.append("Commitment has no detected deadline -- consider adding a target date")
+
+    return result
+
+
+def validate_entity(
+    name: str,
+    entity_type: str = "",
+    existing_canonical_names: Optional[List[str]] = None,
+) -> ValidationResult:
+    """
+    Validate an entity before storage.
+
+    Checks:
+    - Type is required (non-empty)
+    - Name is not empty
+    - Near-duplicate name detection via SequenceMatcher (ratio > 0.85)
+    """
+    result = ValidationResult()
+
+    # Name check
+    if not name or not name.strip():
+        result.is_valid = False
+        result.warnings.append("Entity name cannot be empty")
+        return result
+
+    # Type check
+    if not entity_type or not entity_type.strip():
+        result.warnings.append("Entity type is required but was empty -- defaulting to 'person'")
+        result.adjustments["entity_type"] = "person"
+
+    # Near-duplicate detection
+    if existing_canonical_names:
+        canonical = name.strip().lower()
+        for existing in existing_canonical_names:
+            ratio = SequenceMatcher(None, canonical, existing.lower()).ratio()
+            if ratio > 0.85 and canonical != existing.lower():
+                result.warnings.append(
+                    f"Near-duplicate entity name: '{name}' is similar to existing '{existing}' "
+                    f"(similarity: {ratio:.2f})"
+                )
+
+    return result
+
+
+def validate_relationship(strength: float = 1.0) -> ValidationResult:
+    """
+    Validate a relationship before storage.
+
+    Checks:
+    - Strength clamped to [0, 1]
+    """
+    result = ValidationResult()
+
+    if strength < 0:
+        result.warnings.append(f"Relationship strength {strength} clamped to 0.0")
+        result.adjustments["strength"] = 0.0
+    elif strength > 1:
+        result.warnings.append(f"Relationship strength {strength} clamped to 1.0")
+        result.adjustments["strength"] = 1.0
+
+    return result
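
The write path that consumes these ValidationResults is not part of this diff; a hypothetical caller-side reading of the guards (inputs invented):

    check = validate_memory("Ship the beta", memory_type="commitment", importance=1.3)
    # check.adjustments == {"importance": 1.0}; warnings note the clamp and the
    # missing deadline (no DEADLINE_PATTERNS match in the content)

    dupe = validate_entity("Acme Corp.", entity_type="", existing_canonical_names=["acme corp"])
    # dupe.adjustments == {"entity_type": "person"}; warnings flag the near-duplicate
    # ("acme corp." vs "acme corp", SequenceMatcher ratio ~0.95 > 0.85)
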