superlocalmemory 2.3.6 → 2.4.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -23,11 +23,14 @@ Based on architecture: docs/architecture/05-pattern-learner.md
23
23
  import sqlite3
24
24
  import json
25
25
  import re
26
+ import logging
26
27
  from datetime import datetime, timedelta
27
28
  from pathlib import Path
28
29
  from typing import Dict, List, Optional, Any, Counter as CounterType
29
30
  from collections import Counter
30
31
 
32
+ logger = logging.getLogger(__name__)
33
+
31
34
  # Local NLP tools (no external APIs)
32
35
  try:
33
36
  from sklearn.feature_extraction.text import TfidfVectorizer
@@ -404,23 +407,54 @@ class ConfidenceScorer:
404
407
  evidence_memory_ids: List[int],
405
408
  total_memories: int
406
409
  ) -> float:
407
- """Calculate confidence score for a pattern."""
410
+ """
411
+ Calculate confidence using Beta-Binomial Bayesian posterior.
412
+
413
+ Based on MACLA (arXiv:2512.18950, Forouzandeh et al., Dec 2025):
414
+ posterior_mean = (alpha + evidence) / (alpha + beta + evidence + competition)
415
+
416
+ Adaptation: MACLA's Beta-Binomial uses pairwise interaction counts.
417
+ Our corpus has sparse signals (most memories are irrelevant to any
418
+ single pattern). We use log-scaled competition instead of raw total
419
+ to avoid over-dilution: competition = log2(total_memories).
420
+
421
+ Pattern-specific priors (alpha, beta):
422
+ - preference (1, 4): prior mean 0.20, ~8 items to reach 0.5
423
+ - style (1, 5): prior mean 0.17, subtler signals need more evidence
424
+ - terminology (2, 3): prior mean 0.40, direct usage signal
425
+ """
408
426
  if total_memories == 0 or not evidence_memory_ids:
409
427
  return 0.0
410
428
 
411
- # Base confidence: % of memories supporting this
412
- base_confidence = len(evidence_memory_ids) / total_memories
429
+ import math
430
+ evidence_count = len(evidence_memory_ids)
431
+
432
+ # Pattern-specific Beta priors (alpha, beta)
433
+ PRIORS = {
434
+ 'preference': (1.0, 4.0),
435
+ 'style': (1.0, 5.0),
436
+ 'terminology': (2.0, 3.0),
437
+ }
438
+ alpha, beta = PRIORS.get(pattern_type, (1.0, 4.0))
439
+
440
+ # Log-scaled competition: grows slowly with corpus size
441
+ # 10 memories -> 3.3, 60 -> 5.9, 500 -> 9.0, 5000 -> 12.3
442
+ competition = math.log2(max(2, total_memories))
443
+
444
+ # MACLA-inspired Beta posterior with log competition
445
+ posterior_mean = (alpha + evidence_count) / (alpha + beta + evidence_count + competition)
413
446
 
414
- # Consistency check: recency bonus
447
+ # Recency adjustment (mild: 1.0 to 1.15)
415
448
  recency_bonus = self._calculate_recency_bonus(evidence_memory_ids)
449
+ recency_factor = 1.0 + min(0.15, 0.075 * (recency_bonus - 1.0) / 0.2) if recency_bonus > 1.0 else 1.0
416
450
 
417
- # Distribution check: are memories spread over time or clustered?
451
+ # Temporal spread adjustment (0.9 to 1.1)
418
452
  distribution_factor = self._calculate_distribution_factor(evidence_memory_ids)
419
453
 
420
454
  # Final confidence
421
- confidence = base_confidence * recency_bonus * distribution_factor
455
+ confidence = posterior_mean * recency_factor * distribution_factor
422
456
 
423
- return min(1.0, confidence) # Cap at 1.0
457
+ return min(0.95, round(confidence, 3))
424
458
 
425
459
  def _calculate_recency_bonus(self, memory_ids: List[int]) -> float:
426
460
  """Give bonus to patterns with recent evidence."""
@@ -517,10 +551,21 @@ class PatternStore:
517
551
  self._init_tables()
518
552
 
519
553
  def _init_tables(self):
520
- """Initialize pattern tables if they don't exist."""
554
+ """Initialize pattern tables if they don't exist, or recreate if schema is incomplete."""
521
555
  conn = sqlite3.connect(self.db_path)
522
556
  cursor = conn.cursor()
523
557
 
558
+ # Check if existing tables have correct schema
559
+ for table_name, required_cols in [
560
+ ('identity_patterns', {'pattern_type', 'key', 'value', 'confidence'}),
561
+ ('pattern_examples', {'pattern_id', 'memory_id'}),
562
+ ]:
563
+ cursor.execute(f"PRAGMA table_info({table_name})")
564
+ existing_cols = {row[1] for row in cursor.fetchall()}
565
+ if existing_cols and not required_cols.issubset(existing_cols):
566
+ logger.warning(f"Dropping incomplete {table_name} table (missing: {required_cols - existing_cols})")
567
+ cursor.execute(f'DROP TABLE IF EXISTS {table_name}')
568
+
524
569
  # Identity patterns table
525
570
  cursor.execute('''
526
571
  CREATE TABLE IF NOT EXISTS identity_patterns (
@@ -532,12 +577,19 @@ class PatternStore:
532
577
  evidence_count INTEGER DEFAULT 1,
533
578
  memory_ids TEXT,
534
579
  category TEXT,
580
+ profile TEXT DEFAULT 'default',
535
581
  created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
536
582
  updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
537
- UNIQUE(pattern_type, key, category)
583
+ UNIQUE(pattern_type, key, category, profile)
538
584
  )
539
585
  ''')
540
586
 
587
+ # Add profile column if upgrading from older schema
588
+ try:
589
+ cursor.execute('ALTER TABLE identity_patterns ADD COLUMN profile TEXT DEFAULT "default"')
590
+ except sqlite3.OperationalError:
591
+ pass # Column already exists
592
+
541
593
  # Pattern examples table
542
594
  cursor.execute('''
543
595
  CREATE TABLE IF NOT EXISTS pattern_examples (
@@ -553,21 +605,23 @@ class PatternStore:
553
605
  # Indexes
554
606
  cursor.execute('CREATE INDEX IF NOT EXISTS idx_pattern_type ON identity_patterns(pattern_type)')
555
607
  cursor.execute('CREATE INDEX IF NOT EXISTS idx_pattern_confidence ON identity_patterns(confidence)')
608
+ cursor.execute('CREATE INDEX IF NOT EXISTS idx_pattern_profile ON identity_patterns(profile)')
556
609
 
557
610
  conn.commit()
558
611
  conn.close()
559
612
 
560
613
  def save_pattern(self, pattern: Dict[str, Any]) -> int:
561
- """Save or update a pattern."""
614
+ """Save or update a pattern (scoped by profile)."""
562
615
  conn = sqlite3.connect(self.db_path)
563
616
  cursor = conn.cursor()
617
+ profile = pattern.get('profile', 'default')
564
618
 
565
619
  try:
566
- # Check if pattern exists
620
+ # Check if pattern exists for this profile
567
621
  cursor.execute('''
568
622
  SELECT id FROM identity_patterns
569
- WHERE pattern_type = ? AND key = ? AND category = ?
570
- ''', (pattern['pattern_type'], pattern['key'], pattern['category']))
623
+ WHERE pattern_type = ? AND key = ? AND category = ? AND profile = ?
624
+ ''', (pattern['pattern_type'], pattern['key'], pattern['category'], profile))
571
625
 
572
626
  existing = cursor.fetchone()
573
627
 
@@ -592,8 +646,8 @@ class PatternStore:
592
646
  # Insert new pattern
593
647
  cursor.execute('''
594
648
  INSERT INTO identity_patterns
595
- (pattern_type, key, value, confidence, evidence_count, memory_ids, category)
596
- VALUES (?, ?, ?, ?, ?, ?, ?)
649
+ (pattern_type, key, value, confidence, evidence_count, memory_ids, category, profile)
650
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?)
597
651
  ''', (
598
652
  pattern['pattern_type'],
599
653
  pattern['key'],
@@ -601,7 +655,8 @@ class PatternStore:
601
655
  pattern['confidence'],
602
656
  pattern['evidence_count'],
603
657
  memory_ids_json,
604
- pattern['category']
658
+ pattern['category'],
659
+ profile
605
660
  ))
606
661
  pattern_id = cursor.lastrowid
607
662
 
@@ -648,25 +703,32 @@ class PatternStore:
648
703
  # Fallback: first 150 chars
649
704
  return content[:150] + ('...' if len(content) > 150 else '')
650
705
 
651
- def get_patterns(self, min_confidence: float = 0.7, pattern_type: Optional[str] = None) -> List[Dict[str, Any]]:
652
- """Get patterns above confidence threshold."""
706
+ def get_patterns(self, min_confidence: float = 0.7, pattern_type: Optional[str] = None,
707
+ profile: Optional[str] = None) -> List[Dict[str, Any]]:
708
+ """Get patterns above confidence threshold, optionally filtered by profile."""
653
709
  conn = sqlite3.connect(self.db_path)
654
710
  cursor = conn.cursor()
655
711
 
712
+ # Build query with optional filters
713
+ conditions = ['confidence >= ?']
714
+ params = [min_confidence]
715
+
656
716
  if pattern_type:
657
- cursor.execute('''
658
- SELECT id, pattern_type, key, value, confidence, evidence_count, updated_at, created_at
659
- FROM identity_patterns
660
- WHERE confidence >= ? AND pattern_type = ?
661
- ORDER BY confidence DESC, evidence_count DESC
662
- ''', (min_confidence, pattern_type))
663
- else:
664
- cursor.execute('''
665
- SELECT id, pattern_type, key, value, confidence, evidence_count, updated_at, created_at
666
- FROM identity_patterns
667
- WHERE confidence >= ?
668
- ORDER BY confidence DESC, evidence_count DESC
669
- ''', (min_confidence,))
717
+ conditions.append('pattern_type = ?')
718
+ params.append(pattern_type)
719
+
720
+ if profile:
721
+ conditions.append('profile = ?')
722
+ params.append(profile)
723
+
724
+ where_clause = ' AND '.join(conditions)
725
+ cursor.execute(f'''
726
+ SELECT id, pattern_type, key, value, confidence, evidence_count,
727
+ updated_at, created_at, category
728
+ FROM identity_patterns
729
+ WHERE {where_clause}
730
+ ORDER BY confidence DESC, evidence_count DESC
731
+ ''', params)
670
732
 
671
733
  patterns = []
672
734
  for row in cursor.fetchall():
@@ -676,9 +738,11 @@ class PatternStore:
676
738
  'key': row[2],
677
739
  'value': row[3],
678
740
  'confidence': row[4],
741
+ 'evidence_count': row[5],
679
742
  'frequency': row[5],
680
743
  'last_seen': row[6],
681
- 'created_at': row[7]
744
+ 'created_at': row[7],
745
+ 'category': row[8]
682
746
  })
683
747
 
684
748
  conn.close()
@@ -696,23 +760,37 @@ class PatternLearner:
696
760
  self.confidence_scorer = ConfidenceScorer(db_path)
697
761
  self.pattern_store = PatternStore(db_path)
698
762
 
763
+ def _get_active_profile(self) -> str:
764
+ """Get the currently active profile name from config."""
765
+ config_file = MEMORY_DIR / "profiles.json"
766
+ if config_file.exists():
767
+ try:
768
+ with open(config_file, 'r') as f:
769
+ config = json.load(f)
770
+ return config.get('active_profile', 'default')
771
+ except (json.JSONDecodeError, IOError):
772
+ pass
773
+ return 'default'
774
+
699
775
  def weekly_pattern_update(self) -> Dict[str, int]:
700
- """Full pattern analysis of all memories. Run this weekly."""
701
- print("Starting weekly pattern update...")
776
+ """Full pattern analysis of all memories for active profile. Run this weekly."""
777
+ active_profile = self._get_active_profile()
778
+ print(f"Starting weekly pattern update for profile: {active_profile}...")
702
779
 
703
- # Get all memory IDs
780
+ # Get memory IDs for active profile only
704
781
  conn = sqlite3.connect(self.db_path)
705
782
  cursor = conn.cursor()
706
- cursor.execute('SELECT id FROM memories ORDER BY created_at')
783
+ cursor.execute('SELECT id FROM memories WHERE profile = ? ORDER BY created_at',
784
+ (active_profile,))
707
785
  all_memory_ids = [row[0] for row in cursor.fetchall()]
708
786
  total_memories = len(all_memory_ids)
709
787
  conn.close()
710
788
 
711
789
  if total_memories == 0:
712
- print("No memories found. Add memories first.")
790
+ print(f"No memories found for profile '{active_profile}'. Add memories first.")
713
791
  return {'preferences': 0, 'styles': 0, 'terminology': 0}
714
792
 
715
- print(f"Analyzing {total_memories} memories...")
793
+ print(f"Analyzing {total_memories} memories for profile '{active_profile}'...")
716
794
 
717
795
  # Run all analyzers
718
796
  preferences = self.frequency_analyzer.analyze_preferences(all_memory_ids)
@@ -724,7 +802,7 @@ class PatternLearner:
724
802
  terms = self.terminology_learner.learn_terminology(all_memory_ids)
725
803
  print(f" Found {len(terms)} terminology patterns")
726
804
 
727
- # Recalculate confidence scores and save all patterns
805
+ # Recalculate confidence scores and save all patterns (tagged with profile)
728
806
  counts = {'preferences': 0, 'styles': 0, 'terminology': 0}
729
807
 
730
808
  for pattern in preferences.values():
@@ -736,6 +814,7 @@ class PatternLearner:
736
814
  total_memories
737
815
  )
738
816
  pattern['confidence'] = round(confidence, 2)
817
+ pattern['profile'] = active_profile
739
818
  self.pattern_store.save_pattern(pattern)
740
819
  counts['preferences'] += 1
741
820
 
@@ -748,6 +827,7 @@ class PatternLearner:
748
827
  total_memories
749
828
  )
750
829
  pattern['confidence'] = round(confidence, 2)
830
+ pattern['profile'] = active_profile
751
831
  self.pattern_store.save_pattern(pattern)
752
832
  counts['styles'] += 1
753
833
 
@@ -760,6 +840,7 @@ class PatternLearner:
760
840
  total_memories
761
841
  )
762
842
  pattern['confidence'] = round(confidence, 2)
843
+ pattern['profile'] = active_profile
763
844
  self.pattern_store.save_pattern(pattern)
764
845
  counts['terminology'] += 1
765
846
 
@@ -772,11 +853,11 @@ class PatternLearner:
772
853
 
773
854
  def on_new_memory(self, memory_id: int):
774
855
  """Incremental update when new memory is added."""
775
- # For now, just trigger full update if memory count is low
776
- # Future optimization: only update affected patterns
856
+ active_profile = self._get_active_profile()
777
857
  conn = sqlite3.connect(self.db_path)
778
858
  cursor = conn.cursor()
779
- cursor.execute('SELECT COUNT(*) FROM memories')
859
+ cursor.execute('SELECT COUNT(*) FROM memories WHERE profile = ?',
860
+ (active_profile,))
780
861
  total = cursor.fetchone()[0]
781
862
  conn.close()
782
863
 
@@ -789,8 +870,9 @@ class PatternLearner:
789
870
  self.weekly_pattern_update()
790
871
 
791
872
  def get_patterns(self, min_confidence: float = 0.7) -> List[Dict[str, Any]]:
792
- """Query patterns above confidence threshold."""
793
- return self.pattern_store.get_patterns(min_confidence)
873
+ """Query patterns above confidence threshold for active profile."""
874
+ active_profile = self._get_active_profile()
875
+ return self.pattern_store.get_patterns(min_confidence, profile=active_profile)
794
876
 
795
877
  def get_identity_context(self, min_confidence: float = 0.7) -> str:
796
878
  """Format patterns for Claude context injection."""