superlocalmemory 2.7.6 → 2.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (170)
  1. package/CHANGELOG.md +120 -155
  2. package/README.md +115 -89
  3. package/api_server.py +2 -12
  4. package/docs/PATTERN-LEARNING.md +64 -199
  5. package/docs/example_graph_usage.py +4 -6
  6. package/install.sh +59 -0
  7. package/mcp_server.py +83 -7
  8. package/package.json +1 -8
  9. package/scripts/generate-thumbnails.py +3 -5
  10. package/skills/slm-build-graph/SKILL.md +1 -1
  11. package/skills/slm-list-recent/SKILL.md +1 -1
  12. package/skills/slm-recall/SKILL.md +1 -1
  13. package/skills/slm-remember/SKILL.md +1 -1
  14. package/skills/slm-show-patterns/SKILL.md +1 -1
  15. package/skills/slm-status/SKILL.md +1 -1
  16. package/skills/slm-switch-profile/SKILL.md +1 -1
  17. package/src/agent_registry.py +7 -18
  18. package/src/auth_middleware.py +3 -5
  19. package/src/auto_backup.py +3 -7
  20. package/src/behavioral/__init__.py +49 -0
  21. package/src/behavioral/behavioral_listener.py +203 -0
  22. package/src/behavioral/behavioral_patterns.py +275 -0
  23. package/src/behavioral/cross_project_transfer.py +206 -0
  24. package/src/behavioral/outcome_inference.py +194 -0
  25. package/src/behavioral/outcome_tracker.py +193 -0
  26. package/src/behavioral/tests/__init__.py +4 -0
  27. package/src/behavioral/tests/test_behavioral_integration.py +108 -0
  28. package/src/behavioral/tests/test_behavioral_patterns.py +150 -0
  29. package/src/behavioral/tests/test_cross_project_transfer.py +142 -0
  30. package/src/behavioral/tests/test_mcp_behavioral.py +139 -0
  31. package/src/behavioral/tests/test_mcp_report_outcome.py +117 -0
  32. package/src/behavioral/tests/test_outcome_inference.py +107 -0
  33. package/src/behavioral/tests/test_outcome_tracker.py +96 -0
  34. package/src/cache_manager.py +4 -6
  35. package/src/compliance/__init__.py +48 -0
  36. package/src/compliance/abac_engine.py +149 -0
  37. package/src/compliance/abac_middleware.py +116 -0
  38. package/src/compliance/audit_db.py +215 -0
  39. package/src/compliance/audit_logger.py +148 -0
  40. package/src/compliance/retention_manager.py +289 -0
  41. package/src/compliance/retention_scheduler.py +186 -0
  42. package/src/compliance/tests/__init__.py +4 -0
  43. package/src/compliance/tests/test_abac_enforcement.py +95 -0
  44. package/src/compliance/tests/test_abac_engine.py +124 -0
  45. package/src/compliance/tests/test_abac_mcp_integration.py +118 -0
  46. package/src/compliance/tests/test_audit_db.py +123 -0
  47. package/src/compliance/tests/test_audit_logger.py +98 -0
  48. package/src/compliance/tests/test_mcp_audit.py +128 -0
  49. package/src/compliance/tests/test_mcp_retention_policy.py +125 -0
  50. package/src/compliance/tests/test_retention_manager.py +131 -0
  51. package/src/compliance/tests/test_retention_scheduler.py +99 -0
  52. package/src/db_connection_manager.py +2 -12
  53. package/src/embedding_engine.py +61 -669
  54. package/src/embeddings/__init__.py +47 -0
  55. package/src/embeddings/cache.py +70 -0
  56. package/src/embeddings/cli.py +113 -0
  57. package/src/embeddings/constants.py +47 -0
  58. package/src/embeddings/database.py +91 -0
  59. package/src/embeddings/engine.py +247 -0
  60. package/src/embeddings/model_loader.py +145 -0
  61. package/src/event_bus.py +3 -13
  62. package/src/graph/__init__.py +36 -0
  63. package/src/graph/build_helpers.py +74 -0
  64. package/src/graph/cli.py +87 -0
  65. package/src/graph/cluster_builder.py +188 -0
  66. package/src/graph/cluster_summary.py +148 -0
  67. package/src/graph/constants.py +47 -0
  68. package/src/graph/edge_builder.py +162 -0
  69. package/src/graph/entity_extractor.py +95 -0
  70. package/src/graph/graph_core.py +226 -0
  71. package/src/graph/graph_search.py +231 -0
  72. package/src/graph/hierarchical.py +207 -0
  73. package/src/graph/schema.py +99 -0
  74. package/src/graph_engine.py +45 -1451
  75. package/src/hnsw_index.py +3 -7
  76. package/src/hybrid_search.py +36 -683
  77. package/src/learning/__init__.py +27 -12
  78. package/src/learning/adaptive_ranker.py +50 -12
  79. package/src/learning/cross_project_aggregator.py +2 -12
  80. package/src/learning/engagement_tracker.py +2 -12
  81. package/src/learning/feature_extractor.py +175 -43
  82. package/src/learning/feedback_collector.py +7 -12
  83. package/src/learning/learning_db.py +180 -12
  84. package/src/learning/project_context_manager.py +2 -12
  85. package/src/learning/source_quality_scorer.py +2 -12
  86. package/src/learning/synthetic_bootstrap.py +2 -12
  87. package/src/learning/tests/__init__.py +2 -0
  88. package/src/learning/tests/test_adaptive_ranker.py +2 -6
  89. package/src/learning/tests/test_adaptive_ranker_v28.py +60 -0
  90. package/src/learning/tests/test_aggregator.py +2 -6
  91. package/src/learning/tests/test_auto_retrain_v28.py +35 -0
  92. package/src/learning/tests/test_e2e_ranking_v28.py +82 -0
  93. package/src/learning/tests/test_feature_extractor_v28.py +93 -0
  94. package/src/learning/tests/test_feedback_collector.py +2 -6
  95. package/src/learning/tests/test_learning_db.py +2 -6
  96. package/src/learning/tests/test_learning_db_v28.py +110 -0
  97. package/src/learning/tests/test_learning_init_v28.py +48 -0
  98. package/src/learning/tests/test_outcome_signals.py +48 -0
  99. package/src/learning/tests/test_project_context.py +2 -6
  100. package/src/learning/tests/test_schema_migration.py +319 -0
  101. package/src/learning/tests/test_signal_inference.py +11 -13
  102. package/src/learning/tests/test_source_quality.py +2 -6
  103. package/src/learning/tests/test_synthetic_bootstrap.py +3 -7
  104. package/src/learning/tests/test_workflow_miner.py +2 -6
  105. package/src/learning/workflow_pattern_miner.py +2 -12
  106. package/src/lifecycle/__init__.py +54 -0
  107. package/src/lifecycle/bounded_growth.py +239 -0
  108. package/src/lifecycle/compaction_engine.py +226 -0
  109. package/src/lifecycle/lifecycle_engine.py +302 -0
  110. package/src/lifecycle/lifecycle_evaluator.py +225 -0
  111. package/src/lifecycle/lifecycle_scheduler.py +130 -0
  112. package/src/lifecycle/retention_policy.py +285 -0
  113. package/src/lifecycle/tests/__init__.py +4 -0
  114. package/src/lifecycle/tests/test_bounded_growth.py +193 -0
  115. package/src/lifecycle/tests/test_compaction.py +179 -0
  116. package/src/lifecycle/tests/test_lifecycle_engine.py +137 -0
  117. package/src/lifecycle/tests/test_lifecycle_evaluation.py +177 -0
  118. package/src/lifecycle/tests/test_lifecycle_scheduler.py +127 -0
  119. package/src/lifecycle/tests/test_lifecycle_search.py +109 -0
  120. package/src/lifecycle/tests/test_mcp_compact.py +149 -0
  121. package/src/lifecycle/tests/test_mcp_lifecycle_status.py +114 -0
  122. package/src/lifecycle/tests/test_retention_policy.py +162 -0
  123. package/src/mcp_tools_v28.py +280 -0
  124. package/src/memory-profiles.py +2 -12
  125. package/src/memory-reset.py +2 -12
  126. package/src/memory_compression.py +2 -12
  127. package/src/memory_store_v2.py +76 -20
  128. package/src/migrate_v1_to_v2.py +2 -12
  129. package/src/pattern_learner.py +29 -975
  130. package/src/patterns/__init__.py +24 -0
  131. package/src/patterns/analyzers.py +247 -0
  132. package/src/patterns/learner.py +267 -0
  133. package/src/patterns/scoring.py +167 -0
  134. package/src/patterns/store.py +223 -0
  135. package/src/patterns/terminology.py +138 -0
  136. package/src/provenance_tracker.py +4 -14
  137. package/src/query_optimizer.py +4 -6
  138. package/src/rate_limiter.py +2 -6
  139. package/src/search/__init__.py +20 -0
  140. package/src/search/cli.py +77 -0
  141. package/src/search/constants.py +26 -0
  142. package/src/search/engine.py +239 -0
  143. package/src/search/fusion.py +122 -0
  144. package/src/search/index_loader.py +112 -0
  145. package/src/search/methods.py +162 -0
  146. package/src/search_engine_v2.py +4 -6
  147. package/src/setup_validator.py +7 -13
  148. package/src/subscription_manager.py +2 -12
  149. package/src/tree/__init__.py +59 -0
  150. package/src/tree/builder.py +183 -0
  151. package/src/tree/nodes.py +196 -0
  152. package/src/tree/queries.py +252 -0
  153. package/src/tree/schema.py +76 -0
  154. package/src/tree_manager.py +10 -711
  155. package/src/trust/__init__.py +45 -0
  156. package/src/trust/constants.py +66 -0
  157. package/src/trust/queries.py +157 -0
  158. package/src/trust/schema.py +95 -0
  159. package/src/trust/scorer.py +299 -0
  160. package/src/trust/signals.py +95 -0
  161. package/src/trust_scorer.py +39 -697
  162. package/src/webhook_dispatcher.py +2 -12
  163. package/ui/app.js +1 -1
  164. package/ui/js/agents.js +1 -1
  165. package/ui_server.py +2 -14
  166. package/ATTRIBUTION.md +0 -140
  167. package/docs/ARCHITECTURE-V2.5.md +0 -190
  168. package/docs/GRAPH-ENGINE.md +0 -503
  169. package/docs/architecture-diagram.drawio +0 -405
  170. package/docs/plans/2026-02-13-benchmark-suite.md +0 -1349
@@ -0,0 +1,167 @@
1
+ #!/usr/bin/env python3
2
+ # SPDX-License-Identifier: MIT
3
+ # Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
4
+ """
5
+ Confidence Scoring - Bayesian pattern confidence calculation.
6
+
7
+ Uses Beta-Binomial posterior with log-scaled competition,
8
+ recency bonuses, and temporal distribution factors.
9
+ """
10
+
11
+ import sqlite3
12
+ import logging
13
+ from datetime import datetime, timedelta
14
+ from typing import List
15
+ from pathlib import Path
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
class ConfidenceScorer:
    """Calculates and tracks pattern confidence scores.

    Confidence is a Beta-Binomial Bayesian posterior with log-scaled
    competition, adjusted by a recency factor and a temporal-distribution
    factor, then hard-capped at 0.95.
    """

    def __init__(self, db_path: Path):
        # SQLite database file holding the `memories` table
        # (expects columns: id, created_at at minimum).
        self.db_path = db_path

    def calculate_confidence(
        self,
        pattern_type: str,
        key: str,
        value: str,
        evidence_memory_ids: List[int],
        total_memories: int
    ) -> float:
        """
        Calculate confidence using Beta-Binomial Bayesian posterior.

        Based on MACLA (arXiv:2512.18950, Forouzandeh et al., Dec 2025):
            posterior_mean = (alpha + evidence) / (alpha + beta + evidence + competition)

        Adaptation: MACLA's Beta-Binomial uses pairwise interaction counts.
        Our corpus has sparse signals (most memories are irrelevant to any
        single pattern), so log-scaled competition is used instead of the
        raw total to avoid over-dilution: competition = log2(total_memories).

        Pattern-specific priors (alpha, beta):
        - preference (1, 4): prior mean 0.20, ~8 items to reach 0.5
        - style (1, 5): prior mean 0.17, subtler signals need more evidence
        - terminology (2, 3): prior mean 0.40, direct usage signal

        Args:
            pattern_type: 'preference', 'style', or 'terminology'; unknown
                types fall back to the 'preference' prior.
            key: Pattern key (unused in the math; kept for interface symmetry).
            value: Pattern value (unused in the math; kept for interface symmetry).
            evidence_memory_ids: Memory IDs supporting this pattern.
            total_memories: Size of the whole memory corpus.

        Returns:
            Confidence in [0.0, 0.95], rounded to 3 decimal places.
            0.0 when there is no corpus or no evidence.
        """
        if total_memories == 0 or not evidence_memory_ids:
            return 0.0

        import math
        evidence_count = len(evidence_memory_ids)

        # Pattern-specific Beta priors (alpha, beta)
        PRIORS = {
            'preference': (1.0, 4.0),
            'style': (1.0, 5.0),
            'terminology': (2.0, 3.0),
        }
        alpha, beta = PRIORS.get(pattern_type, (1.0, 4.0))

        # Log-scaled competition: grows slowly with corpus size
        # 10 memories -> 3.3, 60 -> 5.9, 500 -> 9.0, 5000 -> 12.3
        competition = math.log2(max(2, total_memories))

        # MACLA-inspired Beta posterior with log competition
        posterior_mean = (alpha + evidence_count) / (alpha + beta + evidence_count + competition)

        # Recency adjustment. _calculate_recency_bonus returns 1.0 or 1.2;
        # 1.2 maps to a factor of exactly 1.075 here (the 0.15 cap would
        # only bind for hypothetical bonuses above 1.4).
        recency_bonus = self._calculate_recency_bonus(evidence_memory_ids)
        recency_factor = 1.0 + min(0.15, 0.075 * (recency_bonus - 1.0) / 0.2) if recency_bonus > 1.0 else 1.0

        # Temporal spread adjustment (0.8 to 1.1)
        distribution_factor = self._calculate_distribution_factor(evidence_memory_ids)

        # Final confidence, never fully certain
        confidence = posterior_mean * recency_factor * distribution_factor

        return min(0.95, round(confidence, 3))

    def _calculate_recency_bonus(self, memory_ids: List[int]) -> float:
        """Return 1.2 when more than half of the evidence memories were
        created within the last 30 days, else 1.0."""
        conn = sqlite3.connect(self.db_path)
        try:
            cursor = conn.cursor()
            placeholders = ','.join('?' * len(memory_ids))
            cursor.execute(f'''
                SELECT created_at FROM memories
                WHERE id IN ({placeholders})
                ORDER BY created_at DESC
            ''', memory_ids)
            timestamps = cursor.fetchall()
        finally:
            # Close even when the query raises so the handle never leaks.
            conn.close()

        if not timestamps:
            return 1.0

        # Count memories from the last 30 days
        recent_count = 0
        cutoff = datetime.now() - timedelta(days=30)

        for ts_tuple in timestamps:
            ts_str = ts_tuple[0]
            try:
                # Stored as 'YYYY-MM-DD HH:MM:SS'; fromisoformat needs the 'T'.
                ts = datetime.fromisoformat(ts_str.replace(' ', 'T'))
                if ts > cutoff:
                    recent_count += 1
            except (ValueError, AttributeError):
                pass  # Skip malformed / NULL timestamps

        # Bonus only when the majority of evidence is recent
        if len(timestamps) > 0 and recent_count / len(timestamps) > 0.5:
            return 1.2
        else:
            return 1.0

    def _calculate_distribution_factor(self, memory_ids: List[int]) -> float:
        """Score temporal spread of the evidence: 1.1 if it spans more than
        7 days, 1.0 for 2-7 days, 0.9 if all on one day, 0.8 when fewer
        than 3 usable samples exist."""
        if len(memory_ids) < 3:
            return 0.8  # Penalize low sample size

        conn = sqlite3.connect(self.db_path)
        try:
            cursor = conn.cursor()
            placeholders = ','.join('?' * len(memory_ids))
            cursor.execute(f'''
                SELECT created_at FROM memories
                WHERE id IN ({placeholders})
                ORDER BY created_at
            ''', memory_ids)
            timestamps = [row[0] for row in cursor.fetchall()]
        finally:
            conn.close()

        if len(timestamps) < 2:
            return 0.8

        try:
            # Parse timestamps, silently dropping malformed ones
            dates = []
            for ts_str in timestamps:
                try:
                    ts = datetime.fromisoformat(ts_str.replace(' ', 'T'))
                    dates.append(ts)
                except (ValueError, AttributeError):
                    pass

            if len(dates) < 2:
                return 0.8

            # Days between oldest and newest evidence (list is sorted ASC)
            time_span = (dates[-1] - dates[0]).days

            if time_span > 7:
                return 1.1  # Spread across multiple sessions
            elif time_span > 1:
                return 1.0
            else:
                return 0.9  # All on same day = might be one-off

        except Exception:
            # Defensive: never let scoring crash pattern learning
            return 1.0
@@ -0,0 +1,223 @@
1
+ #!/usr/bin/env python3
2
+ # SPDX-License-Identifier: MIT
3
+ # Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
4
+ """
5
+ Pattern Store - SQLite-backed pattern storage and retrieval.
6
+
7
+ Handles identity_patterns and pattern_examples tables,
8
+ including schema migration, CRUD operations, and profile-scoped queries.
9
+ """
10
+
11
+ import sqlite3
12
+ import json
13
+ import logging
14
+ from typing import Dict, List, Optional, Any
15
+ from pathlib import Path
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
class PatternStore:
    """SQLite-backed storage and retrieval for learned identity patterns.

    Owns the `identity_patterns` and `pattern_examples` tables: schema
    creation/migration, pattern upsert, example excerpts, and
    profile-scoped queries.
    """

    def __init__(self, db_path: Path):
        # Database file shared with the `memories` table.
        self.db_path = db_path
        self._init_tables()

    def _init_tables(self):
        """Create pattern tables if absent; drop and recreate any table
        whose legacy schema is missing required columns."""
        conn = sqlite3.connect(self.db_path)
        try:
            cursor = conn.cursor()

            # Check existing tables for the required schema; incomplete ones
            # are dropped and rebuilt by the CREATE statements below.
            for table_name, required_cols in [
                ('identity_patterns', {'pattern_type', 'key', 'value', 'confidence'}),
                ('pattern_examples', {'pattern_id', 'memory_id'}),
            ]:
                cursor.execute(f"PRAGMA table_info({table_name})")
                existing_cols = {row[1] for row in cursor.fetchall()}
                if existing_cols and not required_cols.issubset(existing_cols):
                    logger.warning(f"Dropping incomplete {table_name} table (missing: {required_cols - existing_cols})")
                    cursor.execute(f'DROP TABLE IF EXISTS {table_name}')

            # Identity patterns table
            cursor.execute('''
                CREATE TABLE IF NOT EXISTS identity_patterns (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    pattern_type TEXT NOT NULL,
                    key TEXT NOT NULL,
                    value TEXT NOT NULL,
                    confidence REAL DEFAULT 0.5,
                    evidence_count INTEGER DEFAULT 1,
                    memory_ids TEXT,
                    category TEXT,
                    profile TEXT DEFAULT 'default',
                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                    UNIQUE(pattern_type, key, category, profile)
                )
            ''')

            # Add profile column if upgrading from an older schema.
            # The default must be a single-quoted SQL string literal:
            # double quotes are identifiers in SQL and only work in SQLite
            # via a compatibility misfeature that builds can disable.
            try:
                cursor.execute("ALTER TABLE identity_patterns ADD COLUMN profile TEXT DEFAULT 'default'")
            except sqlite3.OperationalError:
                pass  # Column already exists

            # Pattern examples table
            cursor.execute('''
                CREATE TABLE IF NOT EXISTS pattern_examples (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    pattern_id INTEGER NOT NULL,
                    memory_id INTEGER NOT NULL,
                    example_text TEXT,
                    FOREIGN KEY (pattern_id) REFERENCES identity_patterns(id) ON DELETE CASCADE,
                    FOREIGN KEY (memory_id) REFERENCES memories(id) ON DELETE CASCADE
                )
            ''')

            # Indexes for the common query paths (type / confidence / profile)
            cursor.execute('CREATE INDEX IF NOT EXISTS idx_pattern_type ON identity_patterns(pattern_type)')
            cursor.execute('CREATE INDEX IF NOT EXISTS idx_pattern_confidence ON identity_patterns(confidence)')
            cursor.execute('CREATE INDEX IF NOT EXISTS idx_pattern_profile ON identity_patterns(profile)')

            conn.commit()
        finally:
            # Close even when DDL fails so the handle never leaks.
            conn.close()

    def save_pattern(self, pattern: Dict[str, Any]) -> int:
        """Save or update a pattern (scoped by profile).

        Args:
            pattern: Dict with keys 'pattern_type', 'key', 'value',
                'confidence', 'evidence_count', 'memory_ids', 'category',
                and optionally 'profile' (defaults to 'default').

        Returns:
            The row id of the inserted or updated pattern.
        """
        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()
        profile = pattern.get('profile', 'default')

        try:
            # Upsert key: (pattern_type, key, category, profile)
            cursor.execute('''
                SELECT id FROM identity_patterns
                WHERE pattern_type = ? AND key = ? AND category = ? AND profile = ?
            ''', (pattern['pattern_type'], pattern['key'], pattern['category'], profile))

            existing = cursor.fetchone()

            # memory_ids is stored as a JSON array of ints
            memory_ids_json = json.dumps(pattern['memory_ids'])

            if existing:
                # Update existing pattern in place
                pattern_id = existing[0]
                cursor.execute('''
                    UPDATE identity_patterns
                    SET value = ?, confidence = ?, evidence_count = ?,
                        memory_ids = ?, updated_at = CURRENT_TIMESTAMP
                    WHERE id = ?
                ''', (
                    pattern['value'],
                    pattern['confidence'],
                    pattern['evidence_count'],
                    memory_ids_json,
                    pattern_id
                ))
            else:
                # Insert new pattern
                cursor.execute('''
                    INSERT INTO identity_patterns
                    (pattern_type, key, value, confidence, evidence_count, memory_ids, category, profile)
                    VALUES (?, ?, ?, ?, ?, ?, ?, ?)
                ''', (
                    pattern['pattern_type'],
                    pattern['key'],
                    pattern['value'],
                    pattern['confidence'],
                    pattern['evidence_count'],
                    memory_ids_json,
                    pattern['category'],
                    profile
                ))
                pattern_id = cursor.lastrowid

            # Refresh representative examples for this pattern
            self._save_pattern_examples(cursor, pattern_id, pattern['memory_ids'], pattern['key'])

            conn.commit()
            return pattern_id

        finally:
            conn.close()

    def _save_pattern_examples(self, cursor, pattern_id: int, memory_ids: List[int], key: str):
        """Replace stored examples with excerpts from the top 3 evidence memories."""
        # Clear old examples first so updates don't accumulate duplicates
        cursor.execute('DELETE FROM pattern_examples WHERE pattern_id = ?', (pattern_id,))

        for memory_id in memory_ids[:3]:
            cursor.execute('SELECT content FROM memories WHERE id = ?', (memory_id,))
            row = cursor.fetchone()

            if row:
                content = row[0]
                excerpt = self._extract_relevant_excerpt(content, key)

                cursor.execute('''
                    INSERT INTO pattern_examples (pattern_id, memory_id, example_text)
                    VALUES (?, ?, ?)
                ''', (pattern_id, memory_id, excerpt))

    def _extract_relevant_excerpt(self, content: str, key: str) -> str:
        """Return a ~150-char excerpt of `content` centered on the first
        occurrence of `key` (underscores treated as spaces); falls back to
        the first 150 characters when the key is not found."""
        key_lower = key.lower().replace('_', ' ')
        idx = content.lower().find(key_lower)

        if idx >= 0:
            # Window: 50 chars before the match, 100 after its start
            start = max(0, idx - 50)
            end = min(len(content), idx + 100)
            excerpt = content[start:end]
            return excerpt if len(excerpt) <= 150 else excerpt[:150] + '...'

        # Fallback: first 150 chars
        return content[:150] + ('...' if len(content) > 150 else '')

    def get_patterns(self, min_confidence: float = 0.7, pattern_type: Optional[str] = None,
                     profile: Optional[str] = None) -> List[Dict[str, Any]]:
        """Return patterns at or above `min_confidence`, optionally filtered
        by `pattern_type` and/or `profile`, ordered by confidence then
        evidence count (both descending).

        Note: 'frequency' mirrors 'evidence_count' for older callers.
        """
        conn = sqlite3.connect(self.db_path)
        try:
            cursor = conn.cursor()

            # Build WHERE clause from the optional filters (parameterized)
            conditions = ['confidence >= ?']
            params = [min_confidence]

            if pattern_type:
                conditions.append('pattern_type = ?')
                params.append(pattern_type)

            if profile:
                conditions.append('profile = ?')
                params.append(profile)

            where_clause = ' AND '.join(conditions)
            cursor.execute(f'''
                SELECT id, pattern_type, key, value, confidence, evidence_count,
                       updated_at, created_at, category
                FROM identity_patterns
                WHERE {where_clause}
                ORDER BY confidence DESC, evidence_count DESC
            ''', params)

            patterns = []
            for row in cursor.fetchall():
                patterns.append({
                    'id': row[0],
                    'pattern_type': row[1],
                    'key': row[2],
                    'value': row[3],
                    'confidence': row[4],
                    'evidence_count': row[5],
                    'frequency': row[5],
                    'last_seen': row[6],
                    'created_at': row[7],
                    'category': row[8]
                })

            return patterns
        finally:
            conn.close()
@@ -0,0 +1,138 @@
1
+ #!/usr/bin/env python3
2
+ # SPDX-License-Identifier: MIT
3
+ # Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
4
+ """
5
+ Terminology Learner - User-specific term definition extraction.
6
+
7
+ Learns how the user defines ambiguous terms like 'optimize', 'refactor', etc.
8
+ by analyzing contextual co-occurrence patterns across memories.
9
+ """
10
+
11
+ import sqlite3
12
+ import re
13
+ import logging
14
+ from typing import Dict, List, Optional, Any
15
+ from collections import Counter
16
+ from pathlib import Path
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
+
21
class TerminologyLearner:
    """Learns user-specific definitions of common ambiguous terms.

    For each watched term ('optimize', 'refactor', 'mvp', ...), collects
    ~200-char context windows around every occurrence across the given
    memories and, with at least 3 contexts, derives a definition from the
    most frequent co-occurring words.
    """

    def __init__(self, db_path: Path):
        # SQLite database file holding the `memories` table (id, content).
        self.db_path = db_path

        # Common ambiguous terms to learn
        self.ambiguous_terms = [
            'optimize', 'refactor', 'clean', 'simple',
            'mvp', 'prototype', 'scale', 'production-ready',
            'fix', 'improve', 'update', 'enhance'
        ]

    def learn_terminology(self, memory_ids: List[int]) -> Dict[str, Dict[str, Any]]:
        """Learn user-specific term definitions from the given memories.

        Args:
            memory_ids: IDs of memories to scan; unknown IDs are skipped.

        Returns:
            Mapping of term -> pattern dict ('pattern_type', 'key', 'value',
            'confidence', 'evidence_count', 'memory_ids', 'category') for
            each term with at least 3 contexts and a derivable definition.
        """
        patterns = {}

        conn = sqlite3.connect(self.db_path)
        try:
            cursor = conn.cursor()

            for term in self.ambiguous_terms:
                contexts = []

                # Compile once per term; whole-word, case-insensitive match.
                term_re = re.compile(r'\b' + re.escape(term) + r'\b', re.IGNORECASE)

                # Collect every context window where the term appears
                for memory_id in memory_ids:
                    cursor.execute('SELECT content FROM memories WHERE id = ?', (memory_id,))
                    row = cursor.fetchone()

                    if not row:
                        continue

                    content = row[0]

                    for match in term_re.finditer(content):
                        term_idx = match.start()

                        # Extract ~100-char window on each side of the term
                        start = max(0, term_idx - 100)
                        end = min(len(content), term_idx + len(term) + 100)
                        context_window = content[start:end]

                        contexts.append({
                            'memory_id': memory_id,
                            'context': context_window
                        })

                # Need at least 3 examples before inferring a definition
                if len(contexts) >= 3:
                    definition = self._extract_definition(term, contexts)

                    if definition:
                        # De-duplicate memory IDs (one memory may match twice)
                        evidence_list = list(set([ctx['memory_id'] for ctx in contexts]))

                        # Confidence grows with examples, capped at 0.95
                        confidence = min(0.95, 0.6 + (len(contexts) * 0.05))

                        patterns[term] = {
                            'pattern_type': 'terminology',
                            'key': term,
                            'value': definition,
                            'confidence': round(confidence, 2),
                            'evidence_count': len(evidence_list),
                            'memory_ids': evidence_list,
                            'category': 'general'
                        }
        finally:
            # Close even when a query raises so the handle never leaks.
            conn.close()

        return patterns

    def _extract_definition(self, term: str, contexts: List[Dict]) -> Optional[str]:
        """Derive a definition for `term` from co-occurring words.

        Counts word frequencies across all context windows (minus stopwords
        and the term itself), then applies per-term heuristic rules on the
        top 8 co-occurring words; falls back to a generic "commonly used
        with" summary, or None if fewer than 3 candidate words exist.
        """
        # Collect lowercase words near the term across all contexts
        nearby_words = []

        for ctx in contexts:
            words = re.findall(r'\b\w+\b', ctx['context'].lower())
            nearby_words.extend(words)

        word_counts = Counter(nearby_words)

        # Remove the term itself and common stopwords
        stopwords = {'the', 'a', 'an', 'is', 'to', 'for', 'of', 'in', 'on', 'at',
                     'and', 'or', 'but', 'with', 'from', 'by', 'this', 'that'}
        word_counts = Counter({w: c for w, c in word_counts.items()
                               if w not in stopwords and w != term.lower()})

        # Top co-occurring words drive the heuristics below
        top_words = [w for w, _ in word_counts.most_common(8)]

        if term == 'optimize':
            if any(w in top_words for w in ['performance', 'speed', 'faster', 'latency']):
                return "Performance optimization (speed/latency)"
            elif any(w in top_words for w in ['code', 'clean', 'refactor']):
                return "Code quality optimization"

        elif term == 'refactor':
            if any(w in top_words for w in ['architecture', 'structure', 'design']):
                return "Architecture change, not just renaming"
            elif any(w in top_words for w in ['clean', 'organize', 'simplify']):
                return "Code organization improvement"

        elif term == 'mvp':
            if any(w in top_words for w in ['core', 'basic', 'essential', 'minimal']):
                return "Core features only, no polish"

        elif term == 'production-ready':
            if any(w in top_words for w in ['test', 'error', 'monitoring', 'deploy']):
                return "Fully tested and monitored for deployment"

        # Generic definition if no specific pattern matched
        if len(top_words) >= 3:
            return f"Commonly used with: {', '.join(top_words[:3])}"

        return None
@@ -1,22 +1,12 @@
1
1
  #!/usr/bin/env python3
2
- """
3
- SuperLocalMemory V2 - Provenance Tracker
4
- Copyright (c) 2026 Varun Pratap Bhardwaj
5
- Licensed under MIT License
6
-
7
- Repository: https://github.com/varun369/SuperLocalMemoryV2
8
- Author: Varun Pratap Bhardwaj (Solution Architect)
9
-
10
- NOTICE: This software is protected by MIT License.
11
- Attribution must be preserved in all copies or derivatives.
12
- """
13
-
2
+ # SPDX-License-Identifier: MIT
3
+ # Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
14
4
  """
15
5
  ProvenanceTracker — Tracks the origin and lineage of every memory.
16
6
 
17
7
  Adds provenance columns to the memories table:
18
8
  created_by — Agent ID that created this memory (e.g., "mcp:claude-desktop")
19
- source_protocol — Protocol used (mcp, cli, rest, python, a2a)
9
+ source_protocol — Protocol used (mcp, cli, rest, python)
20
10
  trust_score — Trust score at time of creation (default 1.0)
21
11
  provenance_chain — JSON array of derivation history
22
12
 
@@ -174,7 +164,7 @@ class ProvenanceTracker:
174
164
  Args:
175
165
  memory_id: ID of the memory to annotate
176
166
  created_by: Agent ID that created this memory
177
- source_protocol: Protocol used (mcp, cli, rest, python, a2a)
167
+ source_protocol: Protocol used (mcp, cli, rest, python)
178
168
  trust_score: Trust score at time of creation
179
169
  derived_from: If this memory was derived from another, its ID
180
170
  """
@@ -1,16 +1,14 @@
1
1
  #!/usr/bin/env python3
2
- """
3
- SuperLocalMemory V2 - Query Optimizer
2
+ # SPDX-License-Identifier: MIT
3
+ # Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
4
+ """SuperLocalMemory V2 - Query Optimizer
4
5
 
5
- Copyright (c) 2026 Varun Pratap Bhardwaj
6
6
  Solution Architect & Original Creator
7
7
 
8
- Licensed under MIT License (see LICENSE file)
9
- Repository: https://github.com/varun369/SuperLocalMemoryV2
8
+ (see LICENSE file)
10
9
 
11
10
  ATTRIBUTION REQUIRED: This notice must be preserved in all copies.
12
11
  """
13
-
14
12
  """
15
13
  Query Optimizer - Query Enhancement and Rewriting
16
14
 
@@ -1,10 +1,6 @@
1
1
  #!/usr/bin/env python3
2
- """
3
- SuperLocalMemory V2 - Rate Limiter
4
- Copyright (c) 2026 Varun Pratap Bhardwaj
5
- Licensed under MIT License
6
- """
7
-
2
+ # SPDX-License-Identifier: MIT
3
+ # Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
8
4
  """
9
5
  Lightweight rate limiter using sliding window algorithm.
10
6
  Pure stdlib — no external dependencies.
@@ -0,0 +1,20 @@
1
+ # SPDX-License-Identifier: MIT
2
+ # Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
3
+ """search package - Hybrid Search System for SuperLocalMemory V2
4
+
5
+ Re-exports all public classes so that
6
+ ``from search import HybridSearchEngine`` works.
7
+ """
8
+ from search.engine import HybridSearchEngine
9
+ from search.fusion import FusionMixin
10
+ from search.methods import SearchMethodsMixin
11
+ from search.index_loader import IndexLoaderMixin
12
+ from search.cli import main
13
+
14
+ __all__ = [
15
+ "HybridSearchEngine",
16
+ "FusionMixin",
17
+ "SearchMethodsMixin",
18
+ "IndexLoaderMixin",
19
+ "main",
20
+ ]