superlocalmemory 2.7.6 → 2.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +120 -155
- package/README.md +115 -89
- package/api_server.py +2 -12
- package/docs/PATTERN-LEARNING.md +64 -199
- package/docs/example_graph_usage.py +4 -6
- package/install.sh +59 -0
- package/mcp_server.py +83 -7
- package/package.json +1 -8
- package/scripts/generate-thumbnails.py +3 -5
- package/skills/slm-build-graph/SKILL.md +1 -1
- package/skills/slm-list-recent/SKILL.md +1 -1
- package/skills/slm-recall/SKILL.md +1 -1
- package/skills/slm-remember/SKILL.md +1 -1
- package/skills/slm-show-patterns/SKILL.md +1 -1
- package/skills/slm-status/SKILL.md +1 -1
- package/skills/slm-switch-profile/SKILL.md +1 -1
- package/src/agent_registry.py +7 -18
- package/src/auth_middleware.py +3 -5
- package/src/auto_backup.py +3 -7
- package/src/behavioral/__init__.py +49 -0
- package/src/behavioral/behavioral_listener.py +203 -0
- package/src/behavioral/behavioral_patterns.py +275 -0
- package/src/behavioral/cross_project_transfer.py +206 -0
- package/src/behavioral/outcome_inference.py +194 -0
- package/src/behavioral/outcome_tracker.py +193 -0
- package/src/behavioral/tests/__init__.py +4 -0
- package/src/behavioral/tests/test_behavioral_integration.py +108 -0
- package/src/behavioral/tests/test_behavioral_patterns.py +150 -0
- package/src/behavioral/tests/test_cross_project_transfer.py +142 -0
- package/src/behavioral/tests/test_mcp_behavioral.py +139 -0
- package/src/behavioral/tests/test_mcp_report_outcome.py +117 -0
- package/src/behavioral/tests/test_outcome_inference.py +107 -0
- package/src/behavioral/tests/test_outcome_tracker.py +96 -0
- package/src/cache_manager.py +4 -6
- package/src/compliance/__init__.py +48 -0
- package/src/compliance/abac_engine.py +149 -0
- package/src/compliance/abac_middleware.py +116 -0
- package/src/compliance/audit_db.py +215 -0
- package/src/compliance/audit_logger.py +148 -0
- package/src/compliance/retention_manager.py +289 -0
- package/src/compliance/retention_scheduler.py +186 -0
- package/src/compliance/tests/__init__.py +4 -0
- package/src/compliance/tests/test_abac_enforcement.py +95 -0
- package/src/compliance/tests/test_abac_engine.py +124 -0
- package/src/compliance/tests/test_abac_mcp_integration.py +118 -0
- package/src/compliance/tests/test_audit_db.py +123 -0
- package/src/compliance/tests/test_audit_logger.py +98 -0
- package/src/compliance/tests/test_mcp_audit.py +128 -0
- package/src/compliance/tests/test_mcp_retention_policy.py +125 -0
- package/src/compliance/tests/test_retention_manager.py +131 -0
- package/src/compliance/tests/test_retention_scheduler.py +99 -0
- package/src/db_connection_manager.py +2 -12
- package/src/embedding_engine.py +61 -669
- package/src/embeddings/__init__.py +47 -0
- package/src/embeddings/cache.py +70 -0
- package/src/embeddings/cli.py +113 -0
- package/src/embeddings/constants.py +47 -0
- package/src/embeddings/database.py +91 -0
- package/src/embeddings/engine.py +247 -0
- package/src/embeddings/model_loader.py +145 -0
- package/src/event_bus.py +3 -13
- package/src/graph/__init__.py +36 -0
- package/src/graph/build_helpers.py +74 -0
- package/src/graph/cli.py +87 -0
- package/src/graph/cluster_builder.py +188 -0
- package/src/graph/cluster_summary.py +148 -0
- package/src/graph/constants.py +47 -0
- package/src/graph/edge_builder.py +162 -0
- package/src/graph/entity_extractor.py +95 -0
- package/src/graph/graph_core.py +226 -0
- package/src/graph/graph_search.py +231 -0
- package/src/graph/hierarchical.py +207 -0
- package/src/graph/schema.py +99 -0
- package/src/graph_engine.py +45 -1451
- package/src/hnsw_index.py +3 -7
- package/src/hybrid_search.py +36 -683
- package/src/learning/__init__.py +27 -12
- package/src/learning/adaptive_ranker.py +50 -12
- package/src/learning/cross_project_aggregator.py +2 -12
- package/src/learning/engagement_tracker.py +2 -12
- package/src/learning/feature_extractor.py +175 -43
- package/src/learning/feedback_collector.py +7 -12
- package/src/learning/learning_db.py +180 -12
- package/src/learning/project_context_manager.py +2 -12
- package/src/learning/source_quality_scorer.py +2 -12
- package/src/learning/synthetic_bootstrap.py +2 -12
- package/src/learning/tests/__init__.py +2 -0
- package/src/learning/tests/test_adaptive_ranker.py +2 -6
- package/src/learning/tests/test_adaptive_ranker_v28.py +60 -0
- package/src/learning/tests/test_aggregator.py +2 -6
- package/src/learning/tests/test_auto_retrain_v28.py +35 -0
- package/src/learning/tests/test_e2e_ranking_v28.py +82 -0
- package/src/learning/tests/test_feature_extractor_v28.py +93 -0
- package/src/learning/tests/test_feedback_collector.py +2 -6
- package/src/learning/tests/test_learning_db.py +2 -6
- package/src/learning/tests/test_learning_db_v28.py +110 -0
- package/src/learning/tests/test_learning_init_v28.py +48 -0
- package/src/learning/tests/test_outcome_signals.py +48 -0
- package/src/learning/tests/test_project_context.py +2 -6
- package/src/learning/tests/test_schema_migration.py +319 -0
- package/src/learning/tests/test_signal_inference.py +11 -13
- package/src/learning/tests/test_source_quality.py +2 -6
- package/src/learning/tests/test_synthetic_bootstrap.py +3 -7
- package/src/learning/tests/test_workflow_miner.py +2 -6
- package/src/learning/workflow_pattern_miner.py +2 -12
- package/src/lifecycle/__init__.py +54 -0
- package/src/lifecycle/bounded_growth.py +239 -0
- package/src/lifecycle/compaction_engine.py +226 -0
- package/src/lifecycle/lifecycle_engine.py +302 -0
- package/src/lifecycle/lifecycle_evaluator.py +225 -0
- package/src/lifecycle/lifecycle_scheduler.py +130 -0
- package/src/lifecycle/retention_policy.py +285 -0
- package/src/lifecycle/tests/__init__.py +4 -0
- package/src/lifecycle/tests/test_bounded_growth.py +193 -0
- package/src/lifecycle/tests/test_compaction.py +179 -0
- package/src/lifecycle/tests/test_lifecycle_engine.py +137 -0
- package/src/lifecycle/tests/test_lifecycle_evaluation.py +177 -0
- package/src/lifecycle/tests/test_lifecycle_scheduler.py +127 -0
- package/src/lifecycle/tests/test_lifecycle_search.py +109 -0
- package/src/lifecycle/tests/test_mcp_compact.py +149 -0
- package/src/lifecycle/tests/test_mcp_lifecycle_status.py +114 -0
- package/src/lifecycle/tests/test_retention_policy.py +162 -0
- package/src/mcp_tools_v28.py +280 -0
- package/src/memory-profiles.py +2 -12
- package/src/memory-reset.py +2 -12
- package/src/memory_compression.py +2 -12
- package/src/memory_store_v2.py +76 -20
- package/src/migrate_v1_to_v2.py +2 -12
- package/src/pattern_learner.py +29 -975
- package/src/patterns/__init__.py +24 -0
- package/src/patterns/analyzers.py +247 -0
- package/src/patterns/learner.py +267 -0
- package/src/patterns/scoring.py +167 -0
- package/src/patterns/store.py +223 -0
- package/src/patterns/terminology.py +138 -0
- package/src/provenance_tracker.py +4 -14
- package/src/query_optimizer.py +4 -6
- package/src/rate_limiter.py +2 -6
- package/src/search/__init__.py +20 -0
- package/src/search/cli.py +77 -0
- package/src/search/constants.py +26 -0
- package/src/search/engine.py +239 -0
- package/src/search/fusion.py +122 -0
- package/src/search/index_loader.py +112 -0
- package/src/search/methods.py +162 -0
- package/src/search_engine_v2.py +4 -6
- package/src/setup_validator.py +7 -13
- package/src/subscription_manager.py +2 -12
- package/src/tree/__init__.py +59 -0
- package/src/tree/builder.py +183 -0
- package/src/tree/nodes.py +196 -0
- package/src/tree/queries.py +252 -0
- package/src/tree/schema.py +76 -0
- package/src/tree_manager.py +10 -711
- package/src/trust/__init__.py +45 -0
- package/src/trust/constants.py +66 -0
- package/src/trust/queries.py +157 -0
- package/src/trust/schema.py +95 -0
- package/src/trust/scorer.py +299 -0
- package/src/trust/signals.py +95 -0
- package/src/trust_scorer.py +39 -697
- package/src/webhook_dispatcher.py +2 -12
- package/ui/app.js +1 -1
- package/ui/js/agents.js +1 -1
- package/ui_server.py +2 -14
- package/ATTRIBUTION.md +0 -140
- package/docs/ARCHITECTURE-V2.5.md +0 -190
- package/docs/GRAPH-ENGINE.md +0 -503
- package/docs/architecture-diagram.drawio +0 -405
- package/docs/plans/2026-02-13-benchmark-suite.md +0 -1349
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# SPDX-License-Identifier: MIT
|
|
3
|
+
# Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
|
|
4
|
+
"""
|
|
5
|
+
Confidence Scoring - Bayesian pattern confidence calculation.
|
|
6
|
+
|
|
7
|
+
Uses Beta-Binomial posterior with log-scaled competition,
|
|
8
|
+
recency bonuses, and temporal distribution factors.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import sqlite3
|
|
12
|
+
import logging
|
|
13
|
+
from datetime import datetime, timedelta
|
|
14
|
+
from typing import List
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
|
|
17
|
+
logger = logging.getLogger(__name__)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class ConfidenceScorer:
    """Calculates and tracks Bayesian confidence scores for learned patterns.

    Combines a Beta-Binomial posterior (pattern-specific priors with
    log-scaled competition) with mild recency and temporal-spread
    adjustments, capped at 0.95.
    """

    # Pattern-specific Beta priors (alpha, beta):
    #   preference  (1, 4): prior mean 0.20, ~8 items to reach 0.5
    #   style       (1, 5): prior mean 0.17, subtler signals need more evidence
    #   terminology (2, 3): prior mean 0.40, direct usage signal
    PRIORS = {
        'preference': (1.0, 4.0),
        'style': (1.0, 5.0),
        'terminology': (2.0, 3.0),
    }

    def __init__(self, db_path: Path):
        # SQLite database holding the `memories` table (id, created_at, ...).
        self.db_path = db_path

    def calculate_confidence(
        self,
        pattern_type: str,
        key: str,
        value: str,
        evidence_memory_ids: List[int],
        total_memories: int
    ) -> float:
        """
        Calculate confidence using Beta-Binomial Bayesian posterior.

        Based on MACLA (arXiv:2512.18950, Forouzandeh et al., Dec 2025):
            posterior_mean = (alpha + evidence) / (alpha + beta + evidence + competition)

        Adaptation: MACLA's Beta-Binomial uses pairwise interaction counts.
        Our corpus has sparse signals (most memories are irrelevant to any
        single pattern), so competition is log-scaled instead of the raw
        total to avoid over-dilution: competition = log2(total_memories).

        Args:
            pattern_type: One of PRIORS' keys; unknown types fall back to
                the 'preference' prior (1, 4).
            key: Pattern key (unused here; kept for API symmetry).
            value: Pattern value (unused here; kept for API symmetry).
            evidence_memory_ids: Memory ids supporting this pattern.
            total_memories: Size of the whole memory corpus.

        Returns:
            Confidence in [0.0, 0.95], rounded to 3 decimal places.
        """
        if total_memories == 0 or not evidence_memory_ids:
            return 0.0

        import math

        evidence_count = len(evidence_memory_ids)
        alpha, beta = self.PRIORS.get(pattern_type, (1.0, 4.0))

        # Log-scaled competition grows slowly with corpus size:
        # 10 memories -> 3.3, 60 -> 5.9, 500 -> 9.0, 5000 -> 12.3
        competition = math.log2(max(2, total_memories))

        # MACLA-inspired Beta posterior with log competition
        posterior_mean = (alpha + evidence_count) / (alpha + beta + evidence_count + competition)

        # Recency adjustment: the 1.2 recency bonus maps to a 1.075 multiplier,
        # capped so the factor stays within 1.0 .. 1.15.
        recency_bonus = self._calculate_recency_bonus(evidence_memory_ids)
        recency_factor = 1.0 + min(0.15, 0.075 * (recency_bonus - 1.0) / 0.2) if recency_bonus > 1.0 else 1.0

        # Temporal spread adjustment (0.8 low-sample .. 1.1 multi-day spread)
        distribution_factor = self._calculate_distribution_factor(evidence_memory_ids)

        confidence = posterior_mean * recency_factor * distribution_factor

        return min(0.95, round(confidence, 3))

    def _fetch_timestamps(self, memory_ids: List[int], newest_first: bool = False) -> List[str]:
        """Return created_at strings for the given memory ids.

        Fix vs. the previous inline queries: an empty id list short-circuits
        to [] (the old code would have built an invalid `IN ()` clause), and
        the connection is closed even if the query raises.
        """
        if not memory_ids:
            return []

        conn = sqlite3.connect(self.db_path)
        try:
            cursor = conn.cursor()
            placeholders = ','.join('?' * len(memory_ids))
            order = 'DESC' if newest_first else 'ASC'
            cursor.execute(
                f'SELECT created_at FROM memories WHERE id IN ({placeholders}) '
                f'ORDER BY created_at {order}',
                memory_ids
            )
            return [row[0] for row in cursor.fetchall()]
        finally:
            conn.close()

    def _calculate_recency_bonus(self, memory_ids: List[int]) -> float:
        """Return 1.2 when more than half of the evidence is from the last
        30 days, else 1.0. Unparseable timestamps are skipped."""
        timestamps = self._fetch_timestamps(memory_ids, newest_first=True)
        if not timestamps:
            return 1.0

        cutoff = datetime.now() - timedelta(days=30)
        recent_count = 0

        for ts_str in timestamps:
            try:
                # Stored timestamps may use a space separator; normalize to ISO.
                if datetime.fromisoformat(ts_str.replace(' ', 'T')) > cutoff:
                    recent_count += 1
            except (ValueError, AttributeError):
                pass

        return 1.2 if recent_count / len(timestamps) > 0.5 else 1.0

    def _calculate_distribution_factor(self, memory_ids: List[int]) -> float:
        """Score temporal spread: evidence spread over many days is stronger
        than a burst within a single session.

        Returns 0.8 (low sample), 0.9 (same day), 1.0 (2-7 days),
        or 1.1 (more than a week).
        """
        if len(memory_ids) < 3:
            return 0.8  # Penalize low sample size

        timestamps = self._fetch_timestamps(memory_ids)
        if len(timestamps) < 2:
            return 0.8

        try:
            dates = []
            for ts_str in timestamps:
                try:
                    dates.append(datetime.fromisoformat(ts_str.replace(' ', 'T')))
                except (ValueError, AttributeError):
                    pass

            if len(dates) < 2:
                return 0.8

            # Timestamps come back oldest-first, so this is the full span.
            time_span = (dates[-1] - dates[0]).days

            if time_span > 7:
                return 1.1
            elif time_span > 1:
                return 1.0
            else:
                return 0.9  # All on same day = might be one-off

        except Exception:
            # Defensive: an unexpected timestamp anomaly is neutral, not fatal.
            return 1.0
|
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# SPDX-License-Identifier: MIT
|
|
3
|
+
# Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
|
|
4
|
+
"""
|
|
5
|
+
Pattern Store - SQLite-backed pattern storage and retrieval.
|
|
6
|
+
|
|
7
|
+
Handles identity_patterns and pattern_examples tables,
|
|
8
|
+
including schema migration, CRUD operations, and profile-scoped queries.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import sqlite3
|
|
12
|
+
import json
|
|
13
|
+
import logging
|
|
14
|
+
from typing import Dict, List, Optional, Any
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
|
|
17
|
+
logger = logging.getLogger(__name__)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class PatternStore:
    """SQLite-backed storage for learned identity patterns.

    Owns the `identity_patterns` and `pattern_examples` tables: schema
    creation/migration, profile-scoped upserts, and confidence-filtered
    retrieval. Connections are always released, even when a query raises
    (the previous version leaked them in `_init_tables` and `get_patterns`).
    """

    def __init__(self, db_path: Path):
        # Database shared with the `memories` table (examples reference it).
        self.db_path = db_path
        self._init_tables()

    def _init_tables(self):
        """Initialize pattern tables if they don't exist, or recreate if schema is incomplete."""
        conn = sqlite3.connect(self.db_path)
        try:
            cursor = conn.cursor()

            # Drop tables whose schema predates the required columns; they are
            # recreated below. Losing rows is acceptable because patterns are
            # re-derivable from the memories table.
            for table_name, required_cols in [
                ('identity_patterns', {'pattern_type', 'key', 'value', 'confidence'}),
                ('pattern_examples', {'pattern_id', 'memory_id'}),
            ]:
                cursor.execute(f"PRAGMA table_info({table_name})")
                existing_cols = {row[1] for row in cursor.fetchall()}
                if existing_cols and not required_cols.issubset(existing_cols):
                    logger.warning(f"Dropping incomplete {table_name} table (missing: {required_cols - existing_cols})")
                    cursor.execute(f'DROP TABLE IF EXISTS {table_name}')

            # Identity patterns table
            cursor.execute('''
                CREATE TABLE IF NOT EXISTS identity_patterns (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    pattern_type TEXT NOT NULL,
                    key TEXT NOT NULL,
                    value TEXT NOT NULL,
                    confidence REAL DEFAULT 0.5,
                    evidence_count INTEGER DEFAULT 1,
                    memory_ids TEXT,
                    category TEXT,
                    profile TEXT DEFAULT 'default',
                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                    UNIQUE(pattern_type, key, category, profile)
                )
            ''')

            # Migration: add profile column when upgrading from a pre-profile schema.
            try:
                cursor.execute('ALTER TABLE identity_patterns ADD COLUMN profile TEXT DEFAULT "default"')
            except sqlite3.OperationalError:
                pass  # Column already exists

            # Pattern examples table
            cursor.execute('''
                CREATE TABLE IF NOT EXISTS pattern_examples (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    pattern_id INTEGER NOT NULL,
                    memory_id INTEGER NOT NULL,
                    example_text TEXT,
                    FOREIGN KEY (pattern_id) REFERENCES identity_patterns(id) ON DELETE CASCADE,
                    FOREIGN KEY (memory_id) REFERENCES memories(id) ON DELETE CASCADE
                )
            ''')

            # Indexes for the common lookup paths
            cursor.execute('CREATE INDEX IF NOT EXISTS idx_pattern_type ON identity_patterns(pattern_type)')
            cursor.execute('CREATE INDEX IF NOT EXISTS idx_pattern_confidence ON identity_patterns(confidence)')
            cursor.execute('CREATE INDEX IF NOT EXISTS idx_pattern_profile ON identity_patterns(profile)')

            conn.commit()
        finally:
            # Fix vs. previous version: connection released even on failure.
            conn.close()

    def save_pattern(self, pattern: Dict[str, Any]) -> int:
        """Insert or update a pattern, scoped by (pattern_type, key, category, profile).

        Args:
            pattern: Dict with keys pattern_type, key, value, confidence,
                evidence_count, memory_ids, category, and an optional
                profile (defaults to 'default').

        Returns:
            Row id of the saved pattern.
        """
        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()
        profile = pattern.get('profile', 'default')

        try:
            # Look up an existing row for this profile-scoped identity.
            cursor.execute('''
                SELECT id FROM identity_patterns
                WHERE pattern_type = ? AND key = ? AND category = ? AND profile = ?
            ''', (pattern['pattern_type'], pattern['key'], pattern['category'], profile))

            existing = cursor.fetchone()

            memory_ids_json = json.dumps(pattern['memory_ids'])

            if existing:
                # Update existing pattern in place.
                pattern_id = existing[0]
                cursor.execute('''
                    UPDATE identity_patterns
                    SET value = ?, confidence = ?, evidence_count = ?,
                        memory_ids = ?, updated_at = CURRENT_TIMESTAMP
                    WHERE id = ?
                ''', (
                    pattern['value'],
                    pattern['confidence'],
                    pattern['evidence_count'],
                    memory_ids_json,
                    pattern_id
                ))
            else:
                # Insert new pattern
                cursor.execute('''
                    INSERT INTO identity_patterns
                    (pattern_type, key, value, confidence, evidence_count, memory_ids, category, profile)
                    VALUES (?, ?, ?, ?, ?, ?, ?, ?)
                ''', (
                    pattern['pattern_type'],
                    pattern['key'],
                    pattern['value'],
                    pattern['confidence'],
                    pattern['evidence_count'],
                    memory_ids_json,
                    pattern['category'],
                    profile
                ))
                pattern_id = cursor.lastrowid

            # Refresh the stored example excerpts for this pattern.
            self._save_pattern_examples(cursor, pattern_id, pattern['memory_ids'], pattern['key'])

            conn.commit()
            return pattern_id

        finally:
            conn.close()

    def _save_pattern_examples(self, cursor, pattern_id: int, memory_ids: List[int], key: str):
        """Replace the stored examples for a pattern with excerpts from the
        top 3 evidence memories. Uses the caller's cursor/transaction."""
        # Clear old examples so re-saving never accumulates duplicates.
        cursor.execute('DELETE FROM pattern_examples WHERE pattern_id = ?', (pattern_id,))

        # Save up to 3 representative examples.
        for memory_id in memory_ids[:3]:
            cursor.execute('SELECT content FROM memories WHERE id = ?', (memory_id,))
            row = cursor.fetchone()

            if row:
                excerpt = self._extract_relevant_excerpt(row[0], key)
                cursor.execute('''
                    INSERT INTO pattern_examples (pattern_id, memory_id, example_text)
                    VALUES (?, ?, ?)
                ''', (pattern_id, memory_id, excerpt))

    def _extract_relevant_excerpt(self, content: str, key: str) -> str:
        """Return a ~150-char excerpt of `content` centered on the first
        mention of `key` (underscores treated as spaces); falls back to the
        first 150 chars when the key is absent."""
        key_lower = key.lower().replace('_', ' ')
        idx = content.lower().find(key_lower)

        if idx >= 0:
            # Window: 50 chars of leading context, 100 trailing.
            start = max(0, idx - 50)
            end = min(len(content), idx + 100)
            excerpt = content[start:end]
            return excerpt if len(excerpt) <= 150 else excerpt[:150] + '...'

        # Fallback: first 150 chars
        return content[:150] + ('...' if len(content) > 150 else '')

    def get_patterns(self, min_confidence: float = 0.7, pattern_type: Optional[str] = None,
                     profile: Optional[str] = None) -> List[Dict[str, Any]]:
        """Get patterns at or above a confidence threshold.

        Args:
            min_confidence: Minimum confidence (inclusive).
            pattern_type: Optional filter on pattern_type.
            profile: Optional filter on profile.

        Returns:
            Pattern dicts ordered by confidence, then evidence_count,
            descending. 'frequency' is a legacy alias of 'evidence_count'.
        """
        conn = sqlite3.connect(self.db_path)
        try:
            cursor = conn.cursor()

            # Build WHERE clause from the optional filters.
            conditions = ['confidence >= ?']
            params: List[Any] = [min_confidence]

            if pattern_type:
                conditions.append('pattern_type = ?')
                params.append(pattern_type)

            if profile:
                conditions.append('profile = ?')
                params.append(profile)

            where_clause = ' AND '.join(conditions)
            cursor.execute(f'''
                SELECT id, pattern_type, key, value, confidence, evidence_count,
                       updated_at, created_at, category
                FROM identity_patterns
                WHERE {where_clause}
                ORDER BY confidence DESC, evidence_count DESC
            ''', params)

            return [
                {
                    'id': row[0],
                    'pattern_type': row[1],
                    'key': row[2],
                    'value': row[3],
                    'confidence': row[4],
                    'evidence_count': row[5],
                    'frequency': row[5],  # legacy alias for evidence_count
                    'last_seen': row[6],
                    'created_at': row[7],
                    'category': row[8],
                }
                for row in cursor.fetchall()
            ]
        finally:
            # Fix vs. previous version: connection closed even if the query raises.
            conn.close()
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# SPDX-License-Identifier: MIT
|
|
3
|
+
# Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
|
|
4
|
+
"""
|
|
5
|
+
Terminology Learner - User-specific term definition extraction.
|
|
6
|
+
|
|
7
|
+
Learns how the user defines ambiguous terms like 'optimize', 'refactor', etc.
|
|
8
|
+
by analyzing contextual co-occurrence patterns across memories.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import sqlite3
|
|
12
|
+
import re
|
|
13
|
+
import logging
|
|
14
|
+
from typing import Dict, List, Optional, Any
|
|
15
|
+
from collections import Counter
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
|
|
18
|
+
logger = logging.getLogger(__name__)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class TerminologyLearner:
    """Learns user-specific definitions of common ambiguous terms.

    Scans memory contents for terms like 'optimize' or 'refactor' and infers
    what the user means by them from contextual co-occurrence patterns.
    """

    def __init__(self, db_path: Path):
        # Database containing the `memories` table to scan.
        self.db_path = db_path

        # Common ambiguous terms to learn (instance attribute so callers can
        # extend the list per learner).
        self.ambiguous_terms = [
            'optimize', 'refactor', 'clean', 'simple',
            'mvp', 'prototype', 'scale', 'production-ready',
            'fix', 'improve', 'update', 'enhance'
        ]

    def learn_terminology(self, memory_ids: List[int]) -> Dict[str, Dict[str, Any]]:
        """Learn user-specific term definitions from the given memories.

        Args:
            memory_ids: Ids of memories to scan; unknown ids are skipped.

        Returns:
            Mapping of term -> pattern dict (pattern_type 'terminology') for
            every term with at least 3 usage contexts and an extractable
            definition. Confidence scales with context count, capped at 0.95.
        """
        # Fetch each memory's content exactly once. The previous version
        # re-queried every memory for every term (len(terms) * len(ids)
        # queries) and leaked the connection if a query raised.
        conn = sqlite3.connect(self.db_path)
        try:
            cursor = conn.cursor()
            contents = []
            for memory_id in memory_ids:
                cursor.execute('SELECT content FROM memories WHERE id = ?', (memory_id,))
                row = cursor.fetchone()
                if row:
                    contents.append((memory_id, row[0]))
        finally:
            conn.close()

        patterns: Dict[str, Dict[str, Any]] = {}

        for term in self.ambiguous_terms:
            contexts = []
            term_pattern = r'\b' + re.escape(term) + r'\b'

            # Collect a ~200-char window around every occurrence of the term.
            for memory_id, content in contents:
                for match in re.finditer(term_pattern, content, re.IGNORECASE):
                    term_idx = match.start()
                    start = max(0, term_idx - 100)
                    end = min(len(content), term_idx + len(term) + 100)
                    contexts.append({
                        'memory_id': memory_id,
                        'context': content[start:end]
                    })

            # Need at least 3 usage examples to attempt a definition.
            if len(contexts) < 3:
                continue

            definition = self._extract_definition(term, contexts)
            if not definition:
                continue

            # Sorted for determinism (the old list(set(...)) order depended
            # on hashing).
            evidence_list = sorted({ctx['memory_id'] for ctx in contexts})

            # Confidence increases with more examples, capped at 0.95.
            confidence = min(0.95, 0.6 + (len(contexts) * 0.05))

            patterns[term] = {
                'pattern_type': 'terminology',
                'key': term,
                'value': definition,
                'confidence': round(confidence, 2),
                'evidence_count': len(evidence_list),
                'memory_ids': evidence_list,
                'category': 'general'
            }

        return patterns

    def _extract_definition(self, term: str, contexts: List[Dict]) -> Optional[str]:
        """Derive a short definition for `term` from its usage contexts.

        Counts co-occurring words (minus stopwords and the term itself),
        applies term-specific heuristics on the top co-occurring words, and
        falls back to listing the top three; returns None on too little signal.
        """
        nearby_words = []
        for ctx in contexts:
            nearby_words.extend(re.findall(r'\b\w+\b', ctx['context'].lower()))

        word_counts = Counter(nearby_words)

        # Remove the term itself and common stopwords.
        stopwords = {'the', 'a', 'an', 'is', 'to', 'for', 'of', 'in', 'on', 'at',
                     'and', 'or', 'but', 'with', 'from', 'by', 'this', 'that'}
        word_counts = Counter({w: c for w, c in word_counts.items()
                               if w not in stopwords and w != term.lower()})

        top_words = [w for w, _ in word_counts.most_common(8)]

        # Term-specific heuristics keyed on the dominant co-occurring words.
        if term == 'optimize':
            if any(w in top_words for w in ['performance', 'speed', 'faster', 'latency']):
                return "Performance optimization (speed/latency)"
            elif any(w in top_words for w in ['code', 'clean', 'refactor']):
                return "Code quality optimization"

        elif term == 'refactor':
            if any(w in top_words for w in ['architecture', 'structure', 'design']):
                return "Architecture change, not just renaming"
            elif any(w in top_words for w in ['clean', 'organize', 'simplify']):
                return "Code organization improvement"

        elif term == 'mvp':
            if any(w in top_words for w in ['core', 'basic', 'essential', 'minimal']):
                return "Core features only, no polish"

        elif term == 'production-ready':
            if any(w in top_words for w in ['test', 'error', 'monitoring', 'deploy']):
                return "Fully tested and monitored for deployment"

        # Generic fallback when no specific rule matched.
        if len(top_words) >= 3:
            return f"Commonly used with: {', '.join(top_words[:3])}"

        return None
|
|
@@ -1,22 +1,12 @@
|
|
|
1
1
|
#!/usr/bin/env python3
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
Copyright (c) 2026 Varun Pratap Bhardwaj
|
|
5
|
-
Licensed under MIT License
|
|
6
|
-
|
|
7
|
-
Repository: https://github.com/varun369/SuperLocalMemoryV2
|
|
8
|
-
Author: Varun Pratap Bhardwaj (Solution Architect)
|
|
9
|
-
|
|
10
|
-
NOTICE: This software is protected by MIT License.
|
|
11
|
-
Attribution must be preserved in all copies or derivatives.
|
|
12
|
-
"""
|
|
13
|
-
|
|
2
|
+
# SPDX-License-Identifier: MIT
|
|
3
|
+
# Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
|
|
14
4
|
"""
|
|
15
5
|
ProvenanceTracker — Tracks the origin and lineage of every memory.
|
|
16
6
|
|
|
17
7
|
Adds provenance columns to the memories table:
|
|
18
8
|
created_by — Agent ID that created this memory (e.g., "mcp:claude-desktop")
|
|
19
|
-
source_protocol — Protocol used (mcp, cli, rest, python
|
|
9
|
+
source_protocol — Protocol used (mcp, cli, rest, python)
|
|
20
10
|
trust_score — Trust score at time of creation (default 1.0)
|
|
21
11
|
provenance_chain — JSON array of derivation history
|
|
22
12
|
|
|
@@ -174,7 +164,7 @@ class ProvenanceTracker:
|
|
|
174
164
|
Args:
|
|
175
165
|
memory_id: ID of the memory to annotate
|
|
176
166
|
created_by: Agent ID that created this memory
|
|
177
|
-
source_protocol: Protocol used (mcp, cli, rest, python
|
|
167
|
+
source_protocol: Protocol used (mcp, cli, rest, python)
|
|
178
168
|
trust_score: Trust score at time of creation
|
|
179
169
|
derived_from: If this memory was derived from another, its ID
|
|
180
170
|
"""
|
package/src/query_optimizer.py
CHANGED
|
@@ -1,16 +1,14 @@
|
|
|
1
1
|
#!/usr/bin/env python3
|
|
2
|
-
|
|
3
|
-
|
|
2
|
+
# SPDX-License-Identifier: MIT
|
|
3
|
+
# Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
|
|
4
|
+
"""SuperLocalMemory V2 - Query Optimizer
|
|
4
5
|
|
|
5
|
-
Copyright (c) 2026 Varun Pratap Bhardwaj
|
|
6
6
|
Solution Architect & Original Creator
|
|
7
7
|
|
|
8
|
-
|
|
9
|
-
Repository: https://github.com/varun369/SuperLocalMemoryV2
|
|
8
|
+
(see LICENSE file)
|
|
10
9
|
|
|
11
10
|
ATTRIBUTION REQUIRED: This notice must be preserved in all copies.
|
|
12
11
|
"""
|
|
13
|
-
|
|
14
12
|
"""
|
|
15
13
|
Query Optimizer - Query Enhancement and Rewriting
|
|
16
14
|
|
package/src/rate_limiter.py
CHANGED
|
@@ -1,10 +1,6 @@
|
|
|
1
1
|
#!/usr/bin/env python3
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
Copyright (c) 2026 Varun Pratap Bhardwaj
|
|
5
|
-
Licensed under MIT License
|
|
6
|
-
"""
|
|
7
|
-
|
|
2
|
+
# SPDX-License-Identifier: MIT
|
|
3
|
+
# Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
|
|
8
4
|
"""
|
|
9
5
|
Lightweight rate limiter using sliding window algorithm.
|
|
10
6
|
Pure stdlib — no external dependencies.
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# SPDX-License-Identifier: MIT
|
|
2
|
+
# Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
|
|
3
|
+
"""search package - Hybrid Search System for SuperLocalMemory V2
|
|
4
|
+
|
|
5
|
+
Re-exports all public classes so that
|
|
6
|
+
``from search import HybridSearchEngine`` works.
|
|
7
|
+
"""
|
|
8
|
+
from search.engine import HybridSearchEngine
|
|
9
|
+
from search.fusion import FusionMixin
|
|
10
|
+
from search.methods import SearchMethodsMixin
|
|
11
|
+
from search.index_loader import IndexLoaderMixin
|
|
12
|
+
from search.cli import main
|
|
13
|
+
|
|
14
|
+
__all__ = [
|
|
15
|
+
"HybridSearchEngine",
|
|
16
|
+
"FusionMixin",
|
|
17
|
+
"SearchMethodsMixin",
|
|
18
|
+
"IndexLoaderMixin",
|
|
19
|
+
"main",
|
|
20
|
+
]
|