superlocalmemory 2.7.1 → 2.7.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +13 -1
- package/README.md +1 -1
- package/docs/ARCHITECTURE.md +8 -8
- package/docs/COMPRESSION-README.md +1 -1
- package/docs/SEARCH-ENGINE-V2.2.0.md +1 -0
- package/mcp_server.py +77 -0
- package/package.json +1 -1
- package/src/agent_registry.py +3 -3
- package/src/graph_engine.py +15 -11
- package/src/learning/feature_extractor.py +77 -16
- package/src/learning/feedback_collector.py +9 -2
- package/src/learning/tests/test_synthetic_bootstrap.py +1 -1
- package/src/trust_scorer.py +288 -74
- package/ui/app.js +4 -4
- package/ui/js/agents.js +4 -4
package/CHANGELOG.md
CHANGED
|
@@ -16,6 +16,18 @@ SuperLocalMemory V2 - Intelligent local memory system for AI coding assistants.
|
|
|
16
16
|
|
|
17
17
|
---
|
|
18
18
|
|
|
19
|
+
## [2.7.3] - 2026-02-16
|
|
20
|
+
|
|
21
|
+
### Improved
|
|
22
|
+
- Enhanced trust scoring accuracy
|
|
23
|
+
- Improved search result relevance across all access methods
|
|
24
|
+
- Better error handling for optional components
|
|
25
|
+
|
|
26
|
+
### Fixed
|
|
27
|
+
- Corrected outdated performance references in documentation
|
|
28
|
+
|
|
29
|
+
---
|
|
30
|
+
|
|
19
31
|
## [2.7.1] - 2026-02-16
|
|
20
32
|
|
|
21
33
|
### Added
|
|
@@ -270,7 +282,7 @@ SuperLocalMemory V2 represents a complete architectural rewrite with intelligent
|
|
|
270
282
|
- **Profile Management** - Multi-profile support with isolated databases
|
|
271
283
|
|
|
272
284
|
### Performance
|
|
273
|
-
-
|
|
285
|
+
- Improved search performance over V1 (see Performance Benchmarks)
|
|
274
286
|
- 60-96% storage reduction with compression
|
|
275
287
|
|
|
276
288
|
### Research Foundation
|
package/README.md
CHANGED
|
@@ -425,7 +425,7 @@ WAL mode + serialized write queue = zero "database is locked" errors, ever.
|
|
|
425
425
|
|
|
426
426
|
### Storage
|
|
427
427
|
|
|
428
|
-
10,000 memories = **13.6 MB** on disk (~1.
|
|
428
|
+
10,000 memories = **13.6 MB** on disk (~1.4 KB per memory). Your entire AI memory history takes less space than a photo.
|
|
429
429
|
|
|
430
430
|
### Graph Construction
|
|
431
431
|
|
package/docs/ARCHITECTURE.md
CHANGED
|
@@ -381,7 +381,7 @@ SuperLocalMemory V2 uses a hierarchical, additive architecture where each layer
|
|
|
381
381
|
- Cold storage archival
|
|
382
382
|
|
|
383
383
|
**Performance:**
|
|
384
|
-
- Full-text search:
|
|
384
|
+
- Full-text search: Sub-11ms median at ~100 memories (see wiki Performance Benchmarks for measured data)
|
|
385
385
|
- Insert: <10ms
|
|
386
386
|
- Tag search: ~30ms
|
|
387
387
|
|
|
@@ -737,7 +737,7 @@ Vue: 10% confidence → Low priority, exploratory
|
|
|
737
737
|
| Operation | Complexity | Typical Time |
|
|
738
738
|
|-----------|-----------|--------------|
|
|
739
739
|
| Add memory | O(1) | <10ms |
|
|
740
|
-
| Search (FTS5) | O(log n) | ~
|
|
740
|
+
| Search (FTS5) | O(log n) | ~11ms median (100 memories) |
|
|
741
741
|
| Graph build | O(n²) worst, O(n log n) avg | ~2s (100 memories) |
|
|
742
742
|
| Pattern update | O(n) | <2s (100 memories) |
|
|
743
743
|
| Find related | O(1) | <10ms |
|
|
@@ -794,12 +794,12 @@ Vue: 10% confidence → Low priority, exploratory
|
|
|
794
794
|
|
|
795
795
|
### Current Limits (Tested)
|
|
796
796
|
|
|
797
|
-
| Memories | Build Time | Search Time | Database Size |
|
|
798
|
-
|
|
799
|
-
|
|
|
800
|
-
|
|
|
801
|
-
|
|
|
802
|
-
|
|
|
797
|
+
| Memories | Build Time | Search Time (median) | Database Size |
|
|
798
|
+
|----------|-----------|----------------------|---------------|
|
|
799
|
+
| 100 | 0.28s | 10.6ms | ~150KB |
|
|
800
|
+
| 500 | ~5s | 65.2ms | ~700KB |
|
|
801
|
+
| 1,000 | 10.6s | 124.3ms | 1.50 MB |
|
|
802
|
+
| 5,000 | 277s | 1,172ms | ~6.8 MB |
|
|
803
803
|
|
|
804
804
|
### Scaling Strategies
|
|
805
805
|
|
|
@@ -245,7 +245,7 @@ Priority order:
|
|
|
245
245
|
- Tier 2 (40 memories @ 10KB): 400KB
|
|
246
246
|
- Tier 3 (30 memories @ 2KB): 60KB
|
|
247
247
|
- **Total: 1.96MB (61% reduction)**
|
|
248
|
-
- **Search time:
|
|
248
|
+
- **Search time: Sub-11ms median for typical databases** (only Tier 1+2 are scanned; see wiki Performance Benchmarks)
|
|
249
249
|
- **Memory load: 1.9MB** (Tier 3 loaded on-demand)
|
|
250
250
|
|
|
251
251
|
### Space Savings Scale
|
|
@@ -418,6 +418,7 @@ weights = {'bm25': 0.4, 'semantic': 0.3, 'graph': 0.3} # Default
|
|
|
418
418
|
- Index time is one-time cost
|
|
419
419
|
- Search time scales sub-linearly (inverted index efficiency)
|
|
420
420
|
- Hybrid search includes fusion overhead (~10-15ms)
|
|
421
|
+
- These are projected estimates for the optional BM25 engine. See wiki Performance Benchmarks for measured end-to-end search latency.
|
|
421
422
|
|
|
422
423
|
---
|
|
423
424
|
|
package/mcp_server.py
CHANGED
|
@@ -72,6 +72,70 @@ try:
|
|
|
72
72
|
except ImportError:
|
|
73
73
|
LEARNING_AVAILABLE = False
|
|
74
74
|
|
|
75
|
+
# ============================================================================
|
|
76
|
+
# Synthetic Bootstrap Auto-Trigger (v2.7 — P1-12)
|
|
77
|
+
# Runs ONCE on first recall if: memory count > 50, no model, LightGBM available.
|
|
78
|
+
# Spawns in background thread — never blocks recall. All errors swallowed.
|
|
79
|
+
# ============================================================================
|
|
80
|
+
|
|
81
|
+
_bootstrap_checked = False
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def _maybe_bootstrap():
|
|
85
|
+
"""Check if synthetic bootstrap is needed and run it in a background thread.
|
|
86
|
+
|
|
87
|
+
Called once from the first recall invocation. Sets _bootstrap_checked = True
|
|
88
|
+
immediately to prevent re-entry. The actual bootstrap runs in a daemon thread
|
|
89
|
+
so it never blocks the recall response.
|
|
90
|
+
|
|
91
|
+
Conditions for bootstrap:
|
|
92
|
+
1. LEARNING_AVAILABLE and ML_RANKING_AVAILABLE flags are True
|
|
93
|
+
2. SyntheticBootstrapper.should_bootstrap() returns True (checks:
|
|
94
|
+
- LightGBM + NumPy installed
|
|
95
|
+
- No existing model file at ~/.claude-memory/models/ranker.txt
|
|
96
|
+
- Memory count > 50)
|
|
97
|
+
|
|
98
|
+
CRITICAL: This function wraps everything in try/except. Bootstrap failure
|
|
99
|
+
must NEVER break recall. It is purely an optimization — first-time ML
|
|
100
|
+
model creation so users don't have to wait 200+ recalls for personalization.
|
|
101
|
+
"""
|
|
102
|
+
global _bootstrap_checked
|
|
103
|
+
_bootstrap_checked = True # Set immediately to prevent re-entry
|
|
104
|
+
|
|
105
|
+
try:
|
|
106
|
+
if not LEARNING_AVAILABLE:
|
|
107
|
+
return
|
|
108
|
+
if not ML_RANKING_AVAILABLE:
|
|
109
|
+
return
|
|
110
|
+
|
|
111
|
+
from learning.synthetic_bootstrap import SyntheticBootstrapper
|
|
112
|
+
bootstrapper = SyntheticBootstrapper(memory_db_path=DB_PATH)
|
|
113
|
+
|
|
114
|
+
if not bootstrapper.should_bootstrap():
|
|
115
|
+
return
|
|
116
|
+
|
|
117
|
+
# Run bootstrap in background thread — never block recall
|
|
118
|
+
import threading
|
|
119
|
+
|
|
120
|
+
def _run_bootstrap():
|
|
121
|
+
try:
|
|
122
|
+
result = bootstrapper.bootstrap_model()
|
|
123
|
+
if result:
|
|
124
|
+
import logging
|
|
125
|
+
logging.getLogger("superlocalmemory.mcp").info(
|
|
126
|
+
"Synthetic bootstrap complete: %d samples",
|
|
127
|
+
result.get('training_samples', 0)
|
|
128
|
+
)
|
|
129
|
+
except Exception:
|
|
130
|
+
pass # Bootstrap failure is never critical
|
|
131
|
+
|
|
132
|
+
thread = threading.Thread(target=_run_bootstrap, daemon=True)
|
|
133
|
+
thread.start()
|
|
134
|
+
|
|
135
|
+
except Exception:
|
|
136
|
+
pass # Any failure in bootstrap setup is swallowed silently
|
|
137
|
+
|
|
138
|
+
|
|
75
139
|
def _sanitize_error(error: Exception) -> str:
|
|
76
140
|
"""Strip internal paths and structure from error messages."""
|
|
77
141
|
msg = str(error)
|
|
@@ -356,6 +420,10 @@ async def recall(
|
|
|
356
420
|
else:
|
|
357
421
|
results = store.search(query, limit=limit)
|
|
358
422
|
|
|
423
|
+
# v2.7: Auto-trigger synthetic bootstrap on first recall (P1-12)
|
|
424
|
+
if not _bootstrap_checked:
|
|
425
|
+
_maybe_bootstrap()
|
|
426
|
+
|
|
359
427
|
# v2.7: Learning-based re-ranking (optional, graceful fallback)
|
|
360
428
|
if LEARNING_AVAILABLE:
|
|
361
429
|
try:
|
|
@@ -868,6 +936,15 @@ async def search(query: str) -> dict:
|
|
|
868
936
|
store = get_store()
|
|
869
937
|
raw_results = store.search(query, limit=20)
|
|
870
938
|
|
|
939
|
+
# v2.7: Learning-based re-ranking (optional, graceful fallback)
|
|
940
|
+
if LEARNING_AVAILABLE:
|
|
941
|
+
try:
|
|
942
|
+
ranker = get_adaptive_ranker()
|
|
943
|
+
if ranker:
|
|
944
|
+
raw_results = ranker.rerank(raw_results, query)
|
|
945
|
+
except Exception:
|
|
946
|
+
pass # Re-ranking failure must never break search
|
|
947
|
+
|
|
871
948
|
results = []
|
|
872
949
|
for r in raw_results:
|
|
873
950
|
if r.get('score', 0) < 0.2:
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "superlocalmemory",
|
|
3
|
-
"version": "2.7.
|
|
3
|
+
"version": "2.7.3",
|
|
4
4
|
"description": "Your AI Finally Remembers You - Local-first intelligent memory system for AI assistants. Works with Claude, Cursor, Windsurf, VS Code/Copilot, Codex, and 17+ AI tools. 100% local, zero cloud dependencies.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"ai-memory",
|
package/src/agent_registry.py
CHANGED
|
@@ -98,7 +98,7 @@ class AgentRegistry:
|
|
|
98
98
|
last_seen TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
99
99
|
memories_written INTEGER DEFAULT 0,
|
|
100
100
|
memories_recalled INTEGER DEFAULT 0,
|
|
101
|
-
trust_score REAL DEFAULT
|
|
101
|
+
trust_score REAL DEFAULT 0.667,
|
|
102
102
|
metadata TEXT DEFAULT '{}'
|
|
103
103
|
)
|
|
104
104
|
''')
|
|
@@ -126,7 +126,7 @@ class AgentRegistry:
|
|
|
126
126
|
last_seen TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
127
127
|
memories_written INTEGER DEFAULT 0,
|
|
128
128
|
memories_recalled INTEGER DEFAULT 0,
|
|
129
|
-
trust_score REAL DEFAULT
|
|
129
|
+
trust_score REAL DEFAULT 0.667,
|
|
130
130
|
metadata TEXT DEFAULT '{}'
|
|
131
131
|
)
|
|
132
132
|
''')
|
|
@@ -150,7 +150,7 @@ class AgentRegistry:
|
|
|
150
150
|
Register or update an agent in the registry.
|
|
151
151
|
|
|
152
152
|
If the agent already exists, updates last_seen and metadata.
|
|
153
|
-
If new, creates the entry with trust_score=1.
|
|
153
|
+
If new, creates the entry with trust_score=0.667 (Beta(2,1) prior).
|
|
154
154
|
|
|
155
155
|
Args:
|
|
156
156
|
agent_id: Unique identifier (e.g., "mcp:claude-desktop")
|
package/src/graph_engine.py
CHANGED
|
@@ -297,12 +297,11 @@ class ClusterBuilder:
|
|
|
297
297
|
Returns:
|
|
298
298
|
Number of clusters created
|
|
299
299
|
"""
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
raise ImportError("python-igraph and leidenalg required. Install: pip install python-igraph leidenalg")
|
|
300
|
+
if not IGRAPH_AVAILABLE:
|
|
301
|
+
logger.warning("igraph/leidenalg not installed. Graph clustering disabled. Install with: pip3 install python-igraph leidenalg")
|
|
302
|
+
return 0
|
|
303
|
+
import igraph as ig
|
|
304
|
+
import leidenalg
|
|
306
305
|
|
|
307
306
|
conn = sqlite3.connect(self.db_path)
|
|
308
307
|
cursor = conn.cursor()
|
|
@@ -457,11 +456,11 @@ class ClusterBuilder:
|
|
|
457
456
|
Returns:
|
|
458
457
|
Dictionary with hierarchical clustering statistics
|
|
459
458
|
"""
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
459
|
+
if not IGRAPH_AVAILABLE:
|
|
460
|
+
logger.warning("igraph/leidenalg not installed. Hierarchical clustering disabled. Install with: pip3 install python-igraph leidenalg")
|
|
461
|
+
return {'subclusters_created': 0, 'depth_reached': 0}
|
|
462
|
+
import igraph as ig
|
|
463
|
+
import leidenalg
|
|
465
464
|
|
|
466
465
|
conn = sqlite3.connect(self.db_path)
|
|
467
466
|
cursor = conn.cursor()
|
|
@@ -512,6 +511,8 @@ class ClusterBuilder:
|
|
|
512
511
|
profile: str, min_size: int, max_depth: int,
|
|
513
512
|
current_depth: int) -> Tuple[int, int]:
|
|
514
513
|
"""Recursively sub-cluster a community using Leiden."""
|
|
514
|
+
if not IGRAPH_AVAILABLE:
|
|
515
|
+
return 0, current_depth - 1
|
|
515
516
|
import igraph as ig
|
|
516
517
|
import leidenalg
|
|
517
518
|
|
|
@@ -1038,6 +1039,9 @@ class GraphEngine:
|
|
|
1038
1039
|
'summaries_generated': summaries,
|
|
1039
1040
|
'time_seconds': round(elapsed, 2)
|
|
1040
1041
|
}
|
|
1042
|
+
if not IGRAPH_AVAILABLE:
|
|
1043
|
+
stats['warning'] = 'igraph/leidenalg not installed — graph built without clustering. Install with: pip3 install python-igraph leidenalg'
|
|
1044
|
+
|
|
1041
1045
|
|
|
1042
1046
|
logger.info(f"Graph build complete: {stats}")
|
|
1043
1047
|
return stats
|
|
@@ -12,22 +12,23 @@ Attribution must be preserved in all copies or derivatives.
|
|
|
12
12
|
"""
|
|
13
13
|
|
|
14
14
|
"""
|
|
15
|
-
FeatureExtractor — Extracts
|
|
15
|
+
FeatureExtractor — Extracts 10-dimensional feature vectors for candidate memories.
|
|
16
16
|
|
|
17
17
|
Each memory retrieved during recall gets a feature vector that feeds into
|
|
18
18
|
the AdaptiveRanker. In Phase 1 (rule-based), features drive boosting weights.
|
|
19
19
|
In Phase 2 (ML), features become LightGBM input columns.
|
|
20
20
|
|
|
21
|
-
Feature Vector (
|
|
22
|
-
[0] bm25_score
|
|
23
|
-
[1] tfidf_score
|
|
24
|
-
[2] tech_match
|
|
25
|
-
[3] project_match
|
|
26
|
-
[4] workflow_fit
|
|
27
|
-
[5] source_quality
|
|
28
|
-
[6] importance_norm
|
|
29
|
-
[7] recency_score
|
|
30
|
-
[8] access_frequency
|
|
21
|
+
Feature Vector (10 dimensions):
|
|
22
|
+
[0] bm25_score — Existing retrieval score from search results
|
|
23
|
+
[1] tfidf_score — TF-IDF cosine similarity from search results
|
|
24
|
+
[2] tech_match — Does memory match user's tech preferences?
|
|
25
|
+
[3] project_match — Is memory from the current project?
|
|
26
|
+
[4] workflow_fit — Does memory fit current workflow phase?
|
|
27
|
+
[5] source_quality — Quality score of the source that created this memory
|
|
28
|
+
[6] importance_norm — Normalized importance (importance / 10.0)
|
|
29
|
+
[7] recency_score — Exponential decay based on age (180-day half-life)
|
|
30
|
+
[8] access_frequency — How often this memory was accessed (capped at 1.0)
|
|
31
|
+
[9] pattern_confidence — Max Beta-Binomial confidence from learned patterns
|
|
31
32
|
|
|
32
33
|
Design Principles:
|
|
33
34
|
- All features normalized to [0.0, 1.0] range for ML compatibility
|
|
@@ -59,6 +60,7 @@ FEATURE_NAMES = [
|
|
|
59
60
|
'importance_norm', # 6: Normalized importance (importance / 10.0)
|
|
60
61
|
'recency_score', # 7: Exponential decay based on age
|
|
61
62
|
'access_frequency', # 8: How often this memory was accessed (capped at 1.0)
|
|
63
|
+
'pattern_confidence', # 9: Max Beta-Binomial confidence from learned patterns
|
|
62
64
|
]
|
|
63
65
|
|
|
64
66
|
NUM_FEATURES = len(FEATURE_NAMES)
|
|
@@ -100,7 +102,7 @@ _MAX_ACCESS_COUNT = 10
|
|
|
100
102
|
|
|
101
103
|
class FeatureExtractor:
|
|
102
104
|
"""
|
|
103
|
-
Extracts
|
|
105
|
+
Extracts 10-dimensional feature vectors for candidate memories.
|
|
104
106
|
|
|
105
107
|
Usage:
|
|
106
108
|
extractor = FeatureExtractor()
|
|
@@ -111,7 +113,7 @@ class FeatureExtractor:
|
|
|
111
113
|
workflow_phase='testing',
|
|
112
114
|
)
|
|
113
115
|
features = extractor.extract_batch(memories, query="search optimization")
|
|
114
|
-
# features is List[List[float]], shape (n_memories,
|
|
116
|
+
# features is List[List[float]], shape (n_memories, 10)
|
|
115
117
|
"""
|
|
116
118
|
|
|
117
119
|
FEATURE_NAMES = FEATURE_NAMES
|
|
@@ -125,6 +127,8 @@ class FeatureExtractor:
|
|
|
125
127
|
self._current_project_lower: Optional[str] = None
|
|
126
128
|
self._workflow_phase: Optional[str] = None
|
|
127
129
|
self._workflow_keywords: List[str] = []
|
|
130
|
+
# Pattern confidence cache: maps lowercased pattern value -> confidence
|
|
131
|
+
self._pattern_cache: Dict[str, float] = {}
|
|
128
132
|
|
|
129
133
|
def set_context(
|
|
130
134
|
self,
|
|
@@ -132,6 +136,7 @@ class FeatureExtractor:
|
|
|
132
136
|
tech_preferences: Optional[Dict[str, dict]] = None,
|
|
133
137
|
current_project: Optional[str] = None,
|
|
134
138
|
workflow_phase: Optional[str] = None,
|
|
139
|
+
pattern_confidences: Optional[Dict[str, float]] = None,
|
|
135
140
|
):
|
|
136
141
|
"""
|
|
137
142
|
Set context for feature extraction. Called once per recall query.
|
|
@@ -146,6 +151,9 @@ class FeatureExtractor:
|
|
|
146
151
|
From cross_project_aggregator or pattern_learner.
|
|
147
152
|
current_project: Name of the currently active project (if detected).
|
|
148
153
|
workflow_phase: Current workflow phase (planning, coding, testing, etc).
|
|
154
|
+
pattern_confidences: Map of lowercased pattern value -> confidence (0.0-1.0).
|
|
155
|
+
From pattern_learner.PatternStore.get_patterns().
|
|
156
|
+
Used for feature [9] pattern_confidence.
|
|
149
157
|
"""
|
|
150
158
|
self._source_scores = source_scores or {}
|
|
151
159
|
self._tech_preferences = tech_preferences or {}
|
|
@@ -166,9 +174,12 @@ class FeatureExtractor:
|
|
|
166
174
|
if workflow_phase else []
|
|
167
175
|
)
|
|
168
176
|
|
|
177
|
+
# Cache pattern confidences for feature [9]
|
|
178
|
+
self._pattern_cache = pattern_confidences or {}
|
|
179
|
+
|
|
169
180
|
def extract_features(self, memory: dict, query: str) -> List[float]:
|
|
170
181
|
"""
|
|
171
|
-
Extract
|
|
182
|
+
Extract 10-dimensional feature vector for a single memory.
|
|
172
183
|
|
|
173
184
|
Args:
|
|
174
185
|
memory: Memory dict from search results. Expected keys:
|
|
@@ -177,7 +188,7 @@ class FeatureExtractor:
|
|
|
177
188
|
query: The recall query string.
|
|
178
189
|
|
|
179
190
|
Returns:
|
|
180
|
-
List of
|
|
191
|
+
List of 10 floats in [0.0, 1.0] range, one per feature.
|
|
181
192
|
"""
|
|
182
193
|
return [
|
|
183
194
|
self._compute_bm25_score(memory),
|
|
@@ -189,6 +200,7 @@ class FeatureExtractor:
|
|
|
189
200
|
self._compute_importance_norm(memory),
|
|
190
201
|
self._compute_recency_score(memory),
|
|
191
202
|
self._compute_access_frequency(memory),
|
|
203
|
+
self._compute_pattern_confidence(memory),
|
|
192
204
|
]
|
|
193
205
|
|
|
194
206
|
def extract_batch(
|
|
@@ -204,7 +216,7 @@ class FeatureExtractor:
|
|
|
204
216
|
query: The recall query string.
|
|
205
217
|
|
|
206
218
|
Returns:
|
|
207
|
-
List of feature vectors (List[List[float]]), shape (n,
|
|
219
|
+
List of feature vectors (List[List[float]]), shape (n, 10).
|
|
208
220
|
Returns empty list if memories is empty.
|
|
209
221
|
"""
|
|
210
222
|
if not memories:
|
|
@@ -447,6 +459,55 @@ class FeatureExtractor:
|
|
|
447
459
|
return min(access_count / float(_MAX_ACCESS_COUNT), 1.0)
|
|
448
460
|
|
|
449
461
|
|
|
462
|
+
def _compute_pattern_confidence(self, memory: dict) -> float:
|
|
463
|
+
"""
|
|
464
|
+
Compute max Beta-Binomial confidence from learned patterns matching this memory.
|
|
465
|
+
|
|
466
|
+
Looks up the cached pattern_confidences (set via set_context) and checks
|
|
467
|
+
if any pattern value appears in the memory's content or tags. Returns the
|
|
468
|
+
maximum confidence among all matching patterns.
|
|
469
|
+
|
|
470
|
+
Returns:
|
|
471
|
+
Max confidence (0.0-1.0) from matching patterns
|
|
472
|
+
0.5 if no patterns loaded (neutral — unknown)
|
|
473
|
+
0.0 if patterns loaded but none match
|
|
474
|
+
"""
|
|
475
|
+
if not self._pattern_cache:
|
|
476
|
+
return 0.5 # No patterns available — neutral
|
|
477
|
+
|
|
478
|
+
content = memory.get('content', '')
|
|
479
|
+
if not content:
|
|
480
|
+
return 0.0
|
|
481
|
+
|
|
482
|
+
content_lower = content.lower()
|
|
483
|
+
|
|
484
|
+
# Also check tags
|
|
485
|
+
tags_str = ''
|
|
486
|
+
tags = memory.get('tags', [])
|
|
487
|
+
if isinstance(tags, list):
|
|
488
|
+
tags_str = ' '.join(t.lower() for t in tags)
|
|
489
|
+
elif isinstance(tags, str):
|
|
490
|
+
tags_str = tags.lower()
|
|
491
|
+
|
|
492
|
+
searchable = content_lower + ' ' + tags_str
|
|
493
|
+
|
|
494
|
+
max_confidence = 0.0
|
|
495
|
+
for pattern_value, confidence in self._pattern_cache.items():
|
|
496
|
+
# Pattern values are expected to be lowercased in the cache; lower() is re-applied defensively
|
|
497
|
+
pattern_lower = pattern_value.lower() if pattern_value else ''
|
|
498
|
+
if not pattern_lower:
|
|
499
|
+
continue
|
|
500
|
+
# Word-boundary check for short patterns to avoid false positives
|
|
501
|
+
if len(pattern_lower) <= 3:
|
|
502
|
+
if re.search(r'\b' + re.escape(pattern_lower) + r'\b', searchable):
|
|
503
|
+
max_confidence = max(max_confidence, confidence)
|
|
504
|
+
else:
|
|
505
|
+
if pattern_lower in searchable:
|
|
506
|
+
max_confidence = max(max_confidence, confidence)
|
|
507
|
+
|
|
508
|
+
return max(0.0, min(max_confidence, 1.0))
|
|
509
|
+
|
|
510
|
+
|
|
450
511
|
# ============================================================================
|
|
451
512
|
# Module-level convenience functions
|
|
452
513
|
# ============================================================================
|
|
@@ -122,9 +122,16 @@ class FeedbackCollector:
|
|
|
122
122
|
"""
|
|
123
123
|
Args:
|
|
124
124
|
learning_db: LearningDB instance for persisting feedback.
|
|
125
|
-
If None,
|
|
125
|
+
If None, auto-creates a LearningDB instance.
|
|
126
126
|
"""
|
|
127
|
-
|
|
127
|
+
if learning_db is None:
|
|
128
|
+
try:
|
|
129
|
+
from .learning_db import LearningDB
|
|
130
|
+
self.learning_db = LearningDB()
|
|
131
|
+
except Exception:
|
|
132
|
+
self.learning_db = None
|
|
133
|
+
else:
|
|
134
|
+
self.learning_db = learning_db
|
|
128
135
|
|
|
129
136
|
# In-memory buffer for passive decay tracking.
|
|
130
137
|
# Structure: {query_hash: {memory_id: times_returned_count}}
|
|
@@ -258,7 +258,7 @@ class TestGenerateSyntheticData:
|
|
|
258
258
|
assert "label" in r
|
|
259
259
|
assert "source" in r
|
|
260
260
|
assert "features" in r
|
|
261
|
-
assert len(r["features"]) ==
|
|
261
|
+
assert len(r["features"]) == 10 # 10-dimensional feature vector
|
|
262
262
|
|
|
263
263
|
def test_labels_in_range(self, bootstrapper_with_data):
|
|
264
264
|
records = bootstrapper_with_data.generate_synthetic_training_data()
|
package/src/trust_scorer.py
CHANGED
|
@@ -12,10 +12,23 @@ Attribution must be preserved in all copies or derivatives.
|
|
|
12
12
|
"""
|
|
13
13
|
|
|
14
14
|
"""
|
|
15
|
-
TrustScorer —
|
|
15
|
+
TrustScorer — Bayesian Beta-Binomial trust scoring for AI agents.
|
|
16
|
+
|
|
17
|
+
Scoring Model:
|
|
18
|
+
Each agent's trust is modeled as a Beta(alpha, beta) distribution.
|
|
19
|
+
- alpha accumulates evidence of trustworthy behavior
|
|
20
|
+
- beta accumulates evidence of untrustworthy behavior
|
|
21
|
+
- Trust score = alpha / (alpha + beta) (posterior mean)
|
|
22
|
+
|
|
23
|
+
Prior: Beta(2.0, 1.0) → initial trust = 0.667
|
|
24
|
+
This gives new agents a positive-but-not-maximal starting trust,
|
|
25
|
+
well above the 0.3 enforcement threshold but with room to grow.
|
|
26
|
+
|
|
27
|
+
This follows the MACLA Beta-Binomial approach (arXiv:2512.18950)
|
|
28
|
+
already used in pattern_learner.py for confidence scoring.
|
|
16
29
|
|
|
17
30
|
v2.5 BEHAVIOR (this version):
|
|
18
|
-
- All agents start at
|
|
31
|
+
- All agents start at Beta(2.0, 1.0) → trust 0.667
|
|
19
32
|
- Signals are collected silently (no enforcement, no ranking, no blocking)
|
|
20
33
|
- Trust scores are updated in agent_registry.trust_score
|
|
21
34
|
- Dashboard shows scores but they don't affect recall ordering yet
|
|
@@ -30,31 +43,26 @@ v3.0 BEHAVIOR (future):
|
|
|
30
43
|
- Admin approval workflow for untrusted agents
|
|
31
44
|
|
|
32
45
|
Trust Signals (all silently collected):
|
|
33
|
-
POSITIVE (increase trust):
|
|
46
|
+
POSITIVE (increase alpha — build trust):
|
|
34
47
|
- Memory recalled by other agents (cross-agent validation)
|
|
35
48
|
- Memory updated (shows ongoing relevance)
|
|
36
49
|
- High importance memories (agent writes valuable content)
|
|
37
50
|
- Consistent write patterns (not spam-like)
|
|
38
51
|
|
|
39
|
-
NEGATIVE (
|
|
52
|
+
NEGATIVE (increase beta — erode trust):
|
|
40
53
|
- Memory deleted shortly after creation (low quality)
|
|
41
54
|
- Very high write volume in short time (potential spam/poisoning)
|
|
42
55
|
- Content flagged or overwritten by user
|
|
43
56
|
|
|
44
57
|
NEUTRAL:
|
|
45
|
-
- Normal read/write patterns
|
|
58
|
+
- Normal read/write patterns (tiny alpha nudge to reward activity)
|
|
46
59
|
- Agent disconnects/reconnects
|
|
47
60
|
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
by
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
decay_factor = 1 / (1 + signal_count * 0.01) # Stabilizes over time
|
|
54
|
-
|
|
55
|
-
This means early signals have more impact, and the score converges
|
|
56
|
-
as more data is collected. Similar to MACLA Beta-Binomial approach
|
|
57
|
-
(arXiv:2512.18950) but simplified for local computation.
|
|
61
|
+
Decay:
|
|
62
|
+
Every DECAY_INTERVAL signals per agent, both alpha and beta are
|
|
63
|
+
multiplied by DECAY_FACTOR (0.995). This slowly forgets very old
|
|
64
|
+
signals so recent behavior matters more. Floors prevent total
|
|
65
|
+
information loss: alpha >= 1.0, beta >= 0.5.
|
|
58
66
|
|
|
59
67
|
Security (OWASP for Agentic AI):
|
|
60
68
|
- Memory poisoning (#1 threat): Trust scoring is the first defense layer
|
|
@@ -64,7 +72,6 @@ Security (OWASP for Agentic AI):
|
|
|
64
72
|
|
|
65
73
|
import json
|
|
66
74
|
import logging
|
|
67
|
-
import math
|
|
68
75
|
import threading
|
|
69
76
|
from datetime import datetime, timedelta
|
|
70
77
|
from pathlib import Path
|
|
@@ -72,24 +79,59 @@ from typing import Optional, Dict, List
|
|
|
72
79
|
|
|
73
80
|
logger = logging.getLogger("superlocalmemory.trust")
|
|
74
81
|
|
|
75
|
-
#
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
"
|
|
82
|
+
# ---------------------------------------------------------------------------
|
|
83
|
+
# Beta-Binomial signal weights
|
|
84
|
+
# ---------------------------------------------------------------------------
|
|
85
|
+
# Positive signals increment alpha (building trust).
|
|
86
|
+
# Negative signals increment beta (eroding trust).
|
|
87
|
+
# Neutral signals give a tiny alpha nudge to reward normal activity.
|
|
88
|
+
#
|
|
89
|
+
# Asymmetry: negative weights are larger than positive weights.
|
|
90
|
+
# This means it's harder to build trust than to lose it — the system
|
|
91
|
+
# is intentionally skeptical. One poisoning event takes many good
|
|
92
|
+
# actions to recover from.
|
|
93
|
+
# ---------------------------------------------------------------------------
|
|
94
|
+
|
|
95
|
+
SIGNAL_WEIGHTS = {
|
|
96
|
+
# Positive signals → alpha += weight
|
|
97
|
+
"memory_recalled_by_others": ("positive", 0.30), # cross-agent validation
|
|
98
|
+
"memory_updated": ("positive", 0.15), # ongoing relevance
|
|
99
|
+
"high_importance_write": ("positive", 0.20), # valuable content (importance >= 7)
|
|
100
|
+
"consistent_pattern": ("positive", 0.15), # stable write behavior
|
|
101
|
+
|
|
102
|
+
# Negative signals → beta += weight
|
|
103
|
+
"quick_delete": ("negative", 0.50), # deleted within 1 hour
|
|
104
|
+
"high_volume_burst": ("negative", 0.40), # >20 writes in 5 minutes
|
|
105
|
+
"content_overwritten_by_user": ("negative", 0.25), # user had to fix output
|
|
106
|
+
|
|
107
|
+
# Neutral signals → tiny alpha nudge
|
|
108
|
+
"normal_write": ("neutral", 0.01),
|
|
109
|
+
"normal_recall": ("neutral", 0.01),
|
|
91
110
|
}
|
|
92
111
|
|
|
112
|
+
# Backward-compatible: expose SIGNAL_DELTAS as a derived dict so that
|
|
113
|
+
# bm6_trust.py (which imports SIGNAL_DELTAS) and any other consumer
|
|
114
|
+
# continues to work. The values represent the *direction* and *magnitude*
|
|
115
|
+
# of each signal: positive for alpha, negative for beta, zero for neutral.
|
|
116
|
+
SIGNAL_DELTAS = {}
|
|
117
|
+
for _sig, (_direction, _weight) in SIGNAL_WEIGHTS.items():
|
|
118
|
+
if _direction == "positive":
|
|
119
|
+
SIGNAL_DELTAS[_sig] = +_weight
|
|
120
|
+
elif _direction == "negative":
|
|
121
|
+
SIGNAL_DELTAS[_sig] = -_weight
|
|
122
|
+
else:
|
|
123
|
+
SIGNAL_DELTAS[_sig] = 0.0
|
|
124
|
+
|
|
125
|
+
# ---------------------------------------------------------------------------
|
|
126
|
+
# Beta prior and decay parameters
|
|
127
|
+
# ---------------------------------------------------------------------------
|
|
128
|
+
INITIAL_ALPHA = 2.0 # Slight positive prior
|
|
129
|
+
INITIAL_BETA = 1.0 # → initial trust = 2/(2+1) = 0.667
|
|
130
|
+
DECAY_FACTOR = 0.995 # Multiply alpha & beta every DECAY_INTERVAL signals
|
|
131
|
+
DECAY_INTERVAL = 50 # Apply decay every N signals per agent
|
|
132
|
+
ALPHA_FLOOR = 1.0 # Never decay alpha below this
|
|
133
|
+
BETA_FLOOR = 0.5 # Never decay beta below this
|
|
134
|
+
|
|
93
135
|
# Thresholds
|
|
94
136
|
QUICK_DELETE_HOURS = 1 # Delete within 1 hour = negative signal
|
|
95
137
|
BURST_THRESHOLD = 20 # >20 writes in burst window = negative
|
|
@@ -98,9 +140,12 @@ BURST_WINDOW_MINUTES = 5 # Burst detection window
|
|
|
98
140
|
|
|
99
141
|
class TrustScorer:
|
|
100
142
|
"""
|
|
101
|
-
|
|
143
|
+
Bayesian Beta-Binomial trust scorer for AI agents.
|
|
144
|
+
|
|
145
|
+
Each agent is modeled as Beta(alpha, beta). Positive signals
|
|
146
|
+
increment alpha, negative signals increment beta. The trust
|
|
147
|
+
score is the posterior mean: alpha / (alpha + beta).
|
|
102
148
|
|
|
103
|
-
v2.5: Collection only, no enforcement. All agents start at 1.0.
|
|
104
149
|
Thread-safe singleton per database path.
|
|
105
150
|
"""
|
|
106
151
|
|
|
@@ -136,19 +181,26 @@ class TrustScorer:
|
|
|
136
181
|
self._write_timestamps: Dict[str, list] = {}
|
|
137
182
|
self._timestamps_lock = threading.Lock()
|
|
138
183
|
|
|
139
|
-
# Signal count per agent (for decay
|
|
184
|
+
# Signal count per agent (for decay interval tracking)
|
|
140
185
|
self._signal_counts: Dict[str, int] = {}
|
|
141
186
|
|
|
187
|
+
# In-memory cache of Beta parameters per agent
|
|
188
|
+
# Key: agent_id, Value: (alpha, beta)
|
|
189
|
+
self._beta_params: Dict[str, tuple] = {}
|
|
190
|
+
self._beta_lock = threading.Lock()
|
|
191
|
+
|
|
142
192
|
self._init_schema()
|
|
143
|
-
logger.info("TrustScorer initialized (
|
|
193
|
+
logger.info("TrustScorer initialized (Beta-Binomial — alpha=%.1f, beta=%.1f prior)",
|
|
194
|
+
INITIAL_ALPHA, INITIAL_BETA)
|
|
144
195
|
|
|
145
196
|
def _init_schema(self):
|
|
146
|
-
"""Create trust_signals table
|
|
197
|
+
"""Create trust_signals table and add alpha/beta columns to agent_registry."""
|
|
147
198
|
try:
|
|
148
199
|
from db_connection_manager import DbConnectionManager
|
|
149
200
|
mgr = DbConnectionManager.get_instance(self.db_path)
|
|
150
201
|
|
|
151
202
|
def _create(conn):
|
|
203
|
+
# Trust signals audit trail
|
|
152
204
|
conn.execute('''
|
|
153
205
|
CREATE TABLE IF NOT EXISTS trust_signals (
|
|
154
206
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
@@ -169,6 +221,18 @@ class TrustScorer:
|
|
|
169
221
|
CREATE INDEX IF NOT EXISTS idx_trust_created
|
|
170
222
|
ON trust_signals(created_at)
|
|
171
223
|
''')
|
|
224
|
+
|
|
225
|
+
# Add trust_alpha and trust_beta columns to agent_registry
|
|
226
|
+
# (backward compatible — old databases get these columns added)
|
|
227
|
+
for col_name, col_default in [("trust_alpha", INITIAL_ALPHA),
|
|
228
|
+
("trust_beta", INITIAL_BETA)]:
|
|
229
|
+
try:
|
|
230
|
+
conn.execute(
|
|
231
|
+
f'ALTER TABLE agent_registry ADD COLUMN {col_name} REAL DEFAULT {col_default}'
|
|
232
|
+
)
|
|
233
|
+
except Exception:
|
|
234
|
+
pass # Column already exists
|
|
235
|
+
|
|
172
236
|
conn.commit()
|
|
173
237
|
|
|
174
238
|
mgr.execute_write(_create)
|
|
@@ -189,11 +253,108 @@ class TrustScorer:
|
|
|
189
253
|
''')
|
|
190
254
|
conn.execute('CREATE INDEX IF NOT EXISTS idx_trust_agent ON trust_signals(agent_id)')
|
|
191
255
|
conn.execute('CREATE INDEX IF NOT EXISTS idx_trust_created ON trust_signals(created_at)')
|
|
256
|
+
|
|
257
|
+
# Add trust_alpha and trust_beta columns (backward compatible)
|
|
258
|
+
for col_name, col_default in [("trust_alpha", INITIAL_ALPHA),
|
|
259
|
+
("trust_beta", INITIAL_BETA)]:
|
|
260
|
+
try:
|
|
261
|
+
conn.execute(
|
|
262
|
+
f'ALTER TABLE agent_registry ADD COLUMN {col_name} REAL DEFAULT {col_default}'
|
|
263
|
+
)
|
|
264
|
+
except sqlite3.OperationalError:
|
|
265
|
+
pass # Column already exists
|
|
266
|
+
|
|
192
267
|
conn.commit()
|
|
193
268
|
conn.close()
|
|
194
269
|
|
|
195
270
|
# =========================================================================
|
|
196
|
-
#
|
|
271
|
+
# Beta Parameter Management
|
|
272
|
+
# =========================================================================
|
|
273
|
+
|
|
274
|
+
def _get_beta_params(self, agent_id: str) -> tuple:
    """
    Return the (alpha, beta) Beta-distribution parameters for an agent.

    Lookup order:
      1. the in-memory cache (self._beta_params);
      2. the trust_alpha / trust_beta columns of agent_registry;
      3. the prior (INITIAL_ALPHA, INITIAL_BETA) when the agent has no row,
         a column is NULL, or the database could not be read.

    Values obtained from a successful database read (including the prior
    for a missing row / NULL columns) are cached. When the read itself
    fails, the prior is returned for this call only and is NOT cached, so
    a transient database error cannot pin the prior over the agent's
    stored history.

    Args:
        agent_id: Identifier of the agent to look up.

    Returns:
        (alpha, beta) tuple
    """
    with self._beta_lock:
        if agent_id in self._beta_params:
            return self._beta_params[agent_id]

    # Not in cache — read from database (outside the lock so a slow read
    # does not block other agents' lookups).
    alpha, beta = None, None
    read_ok = False
    try:
        from db_connection_manager import DbConnectionManager
        mgr = DbConnectionManager.get_instance(self.db_path)

        with mgr.read_connection() as conn:
            cursor = conn.cursor()
            cursor.execute(
                "SELECT trust_alpha, trust_beta FROM agent_registry WHERE agent_id = ?",
                (agent_id,)
            )
            row = cursor.fetchone()
            if row:
                alpha = row[0]
                beta = row[1]
        read_ok = True
    except Exception as e:
        # Best-effort: fall through to the prior, but leave a trace instead
        # of swallowing the error silently.
        logger.debug("Could not read Beta params for %s, using prior: %s", agent_id, e)

    # Fall back to defaults if NULL or missing
    if alpha is None or beta is None:
        alpha = INITIAL_ALPHA
        beta = INITIAL_BETA

    if not read_ok:
        # The prior here is a guess, not the stored state — do not cache it.
        return (alpha, beta)

    with self._beta_lock:
        # setdefault: if _set_beta_params stored fresher values while the
        # database was being read, keep (and return) those instead.
        return self._beta_params.setdefault(agent_id, (alpha, beta))
|
|
314
|
+
|
|
315
|
+
def _set_beta_params(self, agent_id: str, alpha: float, beta: float):
    """
    Store new (alpha, beta) for an agent in the cache and in agent_registry.

    The derived trust_score = alpha / (alpha + beta) (0.0 when the sum is
    not positive) is written in the same UPDATE. All three persisted
    values are rounded to 4 decimals; the cache keeps the unrounded pair.
    Persistence is best-effort: a failure is logged and the cache update
    is kept.

    Args:
        agent_id: Agent whose parameters are being replaced.
        alpha: New alpha (trust evidence).
        beta: New beta (distrust evidence).
    """
    evidence = alpha + beta
    if evidence > 0:
        trust_score = alpha / evidence
    else:
        trust_score = 0.0

    # Cache first, so readers see the new values even if the write fails.
    with self._beta_lock:
        self._beta_params[agent_id] = (alpha, beta)

    def _write_row(conn):
        # Runs inside the manager's write path; commits its own change.
        conn.execute(
            """UPDATE agent_registry
               SET trust_score = ?, trust_alpha = ?, trust_beta = ?
               WHERE agent_id = ?""",
            (round(trust_score, 4), round(alpha, 4), round(beta, 4), agent_id)
        )
        conn.commit()

    try:
        from db_connection_manager import DbConnectionManager
        DbConnectionManager.get_instance(self.db_path).execute_write(_write_row)
    except Exception as exc:
        logger.error("Failed to persist Beta params for %s: %s", agent_id, exc)
|
|
341
|
+
|
|
342
|
+
def _apply_decay(self, agent_id: str, alpha: float, beta: float) -> tuple:
    """
    Shrink alpha and beta so that very old signals are gradually forgotten.

    record_signal invokes this once every DECAY_INTERVAL signals per agent.
    Each parameter is multiplied by DECAY_FACTOR and then clamped from
    below by its floor (ALPHA_FLOOR / BETA_FLOOR).

    Args:
        agent_id: Agent being decayed (not used in the computation).
        alpha: Current alpha.
        beta: Current beta.

    Returns:
        (decayed_alpha, decayed_beta)
    """
    scaled_alpha = alpha * DECAY_FACTOR
    scaled_beta = beta * DECAY_FACTOR

    # Clamp: keep the scaled value only when it is strictly above its floor.
    decayed_alpha = scaled_alpha if scaled_alpha > ALPHA_FLOOR else ALPHA_FLOOR
    decayed_beta = scaled_beta if scaled_beta > BETA_FLOOR else BETA_FLOOR
    return (decayed_alpha, decayed_beta)
|
|
355
|
+
|
|
356
|
+
# =========================================================================
|
|
357
|
+
# Signal Recording (Beta-Binomial Update)
|
|
197
358
|
# =========================================================================
|
|
198
359
|
|
|
199
360
|
def record_signal(
|
|
@@ -203,50 +364,68 @@ class TrustScorer:
|
|
|
203
364
|
context: Optional[dict] = None,
|
|
204
365
|
) -> bool:
|
|
205
366
|
"""
|
|
206
|
-
Record a trust signal for an agent.
|
|
367
|
+
Record a trust signal for an agent using Beta-Binomial update.
|
|
207
368
|
|
|
208
|
-
|
|
209
|
-
|
|
369
|
+
Positive signals increment alpha (trust evidence).
|
|
370
|
+
Negative signals increment beta (distrust evidence).
|
|
371
|
+
Neutral signals give a tiny alpha nudge.
|
|
372
|
+
|
|
373
|
+
Trust score = alpha / (alpha + beta) — the posterior mean.
|
|
210
374
|
|
|
211
375
|
Args:
|
|
212
376
|
agent_id: Agent that generated the signal
|
|
213
|
-
signal_type: One of
|
|
377
|
+
signal_type: One of SIGNAL_WEIGHTS keys
|
|
214
378
|
context: Additional context (memory_id, etc.)
|
|
379
|
+
|
|
380
|
+
Returns:
|
|
381
|
+
True if signal was recorded successfully
|
|
215
382
|
"""
|
|
216
|
-
if signal_type not in
|
|
383
|
+
if signal_type not in SIGNAL_WEIGHTS:
|
|
217
384
|
logger.warning("Unknown trust signal: %s", signal_type)
|
|
218
|
-
return
|
|
385
|
+
return False
|
|
386
|
+
|
|
387
|
+
direction, weight = SIGNAL_WEIGHTS[signal_type]
|
|
219
388
|
|
|
220
|
-
|
|
389
|
+
# Get current Beta parameters
|
|
390
|
+
alpha, beta = self._get_beta_params(agent_id)
|
|
391
|
+
old_score = alpha / (alpha + beta) if (alpha + beta) > 0 else 0.0
|
|
221
392
|
|
|
222
|
-
#
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
393
|
+
# Apply Beta-Binomial update
|
|
394
|
+
if direction == "positive":
|
|
395
|
+
alpha += weight
|
|
396
|
+
elif direction == "negative":
|
|
397
|
+
beta += weight
|
|
398
|
+
else: # neutral — tiny alpha nudge
|
|
399
|
+
alpha += weight
|
|
226
400
|
|
|
227
|
-
# Apply decay
|
|
228
|
-
count = self._signal_counts.get(agent_id, 0)
|
|
229
|
-
|
|
230
|
-
adjusted_delta = delta * decay
|
|
401
|
+
# Apply periodic decay
|
|
402
|
+
count = self._signal_counts.get(agent_id, 0) + 1
|
|
403
|
+
self._signal_counts[agent_id] = count
|
|
231
404
|
|
|
232
|
-
|
|
233
|
-
|
|
405
|
+
if count % DECAY_INTERVAL == 0:
|
|
406
|
+
alpha, beta = self._apply_decay(agent_id, alpha, beta)
|
|
234
407
|
|
|
235
|
-
#
|
|
236
|
-
|
|
408
|
+
# Compute new trust score (posterior mean)
|
|
409
|
+
new_score = alpha / (alpha + beta) if (alpha + beta) > 0 else 0.0
|
|
410
|
+
|
|
411
|
+
# Compute delta for audit trail (backward compatible with trust_signals table)
|
|
412
|
+
delta = new_score - old_score
|
|
237
413
|
|
|
238
414
|
# Persist signal to audit trail
|
|
239
|
-
self._persist_signal(agent_id, signal_type,
|
|
415
|
+
self._persist_signal(agent_id, signal_type, delta, old_score, new_score, context)
|
|
240
416
|
|
|
241
|
-
#
|
|
242
|
-
|
|
243
|
-
self._update_agent_trust(agent_id, new_score)
|
|
417
|
+
# Persist updated Beta parameters and derived trust_score
|
|
418
|
+
self._set_beta_params(agent_id, alpha, beta)
|
|
244
419
|
|
|
245
420
|
logger.debug(
|
|
246
|
-
"Trust signal: agent=%s, type=%s,
|
|
247
|
-
|
|
421
|
+
"Trust signal: agent=%s, type=%s (%s, w=%.2f), "
|
|
422
|
+
"alpha=%.2f, beta=%.2f, score=%.4f->%.4f",
|
|
423
|
+
agent_id, signal_type, direction, weight,
|
|
424
|
+
alpha, beta, old_score, new_score
|
|
248
425
|
)
|
|
249
426
|
|
|
427
|
+
return True
|
|
428
|
+
|
|
250
429
|
def _persist_signal(self, agent_id, signal_type, delta, old_score, new_score, context):
|
|
251
430
|
"""Save signal to trust_signals table."""
|
|
252
431
|
try:
|
|
@@ -265,7 +444,12 @@ class TrustScorer:
|
|
|
265
444
|
logger.error("Failed to persist trust signal: %s", e)
|
|
266
445
|
|
|
267
446
|
def _get_agent_trust(self, agent_id: str) -> Optional[float]:
|
|
268
|
-
"""
|
|
447
|
+
"""
|
|
448
|
+
Get current trust score from agent_registry.
|
|
449
|
+
|
|
450
|
+
This reads the derived trust_score column (which is always kept
|
|
451
|
+
in sync with alpha/(alpha+beta) by _set_beta_params).
|
|
452
|
+
"""
|
|
269
453
|
try:
|
|
270
454
|
from db_connection_manager import DbConnectionManager
|
|
271
455
|
mgr = DbConnectionManager.get_instance(self.db_path)
|
|
@@ -282,7 +466,13 @@ class TrustScorer:
|
|
|
282
466
|
return None
|
|
283
467
|
|
|
284
468
|
def _update_agent_trust(self, agent_id: str, new_score: float):
|
|
285
|
-
"""
|
|
469
|
+
"""
|
|
470
|
+
Update trust score in agent_registry (legacy compatibility method).
|
|
471
|
+
|
|
472
|
+
In Beta-Binomial mode, this is a no-op because _set_beta_params
|
|
473
|
+
already updates trust_score alongside alpha and beta. Kept for
|
|
474
|
+
backward compatibility if any external code calls it directly.
|
|
475
|
+
"""
|
|
286
476
|
try:
|
|
287
477
|
from db_connection_manager import DbConnectionManager
|
|
288
478
|
mgr = DbConnectionManager.get_instance(self.db_path)
|
|
@@ -373,16 +563,40 @@ class TrustScorer:
|
|
|
373
563
|
# =========================================================================
|
|
374
564
|
|
|
375
565
|
def get_trust_score(self, agent_id: str) -> float:
    """
    Return the agent's current trust score (posterior mean of its Beta).

    The score is alpha / (alpha + beta), using the parameters supplied by
    _get_beta_params. If those parameters do not sum to a positive number,
    the prior mean INITIAL_ALPHA / (INITIAL_ALPHA + INITIAL_BETA) is
    returned instead.

    Args:
        agent_id: Agent to score.

    Returns:
        Trust score as a float.
    """
    params = self._get_beta_params(agent_id)
    alpha, beta = params
    evidence = alpha + beta

    if not evidence > 0:
        # Degenerate parameters — fall back to the prior mean.
        return INITIAL_ALPHA / (INITIAL_ALPHA + INITIAL_BETA)
    return alpha / evidence
|
|
577
|
+
|
|
578
|
+
def get_beta_params(self, agent_id: str) -> Dict[str, float]:
    """
    Get the Beta distribution parameters for an agent.

    The reported trust_score follows the same rule as get_trust_score:
    alpha / (alpha + beta), or the prior mean
    INITIAL_ALPHA / (INITIAL_ALPHA + INITIAL_BETA) when the parameters do
    not sum to a positive number. This keeps the reported score identical
    to the one check_trust enforces on (previously this method reported
    0.0 in that degenerate case while get_trust_score returned the prior).

    Args:
        agent_id: Agent to look up.

    Returns:
        {"alpha": float, "beta": float, "trust_score": float}
        with every value rounded to 4 decimals.
    """
    alpha, beta = self._get_beta_params(agent_id)
    if (alpha + beta) > 0:
        score = alpha / (alpha + beta)
    else:
        # Same fallback as get_trust_score, so both getters always agree.
        score = INITIAL_ALPHA / (INITIAL_ALPHA + INITIAL_BETA)
    return {
        "alpha": round(alpha, 4),
        "beta": round(beta, 4),
        "trust_score": round(score, 4),
    }
|
|
379
592
|
|
|
380
593
|
def check_trust(self, agent_id: str, operation: str = "write") -> bool:
|
|
381
594
|
"""
|
|
382
595
|
Check if agent is trusted enough for the given operation.
|
|
383
596
|
|
|
384
597
|
v2.6 enforcement: blocks write/delete for agents with trust < 0.3.
|
|
385
|
-
New agents start at 1.
|
|
598
|
+
New agents start at Beta(2,1) → trust 0.667 — only repeated bad
|
|
599
|
+
behavior triggers blocking.
|
|
386
600
|
|
|
387
601
|
Args:
|
|
388
602
|
agent_id: The agent identifier
|
|
@@ -394,14 +608,12 @@ class TrustScorer:
|
|
|
394
608
|
if operation == "read":
|
|
395
609
|
return True # Reads are always allowed
|
|
396
610
|
|
|
397
|
-
score = self.
|
|
398
|
-
if score is None:
|
|
399
|
-
return True # Unknown agent = first-time = allowed (starts at 1.0)
|
|
611
|
+
score = self.get_trust_score(agent_id)
|
|
400
612
|
|
|
401
613
|
threshold = 0.3 # Block write/delete below this
|
|
402
614
|
if score < threshold:
|
|
403
615
|
logger.warning(
|
|
404
|
-
"Trust enforcement: agent '%s' blocked from '%s' (trust=%.
|
|
616
|
+
"Trust enforcement: agent '%s' blocked from '%s' (trust=%.4f < %.2f)",
|
|
405
617
|
agent_id, operation, score, threshold
|
|
406
618
|
)
|
|
407
619
|
return False
|
|
@@ -479,7 +691,9 @@ class TrustScorer:
|
|
|
479
691
|
"total_signals": total_signals,
|
|
480
692
|
"by_signal_type": by_type,
|
|
481
693
|
"by_agent": by_agent,
|
|
482
|
-
"avg_trust_score": round(avg, 4) if avg else
|
|
694
|
+
"avg_trust_score": round(avg, 4) if avg else INITIAL_ALPHA / (INITIAL_ALPHA + INITIAL_BETA),
|
|
695
|
+
"scoring_model": "Beta-Binomial",
|
|
696
|
+
"prior": f"Beta({INITIAL_ALPHA}, {INITIAL_BETA})",
|
|
483
697
|
"enforcement": "enabled (v2.6 — write/delete blocked below 0.3 trust)",
|
|
484
698
|
}
|
|
485
699
|
|
package/ui/app.js
CHANGED
|
@@ -1445,9 +1445,9 @@ async function loadAgents() {
|
|
|
1445
1445
|
|
|
1446
1446
|
// Trust score
|
|
1447
1447
|
var tdTrust = document.createElement('td');
|
|
1448
|
-
var trustScore = agent.trust_score != null ? agent.trust_score :
|
|
1449
|
-
tdTrust.className = trustScore < 0.
|
|
1450
|
-
: trustScore < 0.
|
|
1448
|
+
var trustScore = agent.trust_score != null ? agent.trust_score : 0.667;
|
|
1449
|
+
tdTrust.className = trustScore < 0.3 ? 'text-danger fw-bold'
|
|
1450
|
+
: trustScore < 0.5 ? 'text-warning fw-bold' : 'text-success fw-bold';
|
|
1451
1451
|
tdTrust.textContent = trustScore.toFixed(2);
|
|
1452
1452
|
tr.appendChild(tdTrust);
|
|
1453
1453
|
|
|
@@ -1524,7 +1524,7 @@ async function loadTrustOverview() {
|
|
|
1524
1524
|
card2.className = 'border rounded p-3 text-center';
|
|
1525
1525
|
var val2 = document.createElement('div');
|
|
1526
1526
|
val2.className = 'fs-4 fw-bold';
|
|
1527
|
-
val2.textContent = (stats.avg_trust_score ||
|
|
1527
|
+
val2.textContent = (stats.avg_trust_score || 0.667).toFixed(3);
|
|
1528
1528
|
card2.appendChild(val2);
|
|
1529
1529
|
var lbl2 = document.createElement('small');
|
|
1530
1530
|
lbl2.className = 'text-muted';
|
package/ui/js/agents.js
CHANGED
|
@@ -78,9 +78,9 @@ async function loadAgents() {
|
|
|
78
78
|
tr.appendChild(tdProto);
|
|
79
79
|
|
|
80
80
|
var tdTrust = document.createElement('td');
|
|
81
|
-
var trustScore = agent.trust_score != null ? agent.trust_score :
|
|
82
|
-
tdTrust.className = trustScore < 0.
|
|
83
|
-
: trustScore < 0.
|
|
81
|
+
var trustScore = agent.trust_score != null ? agent.trust_score : 0.667;
|
|
82
|
+
tdTrust.className = trustScore < 0.3 ? 'text-danger fw-bold'
|
|
83
|
+
: trustScore < 0.5 ? 'text-warning fw-bold' : 'text-success fw-bold';
|
|
84
84
|
tdTrust.textContent = trustScore.toFixed(2);
|
|
85
85
|
tr.appendChild(tdTrust);
|
|
86
86
|
|
|
@@ -133,7 +133,7 @@ async function loadTrustOverview() {
|
|
|
133
133
|
|
|
134
134
|
var cardData = [
|
|
135
135
|
{ value: (stats.total_signals || 0).toLocaleString(), label: 'Total Signals Collected', cls: '' },
|
|
136
|
-
{ value: (stats.avg_trust_score ||
|
|
136
|
+
{ value: (stats.avg_trust_score || 0.667).toFixed(3), label: 'Average Trust Score', cls: '' },
|
|
137
137
|
{ value: stats.enforcement || 'disabled', label: 'Enforcement Status', cls: 'text-info' }
|
|
138
138
|
];
|
|
139
139
|
|