superlocalmemory 3.2.2 → 3.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +43 -1
- package/README.md +106 -71
- package/package.json +1 -2
- package/pyproject.toml +16 -1
- package/src/superlocalmemory/cli/commands.py +309 -0
- package/src/superlocalmemory/cli/main.py +44 -0
- package/src/superlocalmemory/core/config.py +282 -11
- package/src/superlocalmemory/core/consolidation_engine.py +37 -0
- package/src/superlocalmemory/core/engine.py +21 -0
- package/src/superlocalmemory/core/engine_wiring.py +58 -8
- package/src/superlocalmemory/dynamics/activation_guided_quantization.py +374 -0
- package/src/superlocalmemory/dynamics/eap_scheduler.py +276 -0
- package/src/superlocalmemory/dynamics/ebbinghaus_langevin_coupling.py +171 -0
- package/src/superlocalmemory/encoding/cognitive_consolidator.py +804 -0
- package/src/superlocalmemory/hooks/auto_invoker.py +46 -8
- package/src/superlocalmemory/hooks/auto_parameterize.py +147 -0
- package/src/superlocalmemory/infra/heartbeat_monitor.py +140 -0
- package/src/superlocalmemory/infra/pid_manager.py +193 -0
- package/src/superlocalmemory/infra/process_reaper.py +572 -0
- package/src/superlocalmemory/learning/consolidation_quantization_worker.py +115 -0
- package/src/superlocalmemory/learning/forgetting_scheduler.py +263 -0
- package/src/superlocalmemory/learning/quantization_scheduler.py +320 -0
- package/src/superlocalmemory/math/ebbinghaus.py +309 -0
- package/src/superlocalmemory/math/fisher_quantized.py +251 -0
- package/src/superlocalmemory/math/hopfield.py +279 -0
- package/src/superlocalmemory/math/polar_quant.py +379 -0
- package/src/superlocalmemory/math/qjl.py +115 -0
- package/src/superlocalmemory/mcp/server.py +2 -0
- package/src/superlocalmemory/mcp/tools_v3.py +10 -0
- package/src/superlocalmemory/mcp/tools_v33.py +351 -0
- package/src/superlocalmemory/parameterization/__init__.py +47 -0
- package/src/superlocalmemory/parameterization/pattern_extractor.py +534 -0
- package/src/superlocalmemory/parameterization/pii_filter.py +106 -0
- package/src/superlocalmemory/parameterization/prompt_injector.py +216 -0
- package/src/superlocalmemory/parameterization/prompt_lifecycle.py +275 -0
- package/src/superlocalmemory/parameterization/soft_prompt_generator.py +425 -0
- package/src/superlocalmemory/retrieval/engine.py +21 -3
- package/src/superlocalmemory/retrieval/forgetting_filter.py +145 -0
- package/src/superlocalmemory/retrieval/hopfield_channel.py +335 -0
- package/src/superlocalmemory/retrieval/quantization_aware_search.py +133 -0
- package/src/superlocalmemory/retrieval/spreading_activation.py +1 -1
- package/src/superlocalmemory/retrieval/strategy.py +16 -6
- package/src/superlocalmemory/retrieval/vector_store.py +1 -1
- package/src/superlocalmemory/server/routes/agents.py +68 -8
- package/src/superlocalmemory/server/routes/learning.py +18 -1
- package/src/superlocalmemory/server/routes/lifecycle.py +36 -17
- package/src/superlocalmemory/server/routes/v3_api.py +503 -1
- package/src/superlocalmemory/storage/database.py +206 -0
- package/src/superlocalmemory/storage/embedding_migrator.py +178 -0
- package/src/superlocalmemory/storage/migration_v33.py +140 -0
- package/src/superlocalmemory/storage/quantized_store.py +261 -0
- package/src/superlocalmemory/storage/schema_v32.py +137 -0
- package/conftest.py +0 -5
|
@@ -0,0 +1,335 @@
|
|
|
1
|
+
# Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
|
|
2
|
+
# Licensed under the MIT License - see LICENSE file
|
|
3
|
+
# Part of SuperLocalMemory V3
|
|
4
|
+
|
|
5
|
+
"""SuperLocalMemory V3.3 -- Hopfield Associative Memory (6th Retrieval Channel).
|
|
6
|
+
|
|
7
|
+
Modern Continuous Hopfield Network retrieval channel based on
|
|
8
|
+
Ramsauer et al. (2020): "Hopfield Networks is All You Need".
|
|
9
|
+
|
|
10
|
+
The Hopfield channel excels at pattern completion for vague/noisy queries.
|
|
11
|
+
It operates on the same embedding space as the semantic channel but uses
|
|
12
|
+
an energy-based attention mechanism instead of cosine similarity.
|
|
13
|
+
|
|
14
|
+
Key features:
|
|
15
|
+
- Full memory matrix path for stores <= 10K facts
|
|
16
|
+
- ANN pre-filter path for stores 10K-100K (VectorStore KNN -> Hopfield refinement)
|
|
17
|
+
- Skip path for stores > 100K (other 5 channels are sufficient)
|
|
18
|
+
- TTL-based matrix cache to avoid rebuilding every query
|
|
19
|
+
- Returns [] on any error (HR-06)
|
|
20
|
+
|
|
21
|
+
Part of Qualixar | Author: Varun Pratap Bhardwaj
|
|
22
|
+
License: MIT
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
from __future__ import annotations
|
|
26
|
+
|
|
27
|
+
import logging
|
|
28
|
+
import time
|
|
29
|
+
from typing import TYPE_CHECKING, Any
|
|
30
|
+
|
|
31
|
+
import numpy as np
|
|
32
|
+
|
|
33
|
+
from superlocalmemory.math.hopfield import HopfieldConfig, ModernHopfieldNetwork
|
|
34
|
+
|
|
35
|
+
if TYPE_CHECKING:
|
|
36
|
+
from superlocalmemory.retrieval.vector_store import VectorStore
|
|
37
|
+
from superlocalmemory.storage.database import DatabaseManager
|
|
38
|
+
|
|
39
|
+
logger = logging.getLogger(__name__)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class HopfieldChannel:
    """6th retrieval channel: Modern Hopfield associative memory.

    Implements the RetrievalChannel protocol::

        def search(query, profile_id, top_k=50) -> list[tuple[str, float]]

    The channel builds an in-memory matrix from fact embeddings,
    computes Hopfield attention scores via ``ModernHopfieldNetwork``,
    then ranks facts by similarity to the completed pattern.

    Routing logic (per LLD Section 2.2):
        - n > skip_threshold (100K): return [] immediately
        - n > prefilter_threshold (10K): ANN pre-filter + Hopfield on subset
        - n <= prefilter_threshold: full matrix Hopfield
    """

    def __init__(
        self,
        db: Any,
        vector_store: Any,
        config: HopfieldConfig | None = None,
    ) -> None:
        """Initialize HopfieldChannel.

        Args:
            db: DatabaseManager with get_all_facts() and get_facts_by_ids().
            vector_store: VectorStore with search() and count().
            config: Hopfield configuration. Uses defaults if None.
        """
        self._db = db
        self._vector_store = vector_store
        self._config = config or HopfieldConfig()
        self._hopfield = ModernHopfieldNetwork(self._config)

        # Memory matrix cache (per LLD Section 2.2, HR-09)
        self._cached_matrix: np.ndarray | None = None
        self._cached_fact_ids: list[str] = []
        self._cached_profile: str = ""
        self._cached_count: int = 0
        self._cache_timestamp: float = 0.0

    # -- Public API (RetrievalChannel protocol) --------------------------------

    def search(
        self,
        query: Any,
        profile_id: str,
        top_k: int = 50,
    ) -> list[tuple[str, float]]:
        """Search for facts using Hopfield associative retrieval.

        Args:
            query: Query embedding (list[float] or np.ndarray).
            profile_id: Scope search to this profile.
            top_k: Maximum results to return.

        Returns:
            List of (fact_id, score) sorted by score descending.
            Returns [] when the channel is disabled or on any error (HR-06).
        """
        if not self._config.enabled:
            return []

        try:
            return self._search_inner(query, profile_id, top_k)
        except Exception as exc:
            # HR-06: a failing channel must never break retrieval as a whole.
            logger.warning("Hopfield channel error: %s", exc)
            return []

    # -- Private implementation ------------------------------------------------

    def _store_count(self, profile_id: str) -> int:
        """Return the vector-store count for a profile, or 0 if no usable store."""
        store = self._vector_store
        if store and getattr(store, "available", False):
            return store.count(profile_id)
        return 0

    def _search_inner(
        self,
        query: Any,
        profile_id: str,
        top_k: int,
    ) -> list[tuple[str, float]]:
        """Core search logic, separated from ``search`` for clean error handling.

        Validates the query dimension, then routes by store size: skip,
        ANN pre-filter, or full-matrix Hopfield.
        """
        q_vec = np.array(query, dtype=np.float32)

        if q_vec.shape != (self._config.dimension,):
            logger.debug(
                "Hopfield dimension mismatch: query %s, expected (%d,)",
                q_vec.shape, self._config.dimension,
            )
            return []

        # AUDIT FIX G-MEDIUM-02: consult the cheap count BEFORE loading anything.
        total_count = self._store_count(profile_id)

        if total_count > self._config.skip_threshold:
            logger.debug(
                "Hopfield skipped: %d facts exceeds skip_threshold %d",
                total_count, self._config.skip_threshold,
            )
            return []

        # Large store: go straight to the ANN pre-filter. Building the full
        # matrix first would load every fact only to throw the matrix away.
        if total_count > self._config.prefilter_threshold:
            return self._search_with_prefilter(q_vec, profile_id, [], top_k)

        memory_matrix, fact_ids = self._get_memory_matrix(profile_id)
        if memory_matrix is None or not fact_ids:
            return []

        # The DB can hold more facts than the vector store reported
        # (e.g. store unavailable -> count 0), so re-check on the real size.
        if len(fact_ids) > self._config.prefilter_threshold:
            return self._search_with_prefilter(
                q_vec, profile_id, fact_ids, top_k,
            )
        return self._search_full_matrix(
            q_vec, memory_matrix, fact_ids, top_k,
        )

    def _search_full_matrix(
        self,
        query: np.ndarray,
        memory_matrix: np.ndarray,
        fact_ids: list[str],
        top_k: int,
    ) -> list[tuple[str, float]]:
        """Full matrix Hopfield retrieval for stores <= prefilter_threshold.

        Algorithm (LLD Section 2.2):
            1. Compute Hopfield attention weights
            2. Compute retrieved (completed) pattern via weighted sum
            3. Normalize retrieved pattern
            4. Score all stored patterns against the completed pattern
            5. Return top-K by similarity

        Returns:
            Up to ``top_k`` (fact_id, similarity) pairs with similarity > 0,
            best first. Empty when ``top_k`` is not positive.
        """
        # A negative top_k would otherwise slice as "all but the last".
        if top_k <= 0 or not fact_ids:
            return []

        # Step 1: Hopfield attention
        attention = self._hopfield.attention_scores(query, memory_matrix)

        # Step 2: Pattern completion
        retrieved = memory_matrix.T @ attention  # shape (d,)

        # Step 3: Normalize (skip for a near-zero vector to avoid dividing by ~0)
        norm = float(np.linalg.norm(retrieved))
        if norm > 1e-8:
            retrieved = retrieved / norm

        # Step 4: Similarity to all patterns
        similarities = memory_matrix @ retrieved  # shape (n,)

        # Step 5: Top-K selection, dropping non-positive similarities
        top_indices = np.argsort(-similarities)[:top_k]
        return [
            (fact_ids[int(i)], float(similarities[i]))
            for i in top_indices
            if similarities[i] > 0.0
        ]

    def _search_with_prefilter(
        self,
        query: np.ndarray,
        profile_id: str,
        all_fact_ids: list[str],
        top_k: int,
    ) -> list[tuple[str, float]]:
        """Two-stage retrieval for large stores (>prefilter_threshold facts).

        Stage 1: VectorStore KNN pre-filter to get candidate subset
        Stage 2: Hopfield refinement on the small candidate set

        Algorithm (LLD Section 2.2):
            1. Get KNN candidates from VectorStore
            2. Load candidate facts from DB
            3. Build sub-matrix from candidate embeddings
            4. Run full-matrix Hopfield on the sub-matrix

        Args:
            query: Query vector.
            profile_id: Scope search to this profile.
            all_fact_ids: Unused; kept so existing callers keep working.
            top_k: Maximum results to return.
        """
        store = self._vector_store
        if not store or not getattr(store, "available", False):
            # Without a vector store there is no way to pre-filter; give up
            # rather than run Hopfield over the whole oversized store.
            return []

        # Stage 1: KNN pre-filter
        knn_results = store.search(
            query.tolist(),
            top_k=self._config.prefilter_candidates,
            profile_id=profile_id,
        )
        if not knn_results:
            return []

        # Stage 2: Load candidate facts
        candidate_ids = [fid for fid, _ in knn_results]
        candidates = self._db.get_facts_by_ids(candidate_ids, profile_id)
        if not candidates:
            return []

        # Stage 3: Build the L2-normalized sub-matrix (HR-03)
        sub_matrix, sub_ids = self._build_normalized_matrix(candidates)
        if sub_matrix is None:
            return []

        # Stage 4: Hopfield on subset
        return self._search_full_matrix(query, sub_matrix, sub_ids, top_k)

    def _build_normalized_matrix(
        self, facts: Any,
    ) -> tuple[np.ndarray | None, list[str]]:
        """Stack valid fact embeddings into an L2-normalized (n x d) matrix.

        Facts whose ``embedding`` is missing or whose length differs from the
        configured dimension are dropped.

        Returns:
            (matrix, fact_ids) with rows aligned to ids, or (None, []) when
            no fact has a usable embedding.
        """
        dimension = self._config.dimension
        ids: list[str] = []
        rows: list[Any] = []
        for fact in facts:
            emb = getattr(fact, "embedding", None)
            if emb is not None and len(emb) == dimension:
                ids.append(fact.fact_id)
                rows.append(emb)

        if not rows:
            return (None, [])

        matrix = np.array(rows, dtype=np.float32)  # shape (n, d)

        # HR-03: L2-normalize each row; the floor avoids dividing by zero.
        norms = np.maximum(np.linalg.norm(matrix, axis=1, keepdims=True), 1e-8)
        return (matrix / norms, ids)

    def _get_memory_matrix(
        self, profile_id: str,
    ) -> tuple[np.ndarray | None, list[str]]:
        """Build or retrieve cached memory matrix X (n x d).

        The matrix is L2-normalized per row (HR-03) and cached with a TTL
        (HR-09, ``cache_ttl_seconds``). The cache is reused only for the same
        profile, an unchanged vector-store count and an unexpired TTL.

        Returns:
            (memory_matrix, fact_ids) or (None, []) if no valid facts.
        """
        current_count = self._store_count(profile_id)

        cache_is_fresh = (
            self._cached_matrix is not None
            and self._cached_profile == profile_id
            and self._cached_count == current_count
            and (time.monotonic() - self._cache_timestamp)
            < self._config.cache_ttl_seconds
        )
        if cache_is_fresh:
            return (self._cached_matrix, self._cached_fact_ids)

        facts = self._db.get_all_facts(profile_id)
        if not facts:
            return (None, [])

        matrix, fact_ids = self._build_normalized_matrix(facts)
        if matrix is None:
            return (None, [])

        self._cached_matrix = matrix
        self._cached_fact_ids = fact_ids
        self._cached_profile = profile_id
        self._cached_count = current_count
        self._cache_timestamp = time.monotonic()

        return (matrix, fact_ids)

    def invalidate_cache(self) -> None:
        """Clear the memory matrix cache.

        Resets every cache field to its ``__init__`` default so the next
        search rebuilds the matrix from fresh data.
        """
        self._cached_matrix = None
        self._cached_fact_ids = []
        self._cached_profile = ""
        self._cached_count = 0
        self._cache_timestamp = 0.0
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
# Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
|
|
2
|
+
# Licensed under the MIT License - see LICENSE file
|
|
3
|
+
# Part of SuperLocalMemory V3
|
|
4
|
+
|
|
5
|
+
"""Three-tier mixed-precision search.
|
|
6
|
+
|
|
7
|
+
Merges results from:
|
|
8
|
+
Tier 1: float32 (VectorStore.search -- exact cosine)
|
|
9
|
+
Tier 2: int8 (VectorStore.search_int8 -- sqlite-vec native)
|
|
10
|
+
Tier 3: polar (QuantizedEmbeddingStore.search -- PolarQuant)
|
|
11
|
+
|
|
12
|
+
Deduplicates by keeping the highest score per fact_id.
|
|
13
|
+
Applies precision-dependent score penalties:
|
|
14
|
+
- float32: no penalty (1.0x)
|
|
15
|
+
- int8: 0.98x
|
|
16
|
+
- polar: config.polar_search_penalty (default 0.95x)
|
|
17
|
+
|
|
18
|
+
Part of Qualixar | Author: Varun Pratap Bhardwaj
|
|
19
|
+
License: MIT
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
from __future__ import annotations
|
|
23
|
+
|
|
24
|
+
import logging
|
|
25
|
+
from typing import TYPE_CHECKING
|
|
26
|
+
|
|
27
|
+
from numpy.typing import NDArray
|
|
28
|
+
|
|
29
|
+
from superlocalmemory.core.config import QuantizationConfig
|
|
30
|
+
|
|
31
|
+
if TYPE_CHECKING:
|
|
32
|
+
from superlocalmemory.storage.quantized_store import QuantizedEmbeddingStore
|
|
33
|
+
|
|
34
|
+
logger = logging.getLogger(__name__)
|
|
35
|
+
|
|
36
|
+
# Penalty factor for int8 tier (fixed, not configurable)
|
|
37
|
+
_INT8_PENALTY: float = 0.98
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class QuantizationAwareSearch:
    """Three-tier mixed-precision embedding search.

    Combines float32 + int8 + polar results, deduplicates by keeping the
    highest score per fact_id, and returns top_k by score descending.
    """

    __slots__ = ("_vector_store", "_quantized_store", "_config")

    def __init__(
        self,
        vector_store,
        quantized_store: QuantizedEmbeddingStore,
        config: QuantizationConfig,
    ) -> None:
        """Store the collaborators used by the three search tiers.

        Args:
            vector_store: Provides ``search`` (float32) and ``search_int8``.
            quantized_store: Provides ``search`` over polar-quantized vectors.
            config: Supplies ``polar_search_penalty``.
        """
        self._vector_store = vector_store
        self._quantized_store = quantized_store
        self._config = config

    def search(
        self,
        query_embedding: NDArray,
        profile_id: str,
        top_k: int = 50,
    ) -> list[tuple[str, float]]:
        """Execute three-tier mixed-precision search.

        Args:
            query_embedding: Query vector (float32/64).
            profile_id: Scope to this profile.
            top_k: Max results to return.

        Returns:
            [(fact_id, score)] sorted by score descending. Empty when
            ``top_k`` is not positive.
        """
        # A negative top_k would otherwise slice as "all but the last".
        if top_k <= 0:
            return []

        tiers = (
            self._search_float32(query_embedding, profile_id, top_k),
            self._search_int8(query_embedding, profile_id, top_k),
            self._search_polar(query_embedding, profile_id, top_k),
        )

        # Merge + dedup (keep highest score per fact_id). Iterating per tier
        # works whatever sequence type each tier returns.
        best: dict[str, float] = {}
        for tier_results in tiers:
            for fid, score in tier_results:
                if fid not in best or score > best[fid]:
                    best[fid] = score

        merged = sorted(best.items(), key=lambda item: item[1], reverse=True)
        return merged[:top_k]

    # -- Tier helpers (encapsulate error handling) -------------------------

    def _search_float32(
        self, query: NDArray, profile_id: str, top_k: int,
    ) -> list[tuple[str, float]]:
        """Tier 1: float32 exact cosine via VectorStore.

        Returns [] if the tier raises.
        """
        try:
            # Keyword arguments so profile_id and top_k cannot be swapped,
            # whatever order VectorStore.search declares them in.
            return self._vector_store.search(
                query, top_k=top_k, profile_id=profile_id,
            )
        except Exception as exc:
            logger.debug("float32 search failed: %s", exc)
            return []

    def _search_int8(
        self, query: NDArray, profile_id: str, top_k: int,
    ) -> list[tuple[str, float]]:
        """Tier 2: int8 approximate via VectorStore.search_int8.

        Applies 0.98x penalty to account for int8 quantization error.
        Returns [] if the tier raises.
        """
        try:
            # NOTE(review): positional order (query, profile_id, top_k) is
            # assumed here -- confirm against VectorStore.search_int8.
            raw = self._vector_store.search_int8(query, profile_id, top_k)
            return [(fid, score * _INT8_PENALTY) for fid, score in raw]
        except Exception as exc:
            logger.debug("int8 search failed: %s", exc)
            return []

    def _search_polar(
        self, query: NDArray, profile_id: str, top_k: int,
    ) -> list[tuple[str, float]]:
        """Tier 3: polar quantized via QuantizedEmbeddingStore.

        Applies polar_search_penalty from config.
        Returns [] if the tier raises.
        """
        try:
            # NOTE(review): positional order (query, profile_id, top_k) is
            # assumed here -- confirm against QuantizedEmbeddingStore.search.
            raw = self._quantized_store.search(query, profile_id, top_k)
            penalty = self._config.polar_search_penalty
            return [(fid, score * penalty) for fid, score in raw]
        except Exception as exc:
            logger.debug("polar search failed: %s", exc)
            return []
|
|
@@ -50,7 +50,7 @@ class SpreadingActivationConfig:
|
|
|
50
50
|
top_m: int = 7 # Lateral inhibition: max active nodes
|
|
51
51
|
max_iterations: int = 3 # T: propagation depth
|
|
52
52
|
tau_gate: float = 0.12 # FOK confidence gate
|
|
53
|
-
enabled: bool =
|
|
53
|
+
enabled: bool = True # Ships enabled by default
|
|
54
54
|
|
|
55
55
|
|
|
56
56
|
# ---------------------------------------------------------------------------
|
|
@@ -16,13 +16,14 @@ import re
|
|
|
16
16
|
from dataclasses import dataclass, field
|
|
17
17
|
|
|
18
18
|
STRATEGY_PRESETS: dict[str, dict[str, float]] = {
|
|
19
|
-
"temporal": {"semantic": 0.8, "bm25": 1.5, "entity_graph": 0.8, "temporal": 2.0, "spreading_activation": 0.5},
|
|
20
|
-
"multi_hop": {"semantic": 1.0, "bm25": 0.8, "entity_graph": 2.0, "temporal": 0.5, "spreading_activation": 2.0},
|
|
21
|
-
"aggregation": {"semantic": 1.2, "bm25": 1.5, "entity_graph": 1.0, "temporal": 0.5, "spreading_activation": 0.8},
|
|
22
|
-
"opinion": {"semantic": 1.8, "bm25": 0.6, "entity_graph": 0.8, "temporal": 0.3, "spreading_activation": 0.5},
|
|
23
|
-
"factual": {"semantic": 1.2, "bm25": 1.4, "entity_graph": 1.0, "temporal": 0.6, "spreading_activation": 0.8},
|
|
24
|
-
"entity": {"semantic": 1.0, "bm25": 1.5, "entity_graph": 1.2, "temporal": 0.5, "spreading_activation": 1.0},
|
|
19
|
+
"temporal": {"semantic": 0.8, "bm25": 1.5, "entity_graph": 0.8, "temporal": 2.0, "spreading_activation": 0.5, "hopfield": 0.5},
|
|
20
|
+
"multi_hop": {"semantic": 1.0, "bm25": 0.8, "entity_graph": 2.0, "temporal": 0.5, "spreading_activation": 2.0, "hopfield": 0.7},
|
|
21
|
+
"aggregation": {"semantic": 1.2, "bm25": 1.5, "entity_graph": 1.0, "temporal": 0.5, "spreading_activation": 0.8, "hopfield": 0.6},
|
|
22
|
+
"opinion": {"semantic": 1.8, "bm25": 0.6, "entity_graph": 0.8, "temporal": 0.3, "spreading_activation": 0.5, "hopfield": 0.5},
|
|
23
|
+
"factual": {"semantic": 1.2, "bm25": 1.4, "entity_graph": 1.0, "temporal": 0.6, "spreading_activation": 0.8, "hopfield": 0.8},
|
|
24
|
+
"entity": {"semantic": 1.0, "bm25": 1.5, "entity_graph": 1.2, "temporal": 0.5, "spreading_activation": 1.0, "hopfield": 0.9},
|
|
25
25
|
"general": {},
|
|
26
|
+
"vague": {"semantic": 0.8, "bm25": 0.5, "entity_graph": 0.6, "temporal": 0.3, "spreading_activation": 1.5, "hopfield": 1.1},
|
|
26
27
|
}
|
|
27
28
|
|
|
28
29
|
_TEMPORAL_WORDS: frozenset[str] = frozenset({
|
|
@@ -48,6 +49,12 @@ _OPINION_WORDS: tuple[str, ...] = (
|
|
|
48
49
|
"believe", "like about", "dislike", "enjoy", "hate", "love",
|
|
49
50
|
)
|
|
50
51
|
|
|
52
|
+
_VAGUE_PHRASES: tuple[str, ...] = (
|
|
53
|
+
"something about", "i think", "maybe", "not sure",
|
|
54
|
+
"vaguely remember", "partially recall", "sort of",
|
|
55
|
+
"kind of", "i forgot", "what was that",
|
|
56
|
+
)
|
|
57
|
+
|
|
51
58
|
|
|
52
59
|
@dataclass
|
|
53
60
|
class QueryStrategy:
|
|
@@ -93,4 +100,7 @@ class QueryStrategyClassifier:
|
|
|
93
100
|
return "entity"
|
|
94
101
|
if q.startswith(("what ", "where ", "who ", "which ", "how ")):
|
|
95
102
|
return "factual"
|
|
103
|
+
# Vague/fuzzy recall — Hopfield pattern completion excels here
|
|
104
|
+
if any(p in q for p in _VAGUE_PHRASES):
|
|
105
|
+
return "vague"
|
|
96
106
|
return "general"
|
|
@@ -32,7 +32,7 @@ class VectorStoreConfig:
|
|
|
32
32
|
dimension: int = 768
|
|
33
33
|
binary_quantization_threshold: int = 100_000 # L4 fix
|
|
34
34
|
model_name: str = "nomic-embed-text-v1.5"
|
|
35
|
-
enabled: bool =
|
|
35
|
+
enabled: bool = True # Ships enabled by default
|
|
36
36
|
|
|
37
37
|
|
|
38
38
|
class VectorStore:
|
|
@@ -73,15 +73,75 @@ async def get_agent_stats(request: Request):
|
|
|
73
73
|
|
|
74
74
|
@router.get("/api/trust/stats")
async def get_trust_stats(request: Request):
    """Get trust scoring statistics.

    Uses ``engine._trust_scorer`` when an engine is attached to the app
    state; otherwise queries the trust_signals and trust_scores tables
    directly (the dashboard can run without the engine).

    Returns:
        The scorer's own stats when the engine path succeeds, else a dict
        with ``total_signals``, ``avg_trust_score``, ``enforcement`` and
        ``by_signal_type``.

    Raises:
        HTTPException: status 500 if anything on the direct-query path fails
            unexpectedly.
    """
    try:
        # Try engine-based scorer first
        try:
            engine = getattr(request.app.state, "engine", None)
            if engine and getattr(engine, "_trust_scorer", None):
                return engine._trust_scorer.get_trust_stats()
        except Exception:
            pass  # Best effort: fall through to direct DB query

        # Direct DB query (dashboard runs without engine subprocess)
        import sqlite3
        from contextlib import closing

        from .helpers import get_active_profile
        pid = get_active_profile()

        # Defaults reported when the DB or its trust tables are missing.
        total_signals = 0
        avg_trust_score = 0.667
        by_signal_type = {}

        if DB_PATH.exists():
            # closing() releases the connection even if a query raises
            # something other than OperationalError.
            with closing(sqlite3.connect(str(DB_PATH))) as conn:
                conn.row_factory = sqlite3.Row

                # Each query is guarded separately: a missing table surfaces
                # as OperationalError and must not block the other stats.
                try:
                    # Count trust signals
                    row = conn.execute(
                        "SELECT COUNT(*) AS cnt FROM trust_signals "
                        "WHERE profile_id = ?", (pid,),
                    ).fetchone()
                    total_signals = row["cnt"] if row else 0
                except sqlite3.OperationalError:
                    pass

                try:
                    # Average trust score
                    row = conn.execute(
                        "SELECT AVG(trust_score) AS avg_ts FROM trust_scores "
                        "WHERE profile_id = ?", (pid,),
                    ).fetchone()
                    if row and row["avg_ts"] is not None:
                        avg_trust_score = round(float(row["avg_ts"]), 3)
                except sqlite3.OperationalError:
                    pass

                try:
                    # Signal breakdown by type
                    rows = conn.execute(
                        "SELECT signal_type, COUNT(*) AS cnt "
                        "FROM trust_signals WHERE profile_id = ? "
                        "GROUP BY signal_type", (pid,),
                    ).fetchall()
                    by_signal_type = {r["signal_type"]: r["cnt"] for r in rows}
                except sqlite3.OperationalError:
                    pass

        # Enforcement status: SLM uses "Silent Collection" by default
        enforcement = "Silent Collection"

        return {
            "total_signals": total_signals,
            "avg_trust_score": avg_trust_score,
            "enforcement": enforcement,
            "by_signal_type": by_signal_type,
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Trust stats error: {str(e)}") from e
|
|
87
147
|
|
|
@@ -81,13 +81,29 @@ async def learning_status():
|
|
|
81
81
|
|
|
82
82
|
# Real signal count from V3.1 learning_feedback table
|
|
83
83
|
signal_count = 0
|
|
84
|
+
unique_queries = 0
|
|
84
85
|
try:
|
|
85
86
|
from superlocalmemory.learning.feedback import FeedbackCollector
|
|
86
87
|
from pathlib import Path
|
|
88
|
+
import sqlite3 as _sqlite3
|
|
87
89
|
learning_db = Path.home() / ".superlocalmemory" / "learning.db"
|
|
88
90
|
if learning_db.exists():
|
|
89
91
|
collector = FeedbackCollector(learning_db)
|
|
90
92
|
signal_count = collector.get_feedback_count(active_profile)
|
|
93
|
+
# Count unique queries for the dashboard
|
|
94
|
+
_conn = _sqlite3.connect(str(learning_db))
|
|
95
|
+
_conn.row_factory = _sqlite3.Row
|
|
96
|
+
try:
|
|
97
|
+
_row = _conn.execute(
|
|
98
|
+
"SELECT COUNT(DISTINCT query_hash) AS cnt "
|
|
99
|
+
"FROM learning_feedback WHERE profile_id = ?",
|
|
100
|
+
(active_profile,),
|
|
101
|
+
).fetchone()
|
|
102
|
+
unique_queries = _row["cnt"] if _row else 0
|
|
103
|
+
except Exception:
|
|
104
|
+
pass
|
|
105
|
+
finally:
|
|
106
|
+
_conn.close()
|
|
91
107
|
except Exception:
|
|
92
108
|
pass
|
|
93
109
|
|
|
@@ -100,13 +116,14 @@ async def learning_status():
|
|
|
100
116
|
result["ranking_phase"] = "baseline"
|
|
101
117
|
|
|
102
118
|
# Feedback stats — merge old system + new V3.1 signals
|
|
103
|
-
stats_dict = {"feedback_count": signal_count, "active_profile": active_profile}
|
|
119
|
+
stats_dict = {"feedback_count": signal_count, "unique_queries": unique_queries, "active_profile": active_profile}
|
|
104
120
|
feedback = _get_feedback()
|
|
105
121
|
if feedback:
|
|
106
122
|
try:
|
|
107
123
|
old_stats = feedback.get_feedback_summary(active_profile)
|
|
108
124
|
if isinstance(old_stats, dict):
|
|
109
125
|
old_stats["feedback_count"] = signal_count
|
|
126
|
+
old_stats["unique_queries"] = unique_queries
|
|
110
127
|
old_stats["active_profile"] = active_profile
|
|
111
128
|
stats_dict = old_stats
|
|
112
129
|
except Exception as exc:
|