superlocalmemory 3.2.2 → 3.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +43 -1
- package/README.md +106 -71
- package/package.json +1 -2
- package/pyproject.toml +16 -1
- package/src/superlocalmemory/cli/commands.py +309 -0
- package/src/superlocalmemory/cli/main.py +44 -0
- package/src/superlocalmemory/core/config.py +282 -11
- package/src/superlocalmemory/core/consolidation_engine.py +37 -0
- package/src/superlocalmemory/core/engine.py +21 -0
- package/src/superlocalmemory/core/engine_wiring.py +58 -8
- package/src/superlocalmemory/dynamics/activation_guided_quantization.py +374 -0
- package/src/superlocalmemory/dynamics/eap_scheduler.py +276 -0
- package/src/superlocalmemory/dynamics/ebbinghaus_langevin_coupling.py +171 -0
- package/src/superlocalmemory/encoding/cognitive_consolidator.py +804 -0
- package/src/superlocalmemory/hooks/auto_invoker.py +46 -8
- package/src/superlocalmemory/hooks/auto_parameterize.py +147 -0
- package/src/superlocalmemory/infra/heartbeat_monitor.py +140 -0
- package/src/superlocalmemory/infra/pid_manager.py +193 -0
- package/src/superlocalmemory/infra/process_reaper.py +572 -0
- package/src/superlocalmemory/learning/consolidation_quantization_worker.py +115 -0
- package/src/superlocalmemory/learning/forgetting_scheduler.py +263 -0
- package/src/superlocalmemory/learning/quantization_scheduler.py +320 -0
- package/src/superlocalmemory/math/ebbinghaus.py +309 -0
- package/src/superlocalmemory/math/fisher_quantized.py +251 -0
- package/src/superlocalmemory/math/hopfield.py +279 -0
- package/src/superlocalmemory/math/polar_quant.py +379 -0
- package/src/superlocalmemory/math/qjl.py +115 -0
- package/src/superlocalmemory/mcp/server.py +2 -0
- package/src/superlocalmemory/mcp/tools_v3.py +10 -0
- package/src/superlocalmemory/mcp/tools_v33.py +351 -0
- package/src/superlocalmemory/parameterization/__init__.py +47 -0
- package/src/superlocalmemory/parameterization/pattern_extractor.py +534 -0
- package/src/superlocalmemory/parameterization/pii_filter.py +106 -0
- package/src/superlocalmemory/parameterization/prompt_injector.py +216 -0
- package/src/superlocalmemory/parameterization/prompt_lifecycle.py +275 -0
- package/src/superlocalmemory/parameterization/soft_prompt_generator.py +425 -0
- package/src/superlocalmemory/retrieval/engine.py +21 -3
- package/src/superlocalmemory/retrieval/forgetting_filter.py +145 -0
- package/src/superlocalmemory/retrieval/hopfield_channel.py +335 -0
- package/src/superlocalmemory/retrieval/quantization_aware_search.py +133 -0
- package/src/superlocalmemory/retrieval/spreading_activation.py +1 -1
- package/src/superlocalmemory/retrieval/strategy.py +16 -6
- package/src/superlocalmemory/retrieval/vector_store.py +1 -1
- package/src/superlocalmemory/server/routes/agents.py +68 -8
- package/src/superlocalmemory/server/routes/learning.py +18 -1
- package/src/superlocalmemory/server/routes/lifecycle.py +36 -17
- package/src/superlocalmemory/server/routes/v3_api.py +503 -1
- package/src/superlocalmemory/storage/database.py +206 -0
- package/src/superlocalmemory/storage/embedding_migrator.py +178 -0
- package/src/superlocalmemory/storage/migration_v33.py +140 -0
- package/src/superlocalmemory/storage/quantized_store.py +261 -0
- package/src/superlocalmemory/storage/schema_v32.py +137 -0
- package/conftest.py +0 -5
|
@@ -0,0 +1,425 @@
|
|
|
1
|
+
# Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
|
|
2
|
+
# Licensed under the MIT License - see LICENSE file
|
|
3
|
+
# Part of SuperLocalMemory V3.3
|
|
4
|
+
|
|
5
|
+
"""SoftPromptGenerator — Convert extracted patterns to text soft prompts.
|
|
6
|
+
|
|
7
|
+
Pure text personality encoding. No LoRA, no model weights.
|
|
8
|
+
Token budget management with priority ordering by category.
|
|
9
|
+
|
|
10
|
+
Part of Qualixar | Author: Varun Pratap Bhardwaj
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import logging
|
|
16
|
+
import uuid
|
|
17
|
+
from collections import defaultdict
|
|
18
|
+
from dataclasses import dataclass
|
|
19
|
+
from typing import TYPE_CHECKING
|
|
20
|
+
|
|
21
|
+
if TYPE_CHECKING:
|
|
22
|
+
from superlocalmemory.core.config import ParameterizationConfig
|
|
23
|
+
|
|
24
|
+
from superlocalmemory.parameterization.pattern_extractor import (
|
|
25
|
+
PatternAssertion,
|
|
26
|
+
PatternCategory,
|
|
27
|
+
)
|
|
28
|
+
from superlocalmemory.parameterization.pii_filter import PIIFilter
|
|
29
|
+
|
|
30
|
+
logger = logging.getLogger(__name__)
|
|
31
|
+
|
|
32
|
+
# ---------------------------------------------------------------------------
|
|
33
|
+
# Category templates — natural language patterns for each category
|
|
34
|
+
# ---------------------------------------------------------------------------
|
|
35
|
+
|
|
36
|
+
# One ``str.format``-style template per category. The placeholder names
# inside the braces are the keys the generator fills in for that category.
CATEGORY_TEMPLATES: dict[str, str] = {
    "identity":
        "The user is {role} with expertise in {domains}. "
        "They work at {organization}.",
    "tech_preference":
        "The user's preferred technology stack includes: {technologies}. "
        "Default to these when generating code or making recommendations.",
    "communication_style":
        "The user prefers {style} responses. {specific_preferences}",
    "workflow_pattern":
        "The user typically {workflow_description}. "
        "Anticipate this workflow when assisting.",
    "project_context":
        "Current active project: {project_name}. "
        "Key context: {context_summary}.",
    "decision_history":
        "Recent key decisions: {decisions}. "
        "These reflect the user's current direction.",
    "avoidance":
        "The user has explicitly asked to avoid: {avoid_list}. "
        "Do not suggest or use these.",
}

# Rendering and assembly order: earlier entries are emitted first and are
# therefore the last to be trimmed when the token budget runs out.
CATEGORY_PRIORITY_ORDER: list[str] = [
    "identity",
    "tech_preference",
    "communication_style",
    "workflow_pattern",
    "project_context",
    "decision_history",
    "avoidance",
]
|
|
75
|
+
|
|
76
|
+
# ---------------------------------------------------------------------------
|
|
77
|
+
# Data model
|
|
78
|
+
# ---------------------------------------------------------------------------
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
@dataclass
class SoftPromptTemplate:
    """Rendered soft prompt text for a single category of one profile.

    Instances are mutable: the generator updates ``token_count`` (and, during
    validation, ``content``) after construction.
    """

    # Unique identifier; SoftPromptGenerator fills this with a uuid4 string.
    prompt_id: str
    # Profile the prompt was generated for.
    profile_id: str
    # Category name (a key such as "identity" or "avoidance").
    category: str
    # The natural-language prompt text.
    content: str
    # Source IDs collected from the patterns that produced this prompt.
    source_pattern_ids: list[str]
    # Mean confidence of the contributing patterns.
    confidence: float
    # SoftPromptGenerator initialises this to 0.5.
    effectiveness: float
    # Estimated token count of ``content``.
    token_count: int
    # SoftPromptGenerator initialises this to 1.0.
    retention_score: float
    # SoftPromptGenerator creates prompts with active=True.
    active: bool
    # SoftPromptGenerator creates prompts with version=1.
    version: int
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
# ---------------------------------------------------------------------------
|
|
99
|
+
# SoftPromptGenerator class
|
|
100
|
+
# ---------------------------------------------------------------------------
|
|
101
|
+
|
|
102
|
+
class SoftPromptGenerator:
    """Convert extracted pattern assertions into natural language soft prompts.

    Patterns are grouped by category, rendered through ``CATEGORY_TEMPLATES``,
    passed through the PII filter and trimmed so the assembled prompt stays
    within ``config.max_prompt_tokens``. Categories are processed in
    ``CATEGORY_PRIORITY_ORDER``.
    """

    # Rough heuristic: estimated tokens per whitespace-separated word.
    _TOKENS_PER_WORD = 1.3
    # Hard cap (in estimated tokens) for any single category prompt.
    _MAX_CATEGORY_TOKENS = 100
    # A budget-trimmed prompt shorter than this many characters is discarded.
    _MIN_USEFUL_CHARS = 20
    # Header line emitted by assemble(); generate() reserves budget for it.
    _ASSEMBLY_HEADER = "# User Profile (auto-learned)"

    def __init__(self, config: ParameterizationConfig) -> None:
        self._config = config
        self._pii_filter = PIIFilter()

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def generate(
        self,
        patterns: list[PatternAssertion],
        profile_id: str,
    ) -> list[SoftPromptTemplate]:
        """Master generation pipeline: filter, group, render, budget-trim.

        Only categories listed in ``config.categories_enabled`` AND in
        ``CATEGORY_PRIORITY_ORDER`` are rendered. Once the token budget is
        exhausted, the current and all lower-priority categories are dropped.

        Args:
            patterns: Extracted pattern assertions.
            profile_id: Target profile.

        Returns:
            List of SoftPromptTemplate, ordered by category priority,
            within token budget.
        """
        enabled = set(self._config.categories_enabled)

        # Group the enabled patterns by category name.
        grouped: dict[str, list[PatternAssertion]] = defaultdict(list)
        for pattern in patterns:
            category_name = pattern.category.value
            if category_name in enabled:
                grouped[category_name].append(pattern)

        budget = self._config.max_prompt_tokens
        # Reserve tokens for the assembly header plus two newlines.
        running_token_count = self._estimate_tokens(self._ASSEMBLY_HEADER) + 2

        prompts: list[SoftPromptTemplate] = []
        for category in CATEGORY_PRIORITY_ORDER:
            if category not in grouped:
                continue

            template = self._render_category(
                category, grouped[category], profile_id,
            )
            if template is None:
                continue

            token_count = self._estimate_tokens(template.content)

            if running_token_count + token_count > budget:
                remaining = budget - running_token_count
                if remaining <= 0:
                    # Nothing left to spend: never emit text past the budget.
                    break
                template = self._trim_content(template, remaining)
                token_count = self._estimate_tokens(template.content)
                if len(template.content) < self._MIN_USEFUL_CHARS:
                    # Too little survived trimming to be a useful prompt.
                    break

            template.token_count = token_count
            prompts.append(template)
            running_token_count += token_count

        return self._validate(prompts)

    def assemble(self, prompts: list[SoftPromptTemplate]) -> str:
        """Assemble prompts into a single text block with header.

        Args:
            prompts: List of templates to assemble.

        Returns:
            Assembled text or empty string if no prompts.
        """
        if not prompts:
            return ""

        # Categories missing from the priority list sort after all known ones.
        order_map = {
            cat: idx for idx, cat in enumerate(CATEGORY_PRIORITY_ORDER)
        }
        sorted_prompts = sorted(
            prompts,
            key=lambda p: order_map.get(p.category, len(CATEGORY_PRIORITY_ORDER)),
        )

        lines = [self._ASSEMBLY_HEADER, ""]
        for prompt in sorted_prompts:
            lines.append(prompt.content)
            lines.append("")  # blank line between category paragraphs

        return "\n".join(lines).rstrip()

    # ------------------------------------------------------------------
    # Rendering
    # ------------------------------------------------------------------

    def _render_category(
        self,
        category: str,
        patterns: list[PatternAssertion],
        profile_id: str,
    ) -> SoftPromptTemplate | None:
        """Render patterns for a single category into a SoftPromptTemplate.

        Returns:
            The rendered template, or None when nothing but whitespace is
            left after cleaning and PII filtering.
        """
        template_str = CATEGORY_TEMPLATES.get(category, "{key}: {value}")
        values = self._extract_template_values(category, patterns)

        # defaultdict(str) renders any placeholder without a value as "".
        # NOTE(review): an empty placeholder can leave a dangling fragment
        # such as "They work at ." because _clean_content only collapses
        # consecutive periods. Confirm whether that is acceptable.
        safe_values = defaultdict(str, values)
        try:
            content = template_str.format_map(safe_values)
        except (KeyError, IndexError, ValueError):  # pragma: no cover
            content = ", ".join(p.value for p in patterns)  # pragma: no cover

        content = self._clean_content(content)

        # Filter PII
        content = self._pii_filter.filter_text(content)
        if not content.strip():
            return None

        # Enforce the per-category token cap.
        content = self._trim_to_tokens(content, self._MAX_CATEGORY_TOKENS)

        # Aggregate confidence as the mean over the contributing patterns.
        confidence = (
            sum(p.confidence for p in patterns) / len(patterns)
            if patterns else 0.0
        )

        # Flatten the source IDs of all contributing patterns.
        source_ids = [sid for p in patterns for sid in p.source_ids]

        return SoftPromptTemplate(
            prompt_id=str(uuid.uuid4()),
            profile_id=profile_id,
            category=category,
            content=content,
            source_pattern_ids=source_ids,
            confidence=confidence,
            effectiveness=0.5,
            token_count=0,  # filled in by generate()
            retention_score=1.0,
            active=True,
            version=1,
        )

    @staticmethod
    def _extract_template_values(
        category: str,
        patterns: list[PatternAssertion],
    ) -> dict[str, str]:
        """Extract placeholder values from patterns for a category template.

        Returns:
            Mapping from placeholder name to the text substituted for it.
            Unknown categories get the generic ``key`` / ``value`` pair.
        """
        values: dict[str, str] = {}
        pat_values = [p.value for p in patterns]

        if category == "identity":
            values["role"] = pat_values[0] if pat_values else ""
            values["domains"] = ", ".join(pat_values)
            # The organization comes from the first pattern whose key
            # mentions "organization" or "company".
            org_patterns = [
                p for p in patterns
                if "organization" in p.key or "company" in p.key
            ]
            values["organization"] = (
                org_patterns[0].value if org_patterns else ""
            )

        elif category == "tech_preference":
            values["technologies"] = ", ".join(pat_values)

        elif category == "communication_style":
            values["style"] = pat_values[0] if pat_values else ""
            values["specific_preferences"] = ", ".join(pat_values[1:])

        elif category == "workflow_pattern":
            values["workflow_description"] = "; ".join(pat_values)

        elif category == "project_context":
            values["project_name"] = pat_values[0] if pat_values else ""
            values["context_summary"] = ", ".join(pat_values[1:])

        elif category == "decision_history":
            values["decisions"] = ", ".join(pat_values)

        elif category == "avoidance":
            values["avoid_list"] = ", ".join(pat_values)

        else:
            # Generic fallback
            values["key"] = patterns[0].key if patterns else ""
            values["value"] = ", ".join(pat_values)

        return values

    # ------------------------------------------------------------------
    # Validation
    # ------------------------------------------------------------------

    def _validate(
        self, prompts: list[SoftPromptTemplate],
    ) -> list[SoftPromptTemplate]:
        """Validate prompts: no empty content, no PII, within token limits.

        Empty prompts, prompts that still contain PII and repeated categories
        are skipped with a warning. Over-long prompts are trimmed in place.

        Returns:
            The prompts that passed validation, in their original order.
        """
        valid: list[SoftPromptTemplate] = []
        seen_categories: set[str] = set()
        limit = self._MAX_CATEGORY_TOKENS

        for prompt in prompts:
            if not prompt.content.strip():
                logger.warning(
                    "Skipping empty prompt for category %s", prompt.category,
                )
                continue
            if self._pii_filter.has_pii(prompt.content):
                logger.warning(
                    "PII detected in prompt for category %s, skipping",
                    prompt.category,
                )
                continue
            if prompt.token_count > limit:
                logger.warning(
                    "Prompt for %s exceeds %d tokens (%d), trimming",
                    prompt.category, limit, prompt.token_count,
                )
                prompt.content = self._trim_to_tokens(prompt.content, limit)
                prompt.token_count = self._estimate_tokens(prompt.content)
            if prompt.category in seen_categories:
                logger.warning(
                    "Duplicate active category %s, skipping", prompt.category,
                )
                continue
            seen_categories.add(prompt.category)
            valid.append(prompt)

        return valid

    # ------------------------------------------------------------------
    # Token utilities
    # ------------------------------------------------------------------

    @staticmethod
    def _estimate_tokens(text: str) -> int:
        """Estimate token count from text (~1.3 tokens per word).

        Never returns less than 1, even for empty text.
        """
        word_count = len(text.split())
        return max(1, int(word_count * SoftPromptGenerator._TOKENS_PER_WORD))

    @staticmethod
    def _trim_to_tokens(text: str, max_tokens: int) -> str:
        """Trim text to fit within a token budget by removing sentences.

        Whole leading sentences are kept while the estimate for the kept text
        stays within ``max_tokens``. If not even the first sentence fits, the
        leading words that fit are kept and terminated with a period.

        Args:
            text: Text to trim.
            max_tokens: Budget in estimated tokens.

        Returns:
            ``text`` unchanged when it already fits, the trimmed text
            otherwise, or ``""`` when ``max_tokens`` is not positive.
        """
        import re

        if max_tokens <= 0:
            # No budget at all: returning even one word would overshoot.
            return ""
        if SoftPromptGenerator._estimate_tokens(text) <= max_tokens:
            return text

        ratio = SoftPromptGenerator._TOKENS_PER_WORD
        result_sentences: list[str] = []
        kept_words = 0

        for sentence in re.split(r"(?<=\.)\s+", text):
            # Estimate on the cumulative word count. Summing per-sentence
            # estimates floors each one and can overshoot the budget.
            candidate_words = kept_words + len(sentence.split())
            if max(1, int(candidate_words * ratio)) > max_tokens:
                break
            result_sentences.append(sentence)
            kept_words = candidate_words

        if not result_sentences:
            # Take first N words if no sentence fits
            words = text.split()
            target_words = max(1, int(max_tokens / ratio))
            return " ".join(words[:target_words]).rstrip(".,;: ") + "."

        result = " ".join(result_sentences)
        if not result.endswith("."):  # pragma: no cover — regex split keeps "." with chunks
            result += "."  # pragma: no cover
        return result

    @staticmethod
    def _trim_content(
        template: SoftPromptTemplate,
        remaining_budget: int,
    ) -> SoftPromptTemplate:
        """Create a new template with content trimmed to budget (immutable).

        The input template is left untouched; every field except ``content``
        and ``token_count`` is copied over.
        """
        trimmed = SoftPromptGenerator._trim_to_tokens(
            template.content, remaining_budget,
        )
        return SoftPromptTemplate(
            prompt_id=template.prompt_id,
            profile_id=template.profile_id,
            category=template.category,
            content=trimmed,
            source_pattern_ids=template.source_pattern_ids,
            confidence=template.confidence,
            effectiveness=template.effectiveness,
            token_count=SoftPromptGenerator._estimate_tokens(trimmed),
            retention_score=template.retention_score,
            active=template.active,
            version=template.version,
        )

    # ------------------------------------------------------------------
    # Content cleaning
    # ------------------------------------------------------------------

    @staticmethod
    def _clean_content(content: str) -> str:
        """Strip extra whitespace, remove empty sentences."""
        import re

        # Collapse every whitespace run into a single space.
        content = re.sub(r"\s+", " ", content).strip()
        # Remove empty sentences (". .")
        content = re.sub(r"\.\s*\.", ".", content)
        # Ensure non-empty content ends with a period.
        if content and not content.endswith("."):
            content += "."
        return content
|
|
@@ -2,9 +2,10 @@
|
|
|
2
2
|
# Licensed under the MIT License - see LICENSE file
|
|
3
3
|
# Part of SuperLocalMemory V3 | https://qualixar.com | https://varunpratap.com
|
|
4
4
|
|
|
5
|
-
"""SuperLocalMemory V3 — Retrieval Engine (
|
|
5
|
+
"""SuperLocalMemory V3 — Retrieval Engine (6-Channel Orchestrator).
|
|
6
6
|
|
|
7
|
-
|
|
7
|
+
6 channels -> single RRF fusion -> optional cross-encoder rerank.
|
|
8
|
+
Channels: semantic, BM25, entity_graph, temporal, spreading_activation, hopfield.
|
|
8
9
|
Replaces V1's broken 10-channel triple-re-fusion pipeline.
|
|
9
10
|
|
|
10
11
|
Part of Qualixar | Author: Varun Pratap Bhardwaj
|
|
@@ -28,6 +29,7 @@ from superlocalmemory.storage.models import (
|
|
|
28
29
|
if TYPE_CHECKING:
|
|
29
30
|
from superlocalmemory.retrieval.bm25_channel import BM25Channel
|
|
30
31
|
from superlocalmemory.retrieval.entity_channel import EntityGraphChannel
|
|
32
|
+
from superlocalmemory.retrieval.hopfield_channel import HopfieldChannel
|
|
31
33
|
from superlocalmemory.retrieval.semantic_channel import SemanticChannel
|
|
32
34
|
from superlocalmemory.retrieval.temporal_channel import TemporalChannel
|
|
33
35
|
from superlocalmemory.storage.database import DatabaseManager
|
|
@@ -47,7 +49,7 @@ class EmbeddingProvider(Protocol):
|
|
|
47
49
|
|
|
48
50
|
|
|
49
51
|
class RetrievalEngine:
|
|
50
|
-
"""
|
|
52
|
+
"""6-channel retrieval: semantic + BM25 + entity_graph + temporal + spreading_activation + hopfield.
|
|
51
53
|
|
|
52
54
|
Usage::
|
|
53
55
|
engine = RetrievalEngine(db, config, channels, embedder)
|
|
@@ -71,6 +73,8 @@ class RetrievalEngine:
|
|
|
71
73
|
self._bm25: BM25Channel | None = channels.get("bm25")
|
|
72
74
|
self._entity: EntityGraphChannel | None = channels.get("entity_graph")
|
|
73
75
|
self._temporal: TemporalChannel | None = channels.get("temporal")
|
|
76
|
+
# Phase G: Hopfield channel (6th)
|
|
77
|
+
self._hopfield: HopfieldChannel | None = channels.get("hopfield")
|
|
74
78
|
self._embedder = embedder
|
|
75
79
|
self._reranker = reranker
|
|
76
80
|
self._strategy = strategy or QueryStrategyClassifier()
|
|
@@ -90,6 +94,9 @@ class RetrievalEngine:
|
|
|
90
94
|
self._registry.register_channel("entity_graph", self._entity)
|
|
91
95
|
if self._temporal is not None:
|
|
92
96
|
self._registry.register_channel("temporal", self._temporal)
|
|
97
|
+
# Phase G: Hopfield channel (6th) — needs embedding input
|
|
98
|
+
if self._hopfield is not None:
|
|
99
|
+
self._registry.register_channel("hopfield", self._hopfield, needs_embedding=True)
|
|
93
100
|
|
|
94
101
|
def recall(
|
|
95
102
|
self, query: str, profile_id: str,
|
|
@@ -223,6 +230,17 @@ class RetrievalEngine:
|
|
|
223
230
|
except Exception as exc:
|
|
224
231
|
logger.warning("Temporal channel: %s", exc)
|
|
225
232
|
|
|
233
|
+
# Phase G: Hopfield channel (6th) — energy-based pattern completion
|
|
234
|
+
if self._hopfield is not None and "hopfield" not in disabled:
|
|
235
|
+
try:
|
|
236
|
+
q_emb = self._embedder.embed(query) if self._embedder else None
|
|
237
|
+
if q_emb is not None:
|
|
238
|
+
r = self._hopfield.search(q_emb, profile_id, self._config.hopfield_top_k)
|
|
239
|
+
if r:
|
|
240
|
+
out["hopfield"] = r
|
|
241
|
+
except Exception as exc:
|
|
242
|
+
logger.warning("Hopfield channel: %s", exc)
|
|
243
|
+
|
|
226
244
|
return out
|
|
227
245
|
|
|
228
246
|
# -- Fact loading -------------------------------------------------------
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
# Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
|
|
2
|
+
# Licensed under the MIT License - see LICENSE file
|
|
3
|
+
# Part of SuperLocalMemory V3 | https://qualixar.com | https://varunpratap.com
|
|
4
|
+
|
|
5
|
+
"""Forgetting filter for retrieval pipeline.
|
|
6
|
+
|
|
7
|
+
Post-retrieval filter that adjusts scores based on Ebbinghaus retention:
|
|
8
|
+
- Active/warm facts: score multiplied by the zone weight (active = 1.0, warm = 0.7)
|
|
9
|
+
- Cold facts: score reduced (weight = 0.3)
|
|
10
|
+
- Archive/forgotten facts: REMOVED from results entirely
|
|
11
|
+
|
|
12
|
+
Integrates with ChannelRegistry.register_filter() using the FilterFn
|
|
13
|
+
signature: (all_results, profile_id, context) -> filtered_results.
|
|
14
|
+
|
|
15
|
+
HR-06: When config.enabled=False, returns results unchanged.
|
|
16
|
+
|
|
17
|
+
Part of Qualixar | Author: Varun Pratap Bhardwaj
|
|
18
|
+
License: MIT
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
from __future__ import annotations
|
|
22
|
+
|
|
23
|
+
import logging
|
|
24
|
+
from typing import TYPE_CHECKING, Any
|
|
25
|
+
|
|
26
|
+
from superlocalmemory.core.config import ForgettingConfig
|
|
27
|
+
|
|
28
|
+
if TYPE_CHECKING:
|
|
29
|
+
from superlocalmemory.retrieval.channel_registry import ChannelRegistry
|
|
30
|
+
from superlocalmemory.storage.database import DatabaseManager
|
|
31
|
+
|
|
32
|
+
logger = logging.getLogger(__name__)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
# Lifecycle zone weights — same as in ebbinghaus.py
|
|
36
|
+
# Score multiplier applied to a fact according to its lifecycle zone.
_ZONE_WEIGHTS: dict[str, float] = dict(
    active=1.0,
    warm=0.7,
    cold=0.3,
    archive=0.0,
    forgotten=0.0,
)

# Facts in these zones are dropped from the results instead of re-weighted.
_EXCLUDED_ZONES: frozenset[str] = frozenset(("archive", "forgotten"))
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class ForgettingFilter:
    """Retention-aware post-processing step for retrieval results.

    Facts whose lifecycle zone is archived or forgotten are dropped; the
    scores of all other facts are multiplied by their zone weight.
    """

    __slots__ = ("_db", "_config")

    def __init__(self, db: DatabaseManager, config: ForgettingConfig) -> None:
        self._db = db
        self._config = config

    def filter(
        self,
        all_results: dict[str, list[tuple[str, float]]],
        profile_id: str,
        context: Any,
    ) -> dict[str, list[tuple[str, float]]]:
        """Drop excluded facts and re-weight the remaining scores.

        Has the (all_results, profile_id, context) shape expected of a
        registry filter function.

        Args:
            all_results: Mapping of channel name to [(fact_id, score)].
            profile_id: Profile whose retention data is consulted.
            context: Accepted for signature compatibility; not used.

        Returns:
            ``all_results`` itself when the filter is disabled (HR-06) or
            there are no facts at all; otherwise a new dict holding one
            (possibly empty) list per input channel.
        """
        # HR-06: a disabled filter is a pass-through.
        if not self._config.enabled:
            return all_results

        # Every distinct fact ID mentioned by any channel.
        fact_ids: set[str] = {
            fact_id
            for hits in all_results.values()
            for fact_id, _ in hits
        }
        if not fact_ids:
            return all_results

        # One batched lookup, indexed by fact ID.
        rows = self._db.batch_get_retention(list(fact_ids), profile_id)
        retention_by_fact: dict[str, dict] = {
            row["fact_id"]: row for row in rows
        }

        # NOTE(review): lists keep their incoming order after weighting. If
        # downstream fusion is rank-based, confirm that not re-sorting here
        # is intended.
        return {
            channel: self._apply_retention(hits, retention_by_fact)
            for channel, hits in all_results.items()
        }

    @staticmethod
    def _apply_retention(
        hits: list[tuple[str, float]],
        retention_by_fact: dict[str, dict],
    ) -> list[tuple[str, float]]:
        """Return one channel's hits with retention weighting applied."""
        kept: list[tuple[str, float]] = []
        for fact_id, score in hits:
            row = retention_by_fact.get(fact_id)
            if row is None:
                # No retention row: treated as a new memory, score untouched.
                kept.append((fact_id, score))
                continue

            zone = row.get("lifecycle_zone", "active")
            if zone not in _EXCLUDED_ZONES:
                # Zones without a configured weight keep their full score.
                kept.append((fact_id, score * _ZONE_WEIGHTS.get(zone, 1.0)))
        return kept
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def register_forgetting_filter(
    registry: ChannelRegistry,
    db: DatabaseManager,
    config: ForgettingConfig,
) -> None:
    """Attach a ForgettingFilter to ``registry`` when forgetting is enabled.

    With ``config.enabled`` false this is a no-op: no filter object is
    created and nothing is registered.

    Args:
        registry: Channel registry that receives the filter callable.
        db: Database manager handed to the filter for retention queries.
        config: Forgetting configuration.
    """
    if config.enabled:
        registry.register_filter(ForgettingFilter(db, config).filter)
|