memplex 3.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83) hide show
  1. memnex/__init__.py +31 -0
  2. memnex/__main__.py +6 -0
  3. memnex/_plugin/.claude-plugin/plugin.json +24 -0
  4. memnex/_plugin/.mcp.json +9 -0
  5. memnex/_plugin/__init__.py +0 -0
  6. memnex/_plugin/hooks/hooks.json +43 -0
  7. memnex/_plugin/scripts/hook-runner.py +166 -0
  8. memnex/_plugin/skills/mem-explore/SKILL.md +83 -0
  9. memnex/_plugin/skills/mem-manage/SKILL.md +92 -0
  10. memnex/_plugin/skills/mem-search/SKILL.md +85 -0
  11. memnex/_plugin/skills/mem-write/SKILL.md +78 -0
  12. memnex/adapters/__init__.py +14 -0
  13. memnex/adapters/claude_skill.py +169 -0
  14. memnex/adapters/cli.py +525 -0
  15. memnex/adapters/http_api.py +314 -0
  16. memnex/adapters/mcp_server.py +448 -0
  17. memnex/compaction.py +563 -0
  18. memnex/config.py +366 -0
  19. memnex/core/__init__.py +13 -0
  20. memnex/core/associator/__init__.py +8 -0
  21. memnex/core/associator/domain_classifier.py +75 -0
  22. memnex/core/associator/entity_aligner.py +127 -0
  23. memnex/core/associator/ref_linker.py +197 -0
  24. memnex/core/associator/term_mapper.py +77 -0
  25. memnex/core/dictionaries/__init__.py +50 -0
  26. memnex/core/engine.py +667 -0
  27. memnex/core/extractors/__init__.py +15 -0
  28. memnex/core/extractors/docx.py +97 -0
  29. memnex/core/extractors/image.py +233 -0
  30. memnex/core/extractors/markdown.py +139 -0
  31. memnex/core/extractors/pdf.py +133 -0
  32. memnex/core/extractors/vision_mapper.py +131 -0
  33. memnex/core/handlers/__init__.py +7 -0
  34. memnex/core/handlers/clipboard.py +40 -0
  35. memnex/core/handlers/file_handler.py +62 -0
  36. memnex/core/handlers/url_handler.py +132 -0
  37. memnex/llm/__init__.py +25 -0
  38. memnex/llm/enhancer.py +226 -0
  39. memnex/llm/fallback_chain.py +87 -0
  40. memnex/llm/injection_guard.py +178 -0
  41. memnex/llm/provider.py +130 -0
  42. memnex/llm/providers/__init__.py +22 -0
  43. memnex/llm/providers/anthropic.py +135 -0
  44. memnex/llm/providers/local.py +135 -0
  45. memnex/llm/providers/rule_based.py +68 -0
  46. memnex/llm/sanitizer.py +67 -0
  47. memnex/models/__init__.py +68 -0
  48. memnex/models/feedback.py +42 -0
  49. memnex/models/graph.py +33 -0
  50. memnex/models/memory.py +102 -0
  51. memnex/models/misc.py +185 -0
  52. memnex/models/paragraph.py +45 -0
  53. memnex/models/search.py +51 -0
  54. memnex/models/source.py +23 -0
  55. memnex/models/task.py +62 -0
  56. memnex/processing/__init__.py +1 -0
  57. memnex/processing/graph_builder.py +278 -0
  58. memnex/processing/merger/__init__.py +6 -0
  59. memnex/processing/merger/confidence_calculator.py +127 -0
  60. memnex/processing/merger/conflict_resolver.py +116 -0
  61. memnex/retrieval/__init__.py +1 -0
  62. memnex/retrieval/dedup.py +386 -0
  63. memnex/retrieval/embedding.py +289 -0
  64. memnex/retrieval/reranker.py +299 -0
  65. memnex/service.py +902 -0
  66. memnex/storage/__init__.py +65 -0
  67. memnex/storage/base.py +132 -0
  68. memnex/storage/changelog.py +106 -0
  69. memnex/storage/feedback.py +486 -0
  70. memnex/storage/lite/__init__.py +5 -0
  71. memnex/storage/lite/store.py +606 -0
  72. memnex/storage/vector.py +265 -0
  73. memnex/wiki/__init__.py +11 -0
  74. memnex/wiki/community.py +221 -0
  75. memnex/wiki/compiler.py +545 -0
  76. memnex/wiki/generator.py +270 -0
  77. memnex/wiki/search.py +282 -0
  78. memnex/worker.py +412 -0
  79. memplex-3.2.0.dist-info/METADATA +37 -0
  80. memplex-3.2.0.dist-info/RECORD +83 -0
  81. memplex-3.2.0.dist-info/WHEEL +5 -0
  82. memplex-3.2.0.dist-info/entry_points.txt +2 -0
  83. memplex-3.2.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,299 @@
1
+ """Reranker -- multi-dimensional result re-ranking + optional CrossEncoder.
2
+
3
+ Two-stage retrieval architecture::
4
+
5
+ Stage 1 (bi-encoder, fast):
6
+ Reranker scores candidates across 5 dimensions and returns top-K.
7
+
8
+ Stage 2 (cross-encoder, precise, optional):
9
+ CrossEncoderReranker re-scores the top-K with a jointly-encoded model
10
+ for significantly higher accuracy on ambiguous queries.
11
+
12
+ Usage::
13
+
14
+ reranker = Reranker(embedding_service)
15
+ ranked = reranker.rerank("query text", search_results, top_k=10)
16
+
17
+ # Optional stage 2
18
+ cross = CrossEncoderReranker(enabled=True)
19
+ ranked = cross.rerank("query text", ranked)
20
+ """
21
+
22
+ from __future__ import annotations
23
+
24
+ import logging
25
+ import math
26
+ from datetime import datetime
27
+ from typing import Dict, List, Optional, TYPE_CHECKING
28
+
29
+ from memnex.retrieval.embedding import EmbeddingService, Vector
30
+ from memnex.models import SearchResult, SourceType
31
+
32
+ if TYPE_CHECKING:
33
+ from memnex.storage.base import MemoryStore
34
+ from memnex.models import Function
35
+
36
+ logger = logging.getLogger(__name__)
37
+
38
+
39
+ # ── Helper ────────────────────────────────────────────────────────────
40
+
41
+
42
def cosine_similarity(a: Vector, b: Vector) -> float:
    """Compute the cosine similarity between two vectors.

    Parameters
    ----------
    a, b:
        Numeric sequences. They are paired element-wise with ``zip`` for the
        dot product, so only the overlapping prefix contributes to it when
        the lengths differ.

    Returns
    -------
    float
        ``dot(a, b) / (|a| * |b|)``, or ``0.0`` when either vector has zero
        norm (including empty input), where the similarity is undefined.
    """
    dot = sum(x * y for x, y in zip(a, b))
    norm_a = math.sqrt(sum(x * x for x in a))
    norm_b = math.sqrt(sum(x * x for x in b))
    denom = norm_a * norm_b
    # Guard the degenerate case explicitly rather than adding an epsilon to
    # the denominator: an additive epsilon biases every result and collapses
    # the similarity of small-magnitude vectors towards zero.
    if denom == 0:
        return 0.0
    return dot / denom
48
+
49
+
50
+ # ── 5-dimensional Reranker ────────────────────────────────────────────
51
+
52
+
53
class Reranker:
    """Multi-path retrieval result re-ranker.

    Scoring dimensions and default weights::

        raw_relevance        0.25 -- original score from each retrieval path
        semantic_similarity  0.30 -- cosine(query_vec, result_vec)
        recency_decay        0.15 -- exp(-days / 60): ~0.61 at 30 days,
                                     0.5 at ~42 days
        source_authority     0.15 -- requirement > meeting > code > wiki
        frequency            0.15 -- log-scaled access count blended with
                                     last-access recency

    Parameters
    ----------
    embedding_service:
        Provides ``embed()`` for computing semantic similarity.
    weights:
        Optional custom dimension weights (intended to sum to ~1.0; this is
        not validated). Dimensions missing from the mapping fall back to the
        defaults above.
    storage:
        Optional :class:`MemoryStore` for reading *access_count*.
    """

    _SOURCE_WEIGHTS: Dict[SourceType, float] = {
        SourceType.REQUIREMENT: 1.0,
        SourceType.MEETING: 0.8,
        SourceType.CODE: 0.6,
        SourceType.WIKI: 0.4,
    }

    _DEFAULT_WEIGHTS: Dict[str, float] = {
        "raw_relevance": 0.25,
        "semantic_similarity": 0.30,
        "recency_decay": 0.15,
        "source_authority": 0.15,
        "frequency": 0.15,
    }

    # Time constant (in days) of the exponential decay exp(-days / 60).
    _DECAY_DAYS = 60

    def __init__(
        self,
        embedding_service: EmbeddingService,
        weights: Optional[Dict[str, float]] = None,
        storage: Optional["MemoryStore"] = None,
    ) -> None:
        self.embedder = embedding_service
        self.storage = storage
        # Merge over the defaults so a partial custom mapping cannot cause a
        # KeyError when the weighted sum is computed in rerank().
        self.weights = {**self._DEFAULT_WEIGHTS, **(weights or {})}

    # ── Public API ──────────────────────────────────────────────────

    def rerank(
        self,
        query: str,
        results: List[SearchResult],
        top_k: int = 10,
        query_vector: Optional[Vector] = None,
    ) -> List[SearchResult]:
        """Re-rank *results* using the 5-dimensional scoring model.

        Parameters
        ----------
        query:
            Original query text.
        results:
            Candidate results from multi-path retrieval.
        top_k:
            Maximum number of results to return. Values ``<= 0`` yield an
            empty list.
        query_vector:
            Pre-computed query embedding (avoids re-embedding).

        Returns
        -------
        List[SearchResult]
            A new list holding at most *top_k* of the input results, ordered
            by descending weighted score. The input list and the results'
            ``relevance_score`` values are left untouched.
        """
        if not results or top_k <= 0:
            return []

        if query_vector is None:
            query_vector = self.embedder.embed(query)

        weights = self.weights
        scored: List[tuple[float, SearchResult]] = []

        for r in results:
            # 1. Raw relevance from the retrieval path
            raw_score = r.relevance_score

            # 2. Semantic similarity (reuse vector_cache when available)
            if r.vector_cache is not None:
                result_vector = r.vector_cache
            else:
                result_vector = self.embedder.embed(r.summary)
            semantic_score = cosine_similarity(query_vector, result_vector)

            # 3. Recency decay
            recency_score = self._recency_decay(r.updated_at)

            # 4. Source authority
            source_weight = self._source_weight(r.source_type)

            # 5. Frequency (access count blended with last-access recency);
            #    neutral 0.5 when no stored record is available.
            func = self._lookup(r.func_id)
            frequency_score = self._frequency_score(func) if func else 0.5

            # Weighted sum
            final_score = (
                raw_score * weights["raw_relevance"]
                + semantic_score * weights["semantic_similarity"]
                + recency_score * weights["recency_decay"]
                + source_weight * weights["source_authority"]
                + frequency_score * weights["frequency"]
            )
            scored.append((final_score, r))

        # Stable sort: results with equal scores keep their input order.
        scored.sort(key=lambda pair: pair[0], reverse=True)
        return [r for _, r in scored[:top_k]]

    def _lookup(self, func_id) -> Optional["Function"]:
        """Fetch the stored record for *func_id*, or ``None`` if unavailable.

        Storage lookups are best-effort: a failure must not abort ranking,
        but it is logged so that it is not silently lost.
        """
        if self.storage is None:
            return None
        try:
            return self.storage.get(func_id)
        except Exception as exc:
            logger.debug("Frequency lookup failed for %r: %s", func_id, exc)
            return None

    # ── Dimension scorers ───────────────────────────────────────────

    @staticmethod
    def _days_since(timestamp) -> Optional[int]:
        """Return whole days elapsed since *timestamp*, clamped to ``>= 0``.

        Accepts a ``datetime`` or an ISO-8601 string. Returns ``None`` when
        the value is missing, unparseable, or of another type.
        """
        if isinstance(timestamp, str):
            try:
                timestamp = datetime.fromisoformat(timestamp)
            except (ValueError, TypeError):
                return None
        if not isinstance(timestamp, datetime):
            return None
        # Take "now" in the timestamp's own timezone (naive when tzinfo is
        # None): subtracting an aware datetime from a naive one raises
        # TypeError.
        now = datetime.now(tz=timestamp.tzinfo)
        return max(0, (now - timestamp).days)

    @staticmethod
    def _recency_decay(updated_at: Optional[datetime]) -> float:
        """Exponential time decay ``exp(-days / 60)``, range (0, 1].

        Roughly 0.61 at 30 days; the score halves about every 41.6 days
        (``60 * ln 2``). Accepts a ``datetime`` or an ISO-8601 string and
        returns a neutral ``0.5`` when the timestamp is missing or cannot
        be parsed.
        """
        days = Reranker._days_since(updated_at)
        if days is None:
            return 0.5
        return math.exp(-days / Reranker._DECAY_DAYS)

    def _source_weight(self, source_type: SourceType) -> float:
        """Authority weight by source type.

        requirement=1.0 > meeting=0.8 > code=0.6 > wiki=0.4; any other
        source type gets 0.5.
        """
        return self._SOURCE_WEIGHTS.get(source_type, 0.5)

    @staticmethod
    def _frequency_score(func: "Function") -> float:
        """Access-frequency score combining count and recency.

        ``freq = log(1 + count) / log(1 + 100)``, clamped to [0, 1] so that
        counts above 100 saturate instead of exceeding the documented range.
        Combined with a last-access recency factor: 60% freq + 40% recency.
        The recency factor is ``exp(-days / 60)``, or ``0.3`` when the last
        access time is missing or unparseable.
        """
        # A missing, None, or negative count is treated as zero; log1p would
        # otherwise raise on None or on values <= -1.
        access_count = max(0, getattr(func, "access_count", 0) or 0)
        freq = min(1.0, math.log1p(access_count) / math.log1p(100))

        days = Reranker._days_since(getattr(func, "last_accessed_at", None))
        if days is None:
            recency = 0.3
        else:
            recency = math.exp(-days / Reranker._DECAY_DAYS)

        return freq * 0.6 + recency * 0.4
225
+
226
+
227
+ # ── CrossEncoderReranker (stage 2) ────────────────────────────────────
228
+
229
+
230
class CrossEncoderReranker:
    """Cross-encoder precision re-ranker (stage 2 of two-stage retrieval).

    Uses a jointly-encoded model (e.g. BGE-reranker-v2-m3) for significantly
    higher accuracy than bi-encoder cosine similarity. Only runs on the
    top-K candidates from the bi-encoder stage, so latency impact is minimal.

    The model is **lazily loaded** on first use to avoid blocking startup.

    Parameters
    ----------
    model_name:
        HuggingFace model identifier.
    enabled:
        Master switch. When *False*, :meth:`rerank` returns input unchanged.
    """

    def __init__(
        self,
        model_name: str = "BAAI/bge-reranker-v2-m3",
        enabled: bool = False,
    ) -> None:
        self.model_name = model_name
        self.enabled = enabled
        self._model = None  # lazy-loaded by _load_model()

    def _load_model(self) -> None:
        """Load the cross-encoder model on first call.

        On any failure (missing ``sentence-transformers`` package or a model
        loading error) a warning is logged and the re-ranker disables itself,
        so later :meth:`rerank` calls short-circuit without retrying.
        """
        if self._model is not None:
            return
        try:
            from sentence_transformers import CrossEncoder  # type: ignore

            self._model = CrossEncoder(self.model_name)
            logger.info("CrossEncoder loaded: %s", self.model_name)
        except ImportError:
            logger.warning(
                "CrossEncoder unavailable (pip install sentence-transformers); "
                "skipping precision re-ranking"
            )
            self.enabled = False
        except Exception as exc:
            logger.warning("Failed to load CrossEncoder %s: %s", self.model_name, exc)
            self.enabled = False

    # ── Public API ──────────────────────────────────────────────────

    def rerank(
        self, query: str, results: List[SearchResult]
    ) -> List[SearchResult]:
        """Re-score *results* with the cross-encoder.

        Each result's ``relevance_score`` is overwritten with the model's
        score for the ``(query, summary)`` pair.

        Returns a new list sorted by cross-encoder score (descending); the
        order of the caller's list is not modified. When the model is
        unavailable or disabled, or *results* is empty, the input is returned
        unchanged.
        """
        if not self.enabled or not results:
            return results

        self._load_model()
        if self._model is None:
            return results

        pairs = [(query, r.summary) for r in results]
        scores = self._model.predict(pairs)
        for r, score in zip(results, scores):
            r.relevance_score = float(score)

        # sorted() instead of list.sort(): do not reorder the caller's list
        # as a side effect.
        return sorted(results, key=lambda r: r.relevance_score, reverse=True)