memplex 3.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83) hide show
  1. memnex/__init__.py +31 -0
  2. memnex/__main__.py +6 -0
  3. memnex/_plugin/.claude-plugin/plugin.json +24 -0
  4. memnex/_plugin/.mcp.json +9 -0
  5. memnex/_plugin/__init__.py +0 -0
  6. memnex/_plugin/hooks/hooks.json +43 -0
  7. memnex/_plugin/scripts/hook-runner.py +166 -0
  8. memnex/_plugin/skills/mem-explore/SKILL.md +83 -0
  9. memnex/_plugin/skills/mem-manage/SKILL.md +92 -0
  10. memnex/_plugin/skills/mem-search/SKILL.md +85 -0
  11. memnex/_plugin/skills/mem-write/SKILL.md +78 -0
  12. memnex/adapters/__init__.py +14 -0
  13. memnex/adapters/claude_skill.py +169 -0
  14. memnex/adapters/cli.py +525 -0
  15. memnex/adapters/http_api.py +314 -0
  16. memnex/adapters/mcp_server.py +448 -0
  17. memnex/compaction.py +563 -0
  18. memnex/config.py +366 -0
  19. memnex/core/__init__.py +13 -0
  20. memnex/core/associator/__init__.py +8 -0
  21. memnex/core/associator/domain_classifier.py +75 -0
  22. memnex/core/associator/entity_aligner.py +127 -0
  23. memnex/core/associator/ref_linker.py +197 -0
  24. memnex/core/associator/term_mapper.py +77 -0
  25. memnex/core/dictionaries/__init__.py +50 -0
  26. memnex/core/engine.py +667 -0
  27. memnex/core/extractors/__init__.py +15 -0
  28. memnex/core/extractors/docx.py +97 -0
  29. memnex/core/extractors/image.py +233 -0
  30. memnex/core/extractors/markdown.py +139 -0
  31. memnex/core/extractors/pdf.py +133 -0
  32. memnex/core/extractors/vision_mapper.py +131 -0
  33. memnex/core/handlers/__init__.py +7 -0
  34. memnex/core/handlers/clipboard.py +40 -0
  35. memnex/core/handlers/file_handler.py +62 -0
  36. memnex/core/handlers/url_handler.py +132 -0
  37. memnex/llm/__init__.py +25 -0
  38. memnex/llm/enhancer.py +226 -0
  39. memnex/llm/fallback_chain.py +87 -0
  40. memnex/llm/injection_guard.py +178 -0
  41. memnex/llm/provider.py +130 -0
  42. memnex/llm/providers/__init__.py +22 -0
  43. memnex/llm/providers/anthropic.py +135 -0
  44. memnex/llm/providers/local.py +135 -0
  45. memnex/llm/providers/rule_based.py +68 -0
  46. memnex/llm/sanitizer.py +67 -0
  47. memnex/models/__init__.py +68 -0
  48. memnex/models/feedback.py +42 -0
  49. memnex/models/graph.py +33 -0
  50. memnex/models/memory.py +102 -0
  51. memnex/models/misc.py +185 -0
  52. memnex/models/paragraph.py +45 -0
  53. memnex/models/search.py +51 -0
  54. memnex/models/source.py +23 -0
  55. memnex/models/task.py +62 -0
  56. memnex/processing/__init__.py +1 -0
  57. memnex/processing/graph_builder.py +278 -0
  58. memnex/processing/merger/__init__.py +6 -0
  59. memnex/processing/merger/confidence_calculator.py +127 -0
  60. memnex/processing/merger/conflict_resolver.py +116 -0
  61. memnex/retrieval/__init__.py +1 -0
  62. memnex/retrieval/dedup.py +386 -0
  63. memnex/retrieval/embedding.py +289 -0
  64. memnex/retrieval/reranker.py +299 -0
  65. memnex/service.py +902 -0
  66. memnex/storage/__init__.py +65 -0
  67. memnex/storage/base.py +132 -0
  68. memnex/storage/changelog.py +106 -0
  69. memnex/storage/feedback.py +486 -0
  70. memnex/storage/lite/__init__.py +5 -0
  71. memnex/storage/lite/store.py +606 -0
  72. memnex/storage/vector.py +265 -0
  73. memnex/wiki/__init__.py +11 -0
  74. memnex/wiki/community.py +221 -0
  75. memnex/wiki/compiler.py +545 -0
  76. memnex/wiki/generator.py +270 -0
  77. memnex/wiki/search.py +282 -0
  78. memnex/worker.py +412 -0
  79. memplex-3.2.0.dist-info/METADATA +37 -0
  80. memplex-3.2.0.dist-info/RECORD +83 -0
  81. memplex-3.2.0.dist-info/WHEEL +5 -0
  82. memplex-3.2.0.dist-info/entry_points.txt +2 -0
  83. memplex-3.2.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,265 @@
1
+ """Vector store abstraction -- migrated from the legacy ``storage/`` package.
2
+
3
+ Provides a ``VectorStore`` Protocol plus two implementations:
4
+
5
+ * ``InMemoryVectorStore`` -- TF-IDF bag-of-words cosine similarity, zero
6
+ external dependencies.
7
+ * ``ChromaVectorStore`` -- ChromaDB + sentence-transformers for production
8
+ quality embeddings.
9
+
10
+ Usage::
11
+
12
+ from memnex.storage.vector import create_vector_store
13
+
14
+ vs = create_vector_store("auto") # ChromaDB if available, else InMemory
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ import logging
20
+ from dataclasses import dataclass
21
+ from typing import Dict, List, Optional, Protocol, runtime_checkable
22
+
23
+ logger = logging.getLogger(__name__)
24
+
25
+ # Type alias for a dense embedding vector (a plain list of floats)
26
+ Vector = List[float]
27
+
28
+
29
@dataclass
class VectorSearchResult:
    """One hit returned by a vector store ``search`` call.

    Fields, in positional order: the identifier of the matched item, the
    score reported by the backend for that item, and the text stored with it.
    """

    id: str  # identifier of the matched item
    score: float  # backend-reported score for this hit
    text: str  # text stored alongside the item
36
+
37
+
38
+ # ── Protocol ────────────────────────────────────────────────────────
39
+
40
+
41
@runtime_checkable
class VectorStore(Protocol):
    """Structural interface shared by the vector store backends.

    Being ``runtime_checkable``, ``isinstance(obj, VectorStore)`` checks only
    that the methods below exist on ``obj``; signatures are not verified.
    """

    def add(self, id: str, text: str, metadata: Optional[dict] = None) -> None:
        """Store ``text`` under ``id``, optionally with ``metadata``."""
        ...

    def upsert(self, id: str, vector: Vector, text: str = "") -> None:
        """Store a pre-computed ``vector`` (and optional ``text``) under ``id``."""
        ...

    def upsert_batch(self, items: Dict[str, Vector]) -> None:
        """Store several pre-computed vectors, given as an id -> vector mapping."""
        ...

    def search(
        self,
        query: str,
        top_k: int = 5,
        query_vector: Optional[Vector] = None,
    ) -> List[VectorSearchResult]:
        """Return up to ``top_k`` hits for ``query`` (or for ``query_vector``)."""
        ...

    def delete(self, id: str) -> None:
        """Remove the entry stored under ``id``."""
        ...

    def clear(self) -> None:
        """Remove every entry from the store."""
        ...
58
+
59
+
60
+ # ── InMemory implementation ─────────────────────────────────────────
61
+
62
+
63
class InMemoryVectorStore:
    """In-memory vector store with zero external dependencies.

    Text stored through :meth:`add` is tokenised into a set of words and
    matched against text queries with binary bag-of-words cosine similarity.
    Vectors stored through :meth:`upsert` are matched against a
    ``query_vector`` with ordinary cosine similarity.

    Word sets (rather than dense vectors over the vocabulary at insertion
    time) are stored per entry, so scores stay correct as the vocabulary
    grows with later ``add`` calls.

    Suitable for the Lite backend and testing.
    """

    def __init__(self) -> None:
        # id -> (text, word set). Entries created by ``upsert`` carry an empty
        # word set, so they appear in text searches with a score of 0.0.
        self._vectors: Dict[str, tuple] = {}
        # id -> pre-computed vector supplied through ``upsert``
        self._stored_vectors: Dict[str, Vector] = {}
        # Every word seen by ``add``; query words outside it are ignored.
        self._all_words: set = set()

    def add(self, id: str, text: str, metadata: Optional[dict] = None) -> None:
        """Index ``text`` under ``id`` for text search.

        ``metadata`` is accepted for interface compatibility and ignored.
        Any vector previously stored for ``id`` via ``upsert`` is dropped.
        """
        words = frozenset(self._get_words(text))
        self._all_words.update(words)
        self._vectors[id] = (text, words)
        self._stored_vectors.pop(id, None)

    def upsert(self, id: str, vector: Vector, text: str = "") -> None:
        """Store a pre-computed ``vector`` under ``id``.

        ``text`` is kept only so it can be returned with search hits; it is
        not tokenised, so the entry scores 0.0 in text searches.
        """
        self._stored_vectors[id] = vector
        self._vectors[id] = (text, frozenset())

    def upsert_batch(self, items: Dict[str, Vector]) -> None:
        """Store every ``id -> vector`` pair of ``items`` via :meth:`upsert`."""
        for item_id, vector in items.items():
            self.upsert(item_id, vector)

    def search(
        self,
        query: str,
        top_k: int = 5,
        query_vector: Optional[Vector] = None,
    ) -> List[VectorSearchResult]:
        """Return the ``top_k`` best hits, highest score first.

        When ``query_vector`` is given, only entries stored via ``upsert`` are
        compared (by vector cosine) and ``query`` is ignored. Otherwise every
        entry is scored against the words of ``query``.
        """
        if query_vector is not None:
            return self._search_by_vector(query_vector, top_k)
        return self._to_results(self._score_text(query), top_k)

    def _search_by_vector(
        self, query_vec: Vector, top_k: int
    ) -> List[VectorSearchResult]:
        """Rank the ``upsert``-ed vectors against ``query_vec``."""
        return self._to_results(self._score_vector(query_vec), top_k)

    def delete(self, id: str) -> None:
        """Remove ``id`` from the store; unknown ids are ignored."""
        self._vectors.pop(id, None)
        self._stored_vectors.pop(id, None)

    def clear(self) -> None:
        """Remove all entries and reset the vocabulary."""
        self._vectors.clear()
        self._stored_vectors.clear()
        self._all_words.clear()

    # ── Helpers ──────────────────────────────────────────────────

    def _score_text(self, query: str) -> list:
        """Return ``(id, score, text)`` for every entry, best score first."""
        # Only words the store has seen take part in the comparison.
        query_words = self._get_words(query) & self._all_words
        scored = [
            (item_id, self._word_cosine(query_words, words), text)
            for item_id, (text, words) in self._vectors.items()
        ]
        scored.sort(key=lambda hit: hit[1], reverse=True)
        return scored

    def _score_vector(self, query_vec: Vector) -> list:
        """Return ``(id, score, text)`` for every stored vector, best first."""
        scored = [
            (
                item_id,
                self._cosine(query_vec, vec),
                self._vectors.get(item_id, ("", None))[0],
            )
            for item_id, vec in self._stored_vectors.items()
        ]
        scored.sort(key=lambda hit: hit[1], reverse=True)
        return scored

    @staticmethod
    def _to_results(scored: list, top_k: int) -> List[VectorSearchResult]:
        """Wrap the first ``top_k`` scored tuples in ``VectorSearchResult``."""
        # A negative top_k would otherwise slice from the end of the list.
        limit = max(top_k, 0)
        return [
            VectorSearchResult(id=item_id, score=score, text=text)
            for item_id, score, text in scored[:limit]
        ]

    @staticmethod
    def _get_words(text: str) -> set:
        """Tokenise ``text`` into a set of lower-cased tokens.

        Text containing any character in U+4E00..U+9FFF is split into single
        characters; all other text is split on whitespace.
        """
        text_lower = text.lower()
        if any("\u4e00" <= c <= "\u9fff" for c in text):
            return set(text_lower)
        return set(text_lower.split())

    @staticmethod
    def _word_cosine(a, b) -> float:
        """Cosine similarity of two word sets viewed as binary vectors."""
        overlap = len(a & b)
        # The epsilon keeps empty sets from dividing by zero (score 0.0).
        return overlap / ((len(a) ** 0.5) * (len(b) ** 0.5) + 1e-10)

    @staticmethod
    def _cosine(a: list, b: list) -> float:
        """Cosine similarity of two numeric sequences.

        The dot product uses ``zip`` and therefore stops at the shorter
        sequence; the epsilon avoids division by zero for all-zero input.
        """
        dot = sum(x * y for x, y in zip(a, b))
        norm_a = sum(x * x for x in a) ** 0.5
        norm_b = sum(x * x for x in b) ** 0.5
        return dot / (norm_a * norm_b + 1e-10)
150
+
151
+
152
+ # ── ChromaDB implementation ─────────────────────────────────────────
153
+
154
+ try:
155
+ import chromadb # type: ignore
156
+ from chromadb.config import Settings # type: ignore
157
+
158
+ _CHROMA_AVAILABLE = True
159
+ except ImportError:
160
+ _CHROMA_AVAILABLE = False
161
+ chromadb = None # type: ignore[assignment]
162
+
163
+
164
class ChromaVectorStore:
    """ChromaDB-backed vector store with sentence-transformers embeddings.

    NOTE(review): ``search`` reports the raw *distance* returned by Chroma as
    ``score`` (smaller means closer), whereas ``InMemoryVectorStore`` reports
    a cosine similarity (larger means closer). Callers that mix backends need
    to account for this; it is left unchanged here for compatibility.
    """

    def __init__(
        self,
        collection_name: str = "functions",
        embedding_model: str = "all-MiniLM-L6-v2",
    ) -> None:
        """Create the Chroma client and open (or create) the collection.

        Raises
        ------
        ImportError
            If ``chromadb`` could not be imported.
        """
        if not _CHROMA_AVAILABLE:
            raise ImportError(
                "chromadb not installed: pip install chromadb sentence-transformers"
            )
        self.client = chromadb.Client(Settings(anonymized_telemetry=False))
        self.collection = self.client.get_or_create_collection(
            name=collection_name
        )
        self._embedding_model = embedding_model
        # The SentenceTransformer model is loaded on first use.
        self._model = None

    def _get_model(self):
        """Return the embedding model, loading it on the first call."""
        if self._model is None:
            from sentence_transformers import SentenceTransformer  # type: ignore

            self._model = SentenceTransformer(self._embedding_model)
        return self._model

    @staticmethod
    def _as_list(vector) -> list:
        """Return ``vector`` as a plain list (lists are passed through)."""
        return vector if isinstance(vector, list) else list(vector)

    def add(self, id: str, text: str, metadata: Optional[dict] = None) -> None:
        """Embed ``text`` with the model and upsert it under ``id``."""
        embedding = self._get_model().encode([text])[0]
        self.collection.upsert(
            ids=[id],
            embeddings=[embedding.tolist()],
            documents=[text],
            # Empty metadata dicts are rejected by newer ChromaDB releases,
            # so metadata is omitted entirely when there is none.
            metadatas=[metadata] if metadata else None,
        )

    def upsert(self, id: str, vector: Vector, text: str = "") -> None:
        """Upsert a pre-computed ``vector`` (and optional ``text``) under ``id``."""
        self.collection.upsert(
            ids=[id],
            embeddings=[self._as_list(vector)],
            documents=[text],
        )

    def upsert_batch(self, items: Dict[str, Vector]) -> None:
        """Upsert every ``id -> vector`` pair of ``items`` with empty text."""
        if not items:
            return
        ids = list(items)
        self.collection.upsert(
            ids=ids,
            embeddings=[self._as_list(vec) for vec in items.values()],
            documents=[""] * len(ids),
        )

    def search(
        self,
        query: str,
        top_k: int = 5,
        query_vector: Optional[Vector] = None,
    ) -> List[VectorSearchResult]:
        """Return up to ``top_k`` nearest entries.

        ``query_vector`` is used as the query embedding when given; otherwise
        ``query`` is embedded with the model. Each result's ``score`` is the
        distance reported by Chroma.
        """
        if top_k <= 0:
            # Chroma rejects non-positive n_results; mirror an empty result.
            return []
        if query_vector is not None:
            q_emb = self._as_list(query_vector)
        else:
            q_emb = self._get_model().encode([query])[0].tolist()
        results = self.collection.query(
            query_embeddings=[q_emb], n_results=top_k
        )
        # ``or [[]]`` also covers keys that are present but set to None.
        ids = (results.get("ids") or [[]])[0]
        distances = (results.get("distances") or [[]])[0]
        documents = (results.get("documents") or [[]])[0]
        return [
            VectorSearchResult(id=hit_id, score=float(dist), text=doc or "")
            for hit_id, dist, doc in zip(ids, distances, documents)
        ]

    def delete(self, id: str) -> None:
        """Delete the entry stored under ``id``."""
        self.collection.delete(ids=[id])

    def clear(self) -> None:
        """Remove every entry by dropping and recreating the collection.

        ``collection.delete(where={})`` is rejected by current ChromaDB
        releases (a delete needs ids or a non-empty filter), so the
        collection itself is deleted and created again under the same name.
        """
        name = self.collection.name
        self.client.delete_collection(name=name)
        self.collection = self.client.get_or_create_collection(name=name)
243
+
244
+
245
+ # ── Factory ──────────────────────────────────────────────────────────
246
+
247
+
248
def create_vector_store(backend: str = "auto") -> VectorStore:
    """Create a vector store by backend name.

    Parameters
    ----------
    backend:
        ``"inmemory"`` | ``"chroma"`` | ``"auto"`` (default).
        ``"auto"`` prefers ChromaDB and falls back to InMemory.

    Raises
    ------
    ValueError
        If ``backend`` is not one of the names above, or if ``"chroma"`` is
        requested while chromadb is not installed.
    """
    if backend == "inmemory":
        return InMemoryVectorStore()
    if backend == "chroma":
        if not _CHROMA_AVAILABLE:
            # Previously this case fell through to the "Unknown backend"
            # error, which hid the real cause (a missing dependency).
            raise ValueError(
                "Vector store backend 'chroma' requested but chromadb is not "
                "installed: pip install chromadb sentence-transformers"
            )
        return ChromaVectorStore()
    if backend == "auto":
        if _CHROMA_AVAILABLE:
            return ChromaVectorStore()
        return InMemoryVectorStore()
    raise ValueError(f"Unknown vector store backend: {backend!r}")
@@ -0,0 +1,11 @@
1
+ """MemNex Wiki Layer -- compile, generate, search, and lint wiki pages."""
2
+
3
+ from memnex.wiki.compiler import WikiCompiler
4
+ from memnex.wiki.generator import LLMWikiGenerator
5
+ from memnex.wiki.search import DualIndexSearch
6
+
7
+ __all__ = [
8
+ "WikiCompiler",
9
+ "LLMWikiGenerator",
10
+ "DualIndexSearch",
11
+ ]
@@ -0,0 +1,221 @@
1
+ """GraphRAG community detection using the Louvain algorithm.
2
+
3
+ Detects communities (clusters) in the knowledge graph and generates
4
+ concept pages for each community. Falls back to domain-based grouping
5
+ when python-louvain / networkx are not available.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import logging
11
+ from dataclasses import dataclass, field
12
+ from typing import Dict, List, Optional, TYPE_CHECKING
13
+
14
+ from memnex.models import (
15
+ Function,
16
+ GraphData,
17
+ WikiPage,
18
+ )
19
+
20
+ if TYPE_CHECKING:
21
+ from memnex.storage.base import MemoryStore
22
+
23
+ logger = logging.getLogger(__name__)
24
+
25
# Edge types considered strong relationships; only edges of these types are
# used when building the graph for community detection.
STRONG_EDGE_TYPES = frozenset(
    ("REFERENCES", "DEPENDS_ON", "IMPLEMENTS", "ASSOCIATED_WITH")
)

# Default minimum number of nodes a community needs in order to be reported.
DEFAULT_MIN_COMMUNITY_SIZE = 3
34
+
35
+
36
@dataclass
class Community:
    """A group of graph node IDs detected as belonging together."""

    community_id: int  # identifier assigned by the detector
    node_ids: List[str] = field(default_factory=list)  # member node IDs
    theme: str = ""  # optional label; empty when none was assigned

    @property
    def size(self) -> int:
        """Number of node IDs in this community."""
        member_count = len(self.node_ids)
        return member_count
47
+
48
+
49
class GraphCommunityDetector:
    """Louvain-based graph community detector.

    Detects communities (clusters) in the knowledge graph using the
    Louvain algorithm. Falls back to domain-based grouping when
    ``python-louvain`` or ``networkx`` are not installed, or when the graph
    has no strong edges.

    Parameters
    ----------
    min_community_size:
        Minimum number of nodes for a community to be reported.
        Communities below this threshold are discarded as noise.
    """

    def __init__(
        self,
        min_community_size: int = DEFAULT_MIN_COMMUNITY_SIZE,
    ) -> None:
        self.min_community_size = min_community_size

    def detect_communities(
        self,
        graph: GraphData,
        min_size: Optional[int] = None,
    ) -> List[Community]:
        """Detect communities in the graph.

        Parameters
        ----------
        graph:
            The graph data containing nodes and edges.
        min_size:
            Override minimum community size for this call.

        Returns
        -------
        List of Community objects, each with a list of node IDs.
        """
        threshold = min_size if min_size is not None else self.min_community_size

        try:
            import networkx as nx  # type: ignore

            # python-louvain installs under the top-level name ``community``.
            # Importing the submodule explicitly makes an unrelated package
            # with the same name fail here with ImportError (and use the
            # fallback) instead of crashing later at ``best_partition``.
            from community import community_louvain  # type: ignore
        except ImportError:
            logger.info(
                "python-louvain/networkx not available, "
                "falling back to domain grouping"
            )
            return self._fallback_domain_grouping(graph, threshold)

        # Build networkx graph with strong edges only
        G = nx.Graph()
        for node in graph.nodes:
            G.add_node(self._node_id(node))
        for edge in graph.edges:
            if edge.edge_type in STRONG_EDGE_TYPES:
                G.add_edge(edge.source, edge.target, weight=edge.weight)

        if G.number_of_edges() == 0:
            logger.info("No strong edges found, falling back to domain grouping")
            return self._fallback_domain_grouping(graph, threshold)

        # best_partition maps each node ID to a community ID; invert it.
        partition = community_louvain.best_partition(G)
        groups: Dict[int, List[str]] = {}
        for node_id, comm_id in partition.items():
            groups.setdefault(comm_id, []).append(node_id)

        return [
            Community(community_id=comm_id, node_ids=node_ids)
            for comm_id, node_ids in groups.items()
            if len(node_ids) >= threshold
        ]

    def generate_concept_pages(
        self,
        communities: List[Community],
        store: MemoryStore,
    ) -> List[WikiPage]:
        """Generate concept pages for detected communities.

        Each community gets a concept page listing its member functions.

        Parameters
        ----------
        communities:
            Detected communities.
        store:
            MemoryStore for looking up Function details.

        Returns
        -------
        List of WikiPage objects for the communities.
        """
        pages: List[WikiPage] = []

        for community in communities:
            lines: List[str] = [
                f"# Community {community.community_id}",
                "",
            ]
            if community.theme:
                lines.append(f"**Theme:** {community.theme}")
                lines.append("")

            lines.append(f"**Members:** {len(community.node_ids)} functions")
            lines.append("")

            lines.append("## Functions")
            lines.append("")

            for node_id in community.node_ids:
                func = store.get(node_id)
                if func:
                    lines.append(
                        f"- [[{func.id}]] -- "
                        f"{func.domain or 'uncategorized'} "
                        f"(confidence: {func.confidence:.2f})"
                    )
                else:
                    # No stored details for this node: emit a bare link.
                    lines.append(f"- [[{node_id}]]")

            lines.append("")

            pages.append(WikiPage(
                page_id=f"community_{community.community_id}",
                content="\n".join(lines),
                metadata={
                    "type": "community",
                    "community_id": community.community_id,
                    "member_count": len(community.node_ids),
                    "theme": community.theme,
                },
            ))

        return pages

    # ── Private helpers ───────────────────────────────────────────────

    @staticmethod
    def _node_id(node) -> str:
        """Return ``node.id`` when present, otherwise ``str(node)``."""
        return node.id if hasattr(node, "id") else str(node)

    def _fallback_domain_grouping(
        self,
        graph: GraphData,
        min_size: int,
    ) -> List[Community]:
        """Degraded community detection: group nodes by domain field."""
        groups: Dict[str, List[str]] = {}
        for node in graph.nodes:
            # Nodes without a (truthy) domain share the "uncategorized" group.
            domain = getattr(node, "domain", None) or "uncategorized"
            groups.setdefault(domain, []).append(self._node_id(node))

        # IDs follow the position in the sorted domain list, so they are
        # assigned before the size filter and may have gaps.
        return [
            Community(community_id=idx, node_ids=node_ids, theme=domain)
            for idx, (domain, node_ids) in enumerate(sorted(groups.items()))
            if len(node_ids) >= min_size
        ]