memplex 3.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- memnex/__init__.py +31 -0
- memnex/__main__.py +6 -0
- memnex/_plugin/.claude-plugin/plugin.json +24 -0
- memnex/_plugin/.mcp.json +9 -0
- memnex/_plugin/__init__.py +0 -0
- memnex/_plugin/hooks/hooks.json +43 -0
- memnex/_plugin/scripts/hook-runner.py +166 -0
- memnex/_plugin/skills/mem-explore/SKILL.md +83 -0
- memnex/_plugin/skills/mem-manage/SKILL.md +92 -0
- memnex/_plugin/skills/mem-search/SKILL.md +85 -0
- memnex/_plugin/skills/mem-write/SKILL.md +78 -0
- memnex/adapters/__init__.py +14 -0
- memnex/adapters/claude_skill.py +169 -0
- memnex/adapters/cli.py +525 -0
- memnex/adapters/http_api.py +314 -0
- memnex/adapters/mcp_server.py +448 -0
- memnex/compaction.py +563 -0
- memnex/config.py +366 -0
- memnex/core/__init__.py +13 -0
- memnex/core/associator/__init__.py +8 -0
- memnex/core/associator/domain_classifier.py +75 -0
- memnex/core/associator/entity_aligner.py +127 -0
- memnex/core/associator/ref_linker.py +197 -0
- memnex/core/associator/term_mapper.py +77 -0
- memnex/core/dictionaries/__init__.py +50 -0
- memnex/core/engine.py +667 -0
- memnex/core/extractors/__init__.py +15 -0
- memnex/core/extractors/docx.py +97 -0
- memnex/core/extractors/image.py +233 -0
- memnex/core/extractors/markdown.py +139 -0
- memnex/core/extractors/pdf.py +133 -0
- memnex/core/extractors/vision_mapper.py +131 -0
- memnex/core/handlers/__init__.py +7 -0
- memnex/core/handlers/clipboard.py +40 -0
- memnex/core/handlers/file_handler.py +62 -0
- memnex/core/handlers/url_handler.py +132 -0
- memnex/llm/__init__.py +25 -0
- memnex/llm/enhancer.py +226 -0
- memnex/llm/fallback_chain.py +87 -0
- memnex/llm/injection_guard.py +178 -0
- memnex/llm/provider.py +130 -0
- memnex/llm/providers/__init__.py +22 -0
- memnex/llm/providers/anthropic.py +135 -0
- memnex/llm/providers/local.py +135 -0
- memnex/llm/providers/rule_based.py +68 -0
- memnex/llm/sanitizer.py +67 -0
- memnex/models/__init__.py +68 -0
- memnex/models/feedback.py +42 -0
- memnex/models/graph.py +33 -0
- memnex/models/memory.py +102 -0
- memnex/models/misc.py +185 -0
- memnex/models/paragraph.py +45 -0
- memnex/models/search.py +51 -0
- memnex/models/source.py +23 -0
- memnex/models/task.py +62 -0
- memnex/processing/__init__.py +1 -0
- memnex/processing/graph_builder.py +278 -0
- memnex/processing/merger/__init__.py +6 -0
- memnex/processing/merger/confidence_calculator.py +127 -0
- memnex/processing/merger/conflict_resolver.py +116 -0
- memnex/retrieval/__init__.py +1 -0
- memnex/retrieval/dedup.py +386 -0
- memnex/retrieval/embedding.py +289 -0
- memnex/retrieval/reranker.py +299 -0
- memnex/service.py +902 -0
- memnex/storage/__init__.py +65 -0
- memnex/storage/base.py +132 -0
- memnex/storage/changelog.py +106 -0
- memnex/storage/feedback.py +486 -0
- memnex/storage/lite/__init__.py +5 -0
- memnex/storage/lite/store.py +606 -0
- memnex/storage/vector.py +265 -0
- memnex/wiki/__init__.py +11 -0
- memnex/wiki/community.py +221 -0
- memnex/wiki/compiler.py +545 -0
- memnex/wiki/generator.py +270 -0
- memnex/wiki/search.py +282 -0
- memnex/worker.py +412 -0
- memplex-3.2.0.dist-info/METADATA +37 -0
- memplex-3.2.0.dist-info/RECORD +83 -0
- memplex-3.2.0.dist-info/WHEEL +5 -0
- memplex-3.2.0.dist-info/entry_points.txt +2 -0
- memplex-3.2.0.dist-info/top_level.txt +1 -0
memnex/storage/vector.py
ADDED
|
@@ -0,0 +1,265 @@
|
|
|
1
|
+
"""Vector store abstraction -- migrated from the legacy ``storage/`` package.
|
|
2
|
+
|
|
3
|
+
Provides a ``VectorStore`` Protocol plus two implementations:
|
|
4
|
+
|
|
5
|
+
* ``InMemoryVectorStore`` -- binary bag-of-words cosine similarity, zero
|
|
6
|
+
external dependencies.
|
|
7
|
+
* ``ChromaVectorStore`` -- ChromaDB + sentence-transformers for production
|
|
8
|
+
quality embeddings.
|
|
9
|
+
|
|
10
|
+
Usage::
|
|
11
|
+
|
|
12
|
+
from memnex.storage.vector import create_vector_store
|
|
13
|
+
|
|
14
|
+
vs = create_vector_store("auto") # ChromaDB if available, else InMemory
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
import logging
|
|
20
|
+
from dataclasses import dataclass
|
|
21
|
+
from typing import Dict, List, Optional, Protocol, runtime_checkable
|
|
22
|
+
|
|
23
|
+
logger = logging.getLogger(__name__)
|
|
24
|
+
|
|
25
|
+
# Re-export the list[float] alias for convenience
|
|
26
|
+
Vector = List[float]
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@dataclass
class VectorSearchResult:
    """One entry in the ranked list returned by a vector store search."""

    # Identifier of the matched record.
    id: str
    # Ranking score as reported by the backend that produced the hit.
    score: float
    # Text stored alongside the record (may be an empty string).
    text: str
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
# ── Protocol ────────────────────────────────────────────────────────
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
@runtime_checkable
class VectorStore(Protocol):
    """Structural interface shared by the vector store backends.

    Any object exposing these six methods satisfies the protocol; because
    the class is ``runtime_checkable``, ``isinstance(obj, VectorStore)``
    checks for the presence of the methods (not their signatures).
    """

    def add(self, id: str, text: str, metadata: Optional[dict] = None) -> None:
        """Store ``text`` under ``id``, optionally with ``metadata``."""
        ...

    def upsert(self, id: str, vector: Vector, text: str = "") -> None:
        """Store a caller-supplied ``vector`` under ``id``."""
        ...

    def upsert_batch(self, items: Dict[str, Vector]) -> None:
        """Store several caller-supplied vectors, keyed by id."""
        ...

    def search(
        self,
        query: str,
        top_k: int = 5,
        query_vector: Optional[Vector] = None,
    ) -> List[VectorSearchResult]:
        """Return up to ``top_k`` hits for ``query`` or ``query_vector``."""
        ...

    def delete(self, id: str) -> None:
        """Remove the record stored under ``id``."""
        ...

    def clear(self) -> None:
        """Remove every record from the store."""
        ...
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
# ── InMemory implementation ─────────────────────────────────────────
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
class InMemoryVectorStore:
    """In-memory bag-of-words cosine similarity store.

    Zero external dependencies.  Suitable for Lite backend and testing.

    Text written through :meth:`add` is reduced to a set of tokens (binary
    presence, no term weighting).  Similarity is computed from those sets
    at search time, so scores never depend on the order in which records
    were added.  Vectors written through :meth:`upsert` are kept separately
    and are only ranked when ``search`` receives a ``query_vector``.
    """

    def __init__(self) -> None:
        # id -> (text, frozenset of tokens).  The token set is empty for
        # records written through ``upsert``.
        self._vectors: Dict[str, tuple] = {}
        # id -> caller-supplied vector (only for ``upsert`` records).
        self._stored_vectors: Dict[str, Vector] = {}
        # Every token seen by ``add`` since the last ``clear``.
        self._all_words: set = set()

    def add(self, id: str, text: str, metadata: Optional[dict] = None) -> None:
        """Index ``text`` under ``id`` for text search.

        Replaces any record already stored under ``id`` and drops a vector
        previously attached to it by ``upsert``.  ``metadata`` is accepted
        for interface compatibility and is not stored.
        """
        words = frozenset(self._get_words(text))
        self._all_words.update(words)
        self._vectors[id] = (text, words)
        self._stored_vectors.pop(id, None)

    def upsert(self, id: str, vector: Vector, text: str = "") -> None:
        """Store a caller-supplied ``vector`` (and optional ``text``).

        The text is kept for display in results but is not tokenised, so
        the record scores 0 in text searches.
        """
        self._stored_vectors[id] = vector
        self._vectors[id] = (text, frozenset())

    def upsert_batch(self, items: Dict[str, Vector]) -> None:
        """Store every ``id -> vector`` pair of ``items`` via ``upsert``."""
        for id, vector in items.items():
            self.upsert(id, vector)

    def search(
        self,
        query: str,
        top_k: int = 5,
        query_vector: Optional[Vector] = None,
    ) -> List[VectorSearchResult]:
        """Return the ``top_k`` best hits, highest score first.

        When ``query_vector`` is given, only records written through
        ``upsert`` are ranked (cosine similarity of the vectors) and
        ``query`` is ignored.  Otherwise every record is ranked by
        bag-of-words cosine similarity against ``query``.
        """
        if query_vector is not None:
            ranked = self._rank_by_vector(query_vector, top_k)
        else:
            ranked = self._rank_by_text(query, top_k)
        return [
            VectorSearchResult(id=vid, score=score, text=text)
            for vid, score, text in ranked
        ]

    def delete(self, id: str) -> None:
        """Remove the record stored under ``id``; unknown ids are ignored."""
        self._vectors.pop(id, None)
        self._stored_vectors.pop(id, None)

    def clear(self) -> None:
        """Remove all records and reset the vocabulary."""
        self._vectors.clear()
        self._stored_vectors.clear()
        self._all_words.clear()

    # ── Helpers ──────────────────────────────────────────────────

    def _rank_by_vector(self, query_vec: Vector, top_k: int) -> list:
        """Rank ``upsert`` records as ``(id, score, text)`` tuples."""
        scored = [
            # NOTE: ``_cosine`` pairs components with ``zip``, so vectors of
            # different length are compared on their common prefix only.
            (vid, self._cosine(query_vec, vec), self._vectors.get(vid, ("", None))[0])
            for vid, vec in self._stored_vectors.items()
        ]
        return self._top(scored, top_k)

    def _rank_by_text(self, query: str, top_k: int) -> list:
        """Rank all records against ``query`` as ``(id, score, text)`` tuples."""
        # Tokens never seen by ``add`` cannot match anything; dropping them
        # keeps the query norm restricted to the known vocabulary.
        query_words = self._get_words(query) & self._all_words
        scored = [
            (vid, self._set_cosine(query_words, doc_words), text)
            for vid, (text, doc_words) in self._vectors.items()
        ]
        return self._top(scored, top_k)

    @staticmethod
    def _top(scored: list, top_k: int) -> list:
        """Sort by score (descending, ties keep insertion order) and truncate."""
        scored.sort(key=lambda item: item[1], reverse=True)
        return scored[: max(top_k, 0)]

    @staticmethod
    def _get_words(text: str) -> set:
        """Tokenise ``text`` into a set of lower-cased tokens.

        Text containing any character in U+4E00..U+9FFF is split into
        individual characters; all other text is split on whitespace.
        """
        text_lower = text.lower()
        if any("\u4e00" <= ch <= "\u9fff" for ch in text):
            return set(text_lower)
        return set(text_lower.split())

    @staticmethod
    def _set_cosine(a, b) -> float:
        """Cosine similarity of two token sets viewed as binary vectors."""
        shared = len(a & b)
        # The epsilon avoids division by zero when either set is empty.
        return shared / ((len(a) ** 0.5) * (len(b) ** 0.5) + 1e-10)

    @staticmethod
    def _cosine(a: list, b: list) -> float:
        """Cosine similarity of two numeric sequences (epsilon-guarded)."""
        dot = sum(x * y for x, y in zip(a, b))
        norm_a = sum(x * x for x in a) ** 0.5
        norm_b = sum(x * x for x in b) ** 0.5
        return dot / (norm_a * norm_b + 1e-10)
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
# ── ChromaDB implementation ─────────────────────────────────────────
|
|
153
|
+
|
|
154
|
+
try:
|
|
155
|
+
import chromadb # type: ignore
|
|
156
|
+
from chromadb.config import Settings # type: ignore
|
|
157
|
+
|
|
158
|
+
_CHROMA_AVAILABLE = True
|
|
159
|
+
except ImportError:
|
|
160
|
+
_CHROMA_AVAILABLE = False
|
|
161
|
+
chromadb = None # type: ignore[assignment]
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
class ChromaVectorStore:
    """ChromaDB-backed vector store with sentence-transformers embeddings.

    Parameters
    ----------
    collection_name:
        Name of the Chroma collection to open or create.
    embedding_model:
        sentence-transformers model name used to embed text; the model is
        loaded lazily on the first call that needs it.

    Raises
    ------
    ImportError
        If ``chromadb`` could not be imported.
    """

    def __init__(
        self,
        collection_name: str = "functions",
        embedding_model: str = "all-MiniLM-L6-v2",
    ) -> None:
        if not _CHROMA_AVAILABLE:
            raise ImportError(
                "chromadb not installed: pip install chromadb sentence-transformers"
            )
        self.client = chromadb.Client(Settings(anonymized_telemetry=False))
        self.collection = self.client.get_or_create_collection(
            name=collection_name
        )
        self._embedding_model = embedding_model
        self._model = None

    def _get_model(self):
        """Return the embedding model, loading it on first use."""
        if self._model is None:
            from sentence_transformers import SentenceTransformer  # type: ignore

            self._model = SentenceTransformer(self._embedding_model)
        return self._model

    @staticmethod
    def _as_list(vector) -> list:
        """Return ``vector`` as a plain list (e.g. for array-like inputs)."""
        return vector if isinstance(vector, list) else list(vector)

    def add(self, id: str, text: str, metadata: Optional[dict] = None) -> None:
        """Embed ``text`` and upsert it under ``id``.

        ``metadata`` is forwarded only when non-empty: an empty dict is
        rejected by some chromadb releases, so it is sent as "no metadata".
        """
        embedding = self._get_model().encode([text])[0]
        self.collection.upsert(
            ids=[id],
            embeddings=[embedding.tolist()],
            documents=[text],
            metadatas=[metadata] if metadata else None,
        )

    def upsert(self, id: str, vector: Vector, text: str = "") -> None:
        """Upsert a caller-supplied ``vector`` (and ``text``) under ``id``."""
        self.collection.upsert(
            ids=[id],
            embeddings=[self._as_list(vector)],
            documents=[text],
        )

    def upsert_batch(self, items: Dict[str, Vector]) -> None:
        """Upsert every ``id -> vector`` pair of ``items`` in one call.

        Documents are written as empty strings.  An empty ``items`` mapping
        is a no-op, because Chroma is not handed an empty id list.
        """
        if not items:
            return
        ids = list(items.keys())
        vectors = [self._as_list(v) for v in items.values()]
        self.collection.upsert(
            ids=ids,
            embeddings=vectors,
            documents=[""] * len(ids),
        )

    def search(
        self,
        query: str,
        top_k: int = 5,
        query_vector: Optional[Vector] = None,
    ) -> List[VectorSearchResult]:
        """Query the collection with ``query_vector`` or the embedded ``query``.

        NOTE(review): ``score`` carries the raw value Chroma reports under
        ``distances`` -- presumably lower means closer, whereas
        ``InMemoryVectorStore`` reports a similarity where higher is
        better.  Confirm how callers interpret the score before mixing
        backends.
        """
        if query_vector is not None:
            q_emb = self._as_list(query_vector)
        else:
            q_emb = self._get_model().encode([query])[0].tolist()
        results = self.collection.query(
            query_embeddings=[q_emb], n_results=top_k
        )
        ids = results.get("ids", [[]])[0]
        distances = results.get("distances", [[]])[0]
        documents = results.get("documents", [[]])[0]
        return [
            VectorSearchResult(
                id=hit_id,
                score=float(distances[i]),
                # A record stored without a document may come back as None.
                text=documents[i] or "",
            )
            for i, hit_id in enumerate(ids)
        ]

    def delete(self, id: str) -> None:
        """Delete the record stored under ``id``."""
        self.collection.delete(ids=[id])

    def clear(self) -> None:
        """Delete every record in the collection.

        Deletes by explicit ids: an empty ``where={}`` filter is rejected
        by recent chromadb releases, so the ids are fetched first.
        """
        existing = self.collection.get(include=[])["ids"]
        if existing:
            self.collection.delete(ids=existing)
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
# ── Factory ──────────────────────────────────────────────────────────
|
|
246
|
+
|
|
247
|
+
|
|
248
|
+
def create_vector_store(backend: str = "auto") -> VectorStore:
    """Create a vector store by backend name.

    Parameters
    ----------
    backend:
        ``"inmemory"`` | ``"chroma"`` | ``"auto"`` (default).
        ``"auto"`` prefers ChromaDB and falls back to InMemory.

    Returns
    -------
    A new ``ChromaVectorStore`` or ``InMemoryVectorStore`` instance.

    Raises
    ------
    ValueError
        If ``backend`` is not one of the names above, or if ``"chroma"``
        is requested explicitly while ``chromadb`` is not installed.
    """
    if backend == "inmemory":
        return InMemoryVectorStore()
    if backend == "auto":
        return ChromaVectorStore() if _CHROMA_AVAILABLE else InMemoryVectorStore()
    if backend == "chroma":
        if not _CHROMA_AVAILABLE:
            # Keep ValueError (what this branch raised before), but say what
            # is actually wrong instead of calling the backend "unknown".
            raise ValueError(
                "Vector store backend 'chroma' requested but chromadb is not "
                "installed: pip install chromadb sentence-transformers"
            )
        return ChromaVectorStore()
    raise ValueError(f"Unknown vector store backend: {backend!r}")
|
memnex/wiki/__init__.py
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
"""MemNex Wiki Layer -- compile, generate, search, and lint wiki pages."""
|
|
2
|
+
|
|
3
|
+
from memnex.wiki.compiler import WikiCompiler
|
|
4
|
+
from memnex.wiki.generator import LLMWikiGenerator
|
|
5
|
+
from memnex.wiki.search import DualIndexSearch
|
|
6
|
+
|
|
7
|
+
__all__ = [
|
|
8
|
+
"WikiCompiler",
|
|
9
|
+
"LLMWikiGenerator",
|
|
10
|
+
"DualIndexSearch",
|
|
11
|
+
]
|
memnex/wiki/community.py
ADDED
|
@@ -0,0 +1,221 @@
|
|
|
1
|
+
"""GraphRAG community detection using the Louvain algorithm.
|
|
2
|
+
|
|
3
|
+
Detects communities (clusters) in the knowledge graph and generates
|
|
4
|
+
concept pages for each community. Falls back to domain-based grouping
|
|
5
|
+
when python-louvain / networkx are not available.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import logging
|
|
11
|
+
from dataclasses import dataclass, field
|
|
12
|
+
from typing import Dict, List, Optional, TYPE_CHECKING
|
|
13
|
+
|
|
14
|
+
from memnex.models import (
|
|
15
|
+
Function,
|
|
16
|
+
GraphData,
|
|
17
|
+
WikiPage,
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
if TYPE_CHECKING:
|
|
21
|
+
from memnex.storage.base import MemoryStore
|
|
22
|
+
|
|
23
|
+
logger = logging.getLogger(__name__)
|
|
24
|
+
|
|
25
|
+
# Only use strong-relationship edge types for community detection.
# Edges whose ``edge_type`` is not in this set are left out of the graph
# that is handed to the Louvain partitioner.
STRONG_EDGE_TYPES = frozenset({
    "REFERENCES",
    "DEPENDS_ON",
    "IMPLEMENTS",
    "ASSOCIATED_WITH",
})

# Default for ``GraphCommunityDetector.min_community_size``: groups with
# fewer members than this are not returned.
DEFAULT_MIN_COMMUNITY_SIZE = 3
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
@dataclass
class Community:
    """Group of graph node IDs identified as one community."""

    # Numeric identifier of the community.
    community_id: int
    # IDs of the member nodes; every instance gets its own list.
    node_ids: List[str] = field(default_factory=list)
    # Optional label; an empty string when no theme was assigned.
    theme: str = ""

    @property
    def size(self) -> int:
        """Number of member nodes."""
        member_total = len(self.node_ids)
        return member_total
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class GraphCommunityDetector:
    """Group knowledge-graph nodes into communities.

    ``detect_communities`` runs Louvain partitioning (``networkx`` plus the
    ``community`` module) over the edges whose type is listed in
    ``STRONG_EDGE_TYPES``.  If either import fails, or the graph has no
    such edge, nodes are grouped by their ``domain`` attribute instead.

    Parameters
    ----------
    min_community_size:
        Smallest member count a community needs in order to be returned;
        smaller groups are dropped.
    """

    def __init__(
        self,
        min_community_size: int = DEFAULT_MIN_COMMUNITY_SIZE,
    ) -> None:
        self.min_community_size = min_community_size

    def detect_communities(
        self,
        graph: GraphData,
        min_size: Optional[int] = None,
    ) -> List[Community]:
        """Partition ``graph`` into communities.

        Parameters
        ----------
        graph:
            Graph data exposing ``nodes`` and ``edges``.
        min_size:
            Per-call replacement for ``min_community_size``; ``None``
            keeps the instance setting.

        Returns
        -------
        Communities with at least the required number of members, each
        carrying the IDs of its nodes.
        """
        if min_size is None:
            threshold = self.min_community_size
        else:
            threshold = min_size

        try:
            import networkx as nx  # type: ignore
            import community as community_louvain  # type: ignore
        except ImportError:
            logger.info(
                "python-louvain/networkx not available, "
                "falling back to domain grouping"
            )
            return self._fallback_domain_grouping(graph, threshold)

        # Every node is registered; only strong-typed edges are connected.
        nx_graph = nx.Graph()
        for node in graph.nodes:
            nx_graph.add_node(self._node_key(node))
        for link in graph.edges:
            if link.edge_type not in STRONG_EDGE_TYPES:
                continue
            nx_graph.add_edge(link.source, link.target, weight=link.weight)

        if nx_graph.number_of_edges() == 0:
            logger.info("No strong edges found, falling back to domain grouping")
            return self._fallback_domain_grouping(graph, threshold)

        membership = community_louvain.best_partition(nx_graph)
        members_by_comm: Dict[int, List[str]] = {}
        for member, comm in membership.items():
            if comm not in members_by_comm:
                members_by_comm[comm] = []
            members_by_comm[comm].append(member)

        return [
            Community(community_id=comm, node_ids=members)
            for comm, members in members_by_comm.items()
            if len(members) >= threshold
        ]

    def generate_concept_pages(
        self,
        communities: List[Community],
        store: MemoryStore,
    ) -> List[WikiPage]:
        """Render one markdown concept page per community.

        Parameters
        ----------
        communities:
            Communities to render.
        store:
            Looked up via ``store.get(node_id)`` for each member; members
            the store returns nothing for are listed by bare ID.

        Returns
        -------
        One ``WikiPage`` per community, in input order.
        """
        pages: List[WikiPage] = []

        for community in communities:
            body: list[str] = [f"# Community {community.community_id}", ""]
            if community.theme:
                body += [f"**Theme:** {community.theme}", ""]
            body += [
                f"**Members:** {len(community.node_ids)} functions",
                "",
                "## Functions",
                "",
            ]

            for node_id in community.node_ids:
                func = store.get(node_id)
                if not func:
                    body.append(f"- [[{node_id}]]")
                    continue
                link = f"- [[{func.id}]] -- "
                label = f"{func.domain or 'uncategorized'} "
                rating = f"(confidence: {func.confidence:.2f})"
                body.append(link + label + rating)

            body.append("")

            pages.append(
                WikiPage(
                    page_id=f"community_{community.community_id}",
                    content="\n".join(body),
                    metadata={
                        "type": "community",
                        "community_id": community.community_id,
                        "member_count": len(community.node_ids),
                        "theme": community.theme,
                    },
                )
            )

        return pages

    # ── Private helpers ───────────────────────────────────────────────

    @staticmethod
    def _node_key(node) -> str:
        """Return ``node.id`` when the attribute exists, else ``str(node)``."""
        if hasattr(node, "id"):
            return node.id
        return str(node)

    def _fallback_domain_grouping(
        self,
        graph: GraphData,
        min_size: int,
    ) -> List[Community]:
        """Group nodes by ``domain``; nodes without one go to "uncategorized".

        Community IDs are the positions of the domains in sorted order,
        counted before groups smaller than ``min_size`` are dropped.
        """
        by_domain: Dict[str, List[str]] = {}
        for node in graph.nodes:
            key = self._node_key(node)
            label = getattr(node, "domain", None) or "uncategorized"
            if label not in by_domain:
                by_domain[label] = []
            by_domain[label].append(key)

        return [
            Community(community_id=position, node_ids=members, theme=label)
            for position, (label, members) in enumerate(sorted(by_domain.items()))
            if len(members) >= min_size
        ]
|