crprotocol 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- crp/__init__.py +126 -0
- crp/__main__.py +8 -0
- crp/_typing.py +27 -0
- crp/_version.py +5 -0
- crp/adapters.py +31 -0
- crp/advanced/__init__.py +40 -0
- crp/advanced/auto_ingest.py +400 -0
- crp/advanced/cqs.py +235 -0
- crp/advanced/cross_window.py +477 -0
- crp/advanced/curator.py +265 -0
- crp/advanced/feedback.py +146 -0
- crp/advanced/hierarchical.py +211 -0
- crp/advanced/meta_learning.py +401 -0
- crp/advanced/parallel.py +98 -0
- crp/advanced/review_cycle.py +329 -0
- crp/advanced/scale_mode.py +129 -0
- crp/advanced/source_grounding.py +207 -0
- crp/ckf/__init__.py +35 -0
- crp/ckf/community.py +377 -0
- crp/ckf/fabric.py +445 -0
- crp/ckf/gc.py +175 -0
- crp/ckf/graph_walk.py +87 -0
- crp/ckf/merge.py +133 -0
- crp/ckf/pattern_query.py +122 -0
- crp/ckf/pubsub.py +128 -0
- crp/ckf/semantic.py +207 -0
- crp/cli/__init__.py +7 -0
- crp/cli/main.py +329 -0
- crp/cli/sidecar.py +929 -0
- crp/cli/startup.py +272 -0
- crp/continuation/__init__.py +103 -0
- crp/continuation/completion.py +348 -0
- crp/continuation/degradation.py +157 -0
- crp/continuation/document_map.py +160 -0
- crp/continuation/flow.py +109 -0
- crp/continuation/gap.py +419 -0
- crp/continuation/manager.py +484 -0
- crp/continuation/quality_monitor.py +179 -0
- crp/continuation/stitch.py +419 -0
- crp/continuation/trigger.py +142 -0
- crp/continuation/voice.py +157 -0
- crp/core/__init__.py +69 -0
- crp/core/batch.py +77 -0
- crp/core/circuit_breaker.py +116 -0
- crp/core/config.py +377 -0
- crp/core/context_tools.py +540 -0
- crp/core/dispatch_router.py +3977 -0
- crp/core/errors.py +128 -0
- crp/core/extraction_facade.py +384 -0
- crp/core/facilitator.py +713 -0
- crp/core/idempotency.py +215 -0
- crp/core/orchestrator.py +1435 -0
- crp/core/relay_strategies.py +613 -0
- crp/core/security_manager.py +140 -0
- crp/core/session.py +134 -0
- crp/core/task_intent.py +36 -0
- crp/core/window.py +363 -0
- crp/envelope/__init__.py +30 -0
- crp/envelope/builder.py +288 -0
- crp/envelope/decomposer.py +236 -0
- crp/envelope/formatter.py +168 -0
- crp/envelope/packer.py +211 -0
- crp/envelope/reranker.py +209 -0
- crp/envelope/scoring.py +310 -0
- crp/extraction/__init__.py +45 -0
- crp/extraction/complexity.py +96 -0
- crp/extraction/contradiction.py +132 -0
- crp/extraction/pipeline.py +360 -0
- crp/extraction/quality_gate.py +237 -0
- crp/extraction/stage1_regex.py +173 -0
- crp/extraction/stage2_statistical.py +244 -0
- crp/extraction/stage3_gliner.py +210 -0
- crp/extraction/stage4_uie.py +183 -0
- crp/extraction/stage5_discourse.py +175 -0
- crp/extraction/stage6_llm.py +178 -0
- crp/extraction/structured_output.py +219 -0
- crp/extraction/types.py +299 -0
- crp/license_guard.py +722 -0
- crp/observability/__init__.py +30 -0
- crp/observability/audit.py +118 -0
- crp/observability/events.py +233 -0
- crp/observability/metrics.py +264 -0
- crp/observability/quality.py +135 -0
- crp/observability/structured_logging.py +81 -0
- crp/observability/telemetry.py +117 -0
- crp/provenance/__init__.py +314 -0
- crp/provenance/_embeddings.py +97 -0
- crp/provenance/_types.py +378 -0
- crp/provenance/attribution_scorer.py +252 -0
- crp/provenance/claim_detector.py +229 -0
- crp/provenance/contradiction_detector.py +243 -0
- crp/provenance/distortion_detector.py +397 -0
- crp/provenance/entailment_verifier.py +358 -0
- crp/provenance/fabrication_detector.py +203 -0
- crp/provenance/hallucination_scorer.py +320 -0
- crp/provenance/omission_analyzer.py +106 -0
- crp/provenance/provenance_chain.py +205 -0
- crp/provenance/report_generator.py +440 -0
- crp/providers/__init__.py +43 -0
- crp/providers/anthropic.py +270 -0
- crp/providers/base.py +135 -0
- crp/providers/custom.py +63 -0
- crp/providers/diagnostic.py +251 -0
- crp/providers/llamacpp.py +224 -0
- crp/providers/manager.py +139 -0
- crp/providers/ollama.py +243 -0
- crp/providers/openai.py +628 -0
- crp/providers/tokenizers.py +48 -0
- crp/py.typed +0 -0
- crp/resources/__init__.py +53 -0
- crp/resources/adaptive_allocator.py +525 -0
- crp/resources/cost_model.py +388 -0
- crp/resources/overhead_manager.py +217 -0
- crp/resources/resource_manager.py +262 -0
- crp/schemas/__init__.py +20 -0
- crp/schemas/cost-estimate.json +33 -0
- crp/schemas/crp-error.json +43 -0
- crp/schemas/envelope-preview.json +40 -0
- crp/schemas/persisted-state-header.json +27 -0
- crp/schemas/quality-report.json +94 -0
- crp/schemas/session-handle.json +33 -0
- crp/schemas/session-status.json +57 -0
- crp/schemas/stream-event.json +18 -0
- crp/schemas/task-intent.json +42 -0
- crp/security/__init__.py +93 -0
- crp/security/audit_trail.py +392 -0
- crp/security/binding.py +192 -0
- crp/security/compliance.py +813 -0
- crp/security/consent.py +593 -0
- crp/security/embedding_defense.py +161 -0
- crp/security/encryption.py +202 -0
- crp/security/injection.py +335 -0
- crp/security/integrity.py +267 -0
- crp/security/privacy.py +662 -0
- crp/security/quarantine.py +249 -0
- crp/security/rbac.py +221 -0
- crp/security/validation.py +164 -0
- crp/state/__init__.py +31 -0
- crp/state/cold_storage.py +258 -0
- crp/state/compaction.py +263 -0
- crp/state/critical_state.py +104 -0
- crp/state/event_log.py +313 -0
- crp/state/fact.py +189 -0
- crp/state/serialization.py +189 -0
- crp/state/session_cleanup.py +77 -0
- crp/state/snapshot.py +290 -0
- crp/state/warm_store.py +346 -0
- crprotocol-2.0.0.dist-info/METADATA +1295 -0
- crprotocol-2.0.0.dist-info/RECORD +153 -0
- crprotocol-2.0.0.dist-info/WHEEL +4 -0
- crprotocol-2.0.0.dist-info/entry_points.txt +2 -0
- crprotocol-2.0.0.dist-info/licenses/LICENSE.md +170 -0
- crprotocol-2.0.0.dist-info/licenses/NOTICE +18 -0
crp/ckf/community.py
ADDED
|
@@ -0,0 +1,377 @@
|
|
|
1
|
+
# Copyright © 2025 Constantinos Vidiniotis. All rights reserved.
|
|
2
|
+
# Licensed under Elastic License 2.0 — see LICENSE.md for details.
|
|
3
|
+
"""CKF Mode 4: Community detection — Leiden cluster summaries (§3.8).
|
|
4
|
+
|
|
5
|
+
Batch community detection per window. Incremental update for <10% change,
|
|
6
|
+
full rebuild for ≥30% (changes between 10% and 30% are currently also handled by a full rebuild). Falls back to connected components when leidenalg
|
|
7
|
+
is unavailable.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import logging
|
|
13
|
+
from dataclasses import dataclass, field
|
|
14
|
+
from typing import Any
|
|
15
|
+
|
|
16
|
+
from crp.extraction.types import FactGraph
|
|
17
|
+
|
|
18
|
+
logger = logging.getLogger(__name__)
|
|
19
|
+
|
|
20
|
+
# ---------------------------------------------------------------------------
|
|
21
|
+
# Optional dependency check
|
|
22
|
+
# ---------------------------------------------------------------------------
|
|
23
|
+
|
|
24
|
+
# Cache for the optional-dependency probe performed by _check_leiden().
# _IGRAPH / _LEIDENALG hold the imported modules once the probe succeeds and
# stay None when the probe has not run yet or the import failed.
_IGRAPH: Any = None
_LEIDENALG: Any = None
# Flipped to True after the first probe so the imports are attempted only once.
_CHECKED = False
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def _check_leiden() -> bool:
    """Report whether both igraph and leidenalg can be imported.

    The import probe runs at most once; its outcome is cached in the
    module-level ``_IGRAPH``, ``_LEIDENALG`` and ``_CHECKED`` globals, and
    later calls answer from that cache.

    Returns:
        True when both modules are available, False otherwise.
    """
    global _IGRAPH, _LEIDENALG, _CHECKED  # noqa: PLW0603
    if _CHECKED:
        # Already probed: answer from the cached module references.
        return _IGRAPH is not None and _LEIDENALG is not None

    try:
        import igraph  # type: ignore[import-untyped]
        import leidenalg  # type: ignore[import-untyped]
    except ImportError:
        # Either package missing disables Leiden entirely.
        _IGRAPH, _LEIDENALG = None, None
    else:
        _IGRAPH, _LEIDENALG = igraph, leidenalg
    _CHECKED = True
    return _IGRAPH is not None and _LEIDENALG is not None
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
# ---------------------------------------------------------------------------
|
|
47
|
+
# Data types
|
|
48
|
+
# ---------------------------------------------------------------------------
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
@dataclass
class Community:
    """A cluster of semantically related facts."""

    # Identifier of this cluster within one detection result.
    community_id: int
    # IDs of the facts assigned to this cluster (fresh empty list by default).
    fact_ids: list[str] = field(default_factory=list)
    # Short text summary of the cluster; empty string when not provided.
    summary: str = ""
    # ID of the fact chosen to represent the cluster; empty when not provided.
    centroid_id: str = ""
    # Coherence score; stays at 0.0 unless a caller sets it.
    coherence: float = 0.0

    @property
    def size(self) -> int:
        """Return how many facts belong to this community."""
        member_count = len(self.fact_ids)
        return member_count
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
@dataclass
class CommunityResult:
    """Outcome of one community-detection run."""

    # Detected communities (fresh empty list by default).
    communities: list[Community] = field(default_factory=list)
    # Lookup from fact ID to the ID of the community that contains it.
    fact_to_community: dict[str, int] = field(default_factory=dict)
    # True when the result was produced via Leiden rather than the fallback.
    used_leiden: bool = False
    # Modularity of the partition; stays 0.0 unless the producer sets it.
    modularity: float = 0.0
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
# ---------------------------------------------------------------------------
|
|
77
|
+
# Leiden detection (full)
|
|
78
|
+
# ---------------------------------------------------------------------------
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def _leiden_detect(graph: FactGraph) -> CommunityResult:
    """Partition *graph* into communities with the Leiden algorithm.

    Reads the igraph / leidenalg modules from the module-level ``_IGRAPH`` and
    ``_LEIDENALG`` references, so it must only be called once those have been
    populated (``_check_leiden()`` returned True).

    Args:
        graph: Fact graph whose ``nodes`` mapping and ``edges`` sequence are
            converted to an undirected igraph graph.

    Returns:
        A ``CommunityResult`` with ``used_leiden=True`` and the partition's
        modularity; an empty result when the graph has no nodes.
    """
    if not graph.nodes:
        return CommunityResult()

    igraph_mod = _IGRAPH
    leiden_mod = _LEIDENALG

    # igraph addresses vertices by position, so map fact IDs to indices.
    ordered_ids = list(graph.nodes.keys())
    position_of = {fact_id: pos for pos, fact_id in enumerate(ordered_ids)}

    ig_graph = igraph_mod.Graph(n=len(ordered_ids), directed=False)
    index_pairs = []
    for edge in graph.edges:
        left = position_of.get(edge.source_id)
        right = position_of.get(edge.target_id)
        # Skip edges that reference unknown facts, and self-loops.
        if left is None or right is None or left == right:
            continue
        index_pairs.append((left, right))
    if index_pairs:
        ig_graph.add_edges(index_pairs)

    partition = leiden_mod.find_partition(ig_graph, leiden_mod.ModularityVertexPartition)

    # Translate the per-vertex membership vector back to fact IDs.
    members_by_label: dict[int, list[str]] = {}
    fact_to_community: dict[str, int] = {}
    for pos, label in enumerate(partition.membership):
        fact_id = ordered_ids[pos]
        fact_to_community[fact_id] = label
        members_by_label.setdefault(label, []).append(fact_id)

    result_communities = []
    for label in sorted(members_by_label):
        member_ids = members_by_label[label]
        centroid = _pick_centroid(graph, member_ids)
        summary = _summarize_community(graph, member_ids)
        result_communities.append(
            Community(
                community_id=label,
                fact_ids=member_ids,
                summary=summary,
                centroid_id=centroid,
            )
        )

    return CommunityResult(
        communities=result_communities,
        fact_to_community=fact_to_community,
        used_leiden=True,
        modularity=partition.modularity,
    )
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
# ---------------------------------------------------------------------------
|
|
137
|
+
# Fallback: connected components
|
|
138
|
+
# ---------------------------------------------------------------------------
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
def _connected_components_detect(graph: FactGraph) -> CommunityResult:
    """Fallback detection: every connected component becomes one community.

    Components are discovered with an explicit worklist that is popped from
    the end (last-in, first-out); community IDs are assigned in discovery
    order starting at 0.

    Args:
        graph: Fact graph providing a ``nodes`` mapping and an ``edges``
            sequence; edges touching unknown node IDs are ignored.

    Returns:
        A ``CommunityResult`` with ``used_leiden=False``; an empty result when
        the graph has no nodes.
    """
    if not graph.nodes:
        return CommunityResult()

    # Undirected adjacency restricted to nodes that exist in the graph.
    neighbours: dict[str, set[str]] = {node_id: set() for node_id in graph.nodes}
    for edge in graph.edges:
        if edge.source_id in neighbours and edge.target_id in neighbours:
            neighbours[edge.source_id].add(edge.target_id)
            neighbours[edge.target_id].add(edge.source_id)

    seen: set[str] = set()
    found: list[Community] = []
    fact_to_community: dict[str, int] = {}

    for root in graph.nodes:
        if root in seen:
            continue

        members: list[str] = []
        pending = [root]
        while pending:
            current = pending.pop()
            if current in seen:
                continue
            seen.add(current)
            members.append(current)
            pending.extend(
                other for other in neighbours.get(current, set()) if other not in seen
            )

        # The next ID equals the number of communities emitted so far.
        label = len(found)
        centroid = _pick_centroid(graph, members)
        summary = _summarize_community(graph, members)
        fact_to_community.update((fact_id, label) for fact_id in members)
        found.append(
            Community(
                community_id=label,
                fact_ids=members,
                summary=summary,
                centroid_id=centroid,
            )
        )

    return CommunityResult(
        communities=found,
        fact_to_community=fact_to_community,
        used_leiden=False,
    )
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
# ---------------------------------------------------------------------------
|
|
196
|
+
# Helpers
|
|
197
|
+
# ---------------------------------------------------------------------------
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def _pick_centroid(graph: FactGraph, fact_ids: list[str]) -> str:
    """Return the ID of the most confident fact among *fact_ids*.

    A missing (falsy) confidence counts as 0.0, IDs absent from
    ``graph.nodes`` are skipped, and on ties the earliest ID wins.

    Returns:
        The winning fact ID, or ``""`` when no listed fact is present.
    """
    winner = ""
    top_score = -1.0
    for fact_id in fact_ids:
        node = graph.nodes.get(fact_id)
        if not node:
            continue
        score = node.confidence or 0.0
        # Strict comparison keeps the first of several equally confident facts.
        if score > top_score:
            winner, top_score = fact_id, score
    return winner
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
def _summarize_community(graph: FactGraph, fact_ids: list[str]) -> str:
    """Join the texts of the three most confident facts with ``"; "``.

    IDs absent from ``graph.nodes`` are skipped, a missing (falsy) confidence
    counts as 0.0, and facts without text are left out of the joined string.

    Returns:
        The joined summary, or ``""`` when there is nothing to summarise.
    """
    present = [
        node
        for node in (graph.nodes.get(fact_id) for fact_id in fact_ids)
        if node
    ]
    # The sort is stable, so equally confident facts keep their input order.
    ranked = sorted(present, key=lambda node: node.confidence or 0.0, reverse=True)
    return "; ".join(node.text for node in ranked[:3] if node.text)
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
# ---------------------------------------------------------------------------
|
|
227
|
+
# Incremental update manager
|
|
228
|
+
# ---------------------------------------------------------------------------
|
|
229
|
+
|
|
230
|
+
# Thresholds for incremental vs full rebuild
|
|
231
|
+
# Both values are fractions compared against the change ratio computed by
# CommunityDetector._compute_change_ratio() inside CommunityDetector.detect().
INCREMENTAL_THRESHOLD = 0.10  # <10% change → incremental
FULL_REBUILD_THRESHOLD = 0.30  # ≥30% change → full rebuild
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
class CommunityDetector:
    """Manages community detection with incremental updates.

    Remembers the previous detection result together with the node and edge
    counts it was computed from, and uses the relative change in those counts
    to choose between a full rebuild and a cheap incremental update.
    """

    def __init__(self) -> None:
        self._last_result: CommunityResult | None = None
        self._last_node_count: int = 0
        self._last_edge_count: int = 0

    def detect(self, graph: FactGraph) -> CommunityResult:
        """Run community detection, choosing strategy based on change ratio.

        The first call always performs a full detection. Later calls update
        incrementally only when the change ratio is below both thresholds;
        every other case (including the band between the two thresholds)
        triggers a full rebuild.

        Args:
            graph: Fact graph to analyse.

        Returns:
            The new ``CommunityResult``, which is also cached on the detector.
        """
        node_count = len(graph.nodes)
        edge_count = len(graph.edges)

        use_incremental = False
        if self._last_result is not None:
            change_ratio = self._compute_change_ratio(node_count, edge_count)
            use_incremental = (
                change_ratio < FULL_REBUILD_THRESHOLD
                and change_ratio < INCREMENTAL_THRESHOLD
            )

        if use_incremental:
            result = self._incremental_update(graph)
        else:
            result = self._full_detect(graph)

        self._last_result = result
        self._last_node_count = node_count
        self._last_edge_count = edge_count
        return result

    def _compute_change_ratio(self, node_count: int, edge_count: int) -> float:
        """Compute approximate change ratio since last detection.

        The ratio is the larger of the relative node-count change and the
        relative edge-count change. It is count-based only, so replacing one
        node by another does not register as a change.

        Returns:
            1.0 when no nodes were recorded previously, else the ratio.
        """
        if self._last_node_count == 0:
            return 1.0
        node_delta = abs(node_count - self._last_node_count)
        edge_delta = abs(edge_count - self._last_edge_count)
        # max(..., 1) guards the division when the previous graph had no edges.
        return max(
            node_delta / max(self._last_node_count, 1),
            edge_delta / max(self._last_edge_count, 1),
        )

    def _full_detect(self, graph: FactGraph) -> CommunityResult:
        """Full detection — Leiden if available, else connected components.

        Any exception raised by the Leiden path is logged (with traceback) and
        the connected-components fallback is used instead.
        """
        if _check_leiden():
            try:
                return _leiden_detect(graph)
            except Exception:  # noqa: BLE001
                # Best-effort: keep the fallback, but record why Leiden failed.
                logger.warning(
                    "Leiden detection failed, falling back to connected components",
                    exc_info=True,
                )
        return _connected_components_detect(graph)

    def _incremental_update(self, graph: FactGraph) -> CommunityResult:
        """Reuse previous communities, only assigning new nodes to nearest community.

        Nodes that disappeared from *graph* are dropped. Each new node joins
        the community most common among its already-assigned neighbours, or
        gets a fresh community when it has none. New nodes are processed in
        ``graph.nodes`` order so the outcome is reproducible between runs.

        Returns:
            The previous result object itself when the node set is unchanged,
            otherwise a new ``CommunityResult`` (modularity is not recomputed).
        """
        if self._last_result is None:
            return self._full_detect(graph)

        prev = self._last_result
        previous_assignments = prev.fact_to_community
        new_nodes = [fid for fid in graph.nodes if fid not in previous_assignments]
        removed_nodes = {fid for fid in previous_assignments if fid not in graph.nodes}

        if not new_nodes and not removed_nodes:
            return prev

        # Copy existing assignments (removing deleted nodes)
        fact_to_comm = {
            fid: cid
            for fid, cid in previous_assignments.items()
            if fid not in removed_nodes
        }

        # Collect the neighbours of every new node in a single pass over the
        # edges (kept in edge order so tie-breaking below is stable).
        peers_of: dict[str, list[str]] = {fid: [] for fid in new_nodes}
        for edge in graph.edges:
            source, target = edge.source_id, edge.target_id
            if source == target:
                continue  # a self-loop never links a node to a community
            if source in peers_of:
                peers_of[source].append(target)
            if target in peers_of:
                peers_of[target].append(source)

        # Next unused community ID; 0 when nothing is assigned yet.
        next_cid = max(fact_to_comm.values(), default=-1) + 1

        for fid in new_nodes:
            # Count communities of neighbours that already have an assignment
            # (this includes new nodes handled earlier in this loop).
            neighbour_comms: dict[int, int] = {}
            for peer in peers_of[fid]:
                if peer in fact_to_comm:
                    cid = fact_to_comm[peer]
                    neighbour_comms[cid] = neighbour_comms.get(cid, 0) + 1

            if neighbour_comms:
                # Assign to most frequent community
                fact_to_comm[fid] = max(neighbour_comms, key=neighbour_comms.__getitem__)
            else:
                # Isolated new node — create a new community
                fact_to_comm[fid] = next_cid
                next_cid += 1

        # Rebuild community objects
        communities_dict: dict[int, list[str]] = {}
        for fid, cid in fact_to_comm.items():
            communities_dict.setdefault(cid, []).append(fid)

        communities = []
        for cid, fids in sorted(communities_dict.items()):
            centroid = _pick_centroid(graph, fids)
            summary = _summarize_community(graph, fids)
            communities.append(
                Community(
                    community_id=cid,
                    fact_ids=fids,
                    summary=summary,
                    centroid_id=centroid,
                )
            )

        return CommunityResult(
            communities=communities,
            fact_to_community=fact_to_comm,
            used_leiden=prev.used_leiden,
        )

    def community_summary(self, graph: FactGraph, topic: str) -> list[Community]:
        """Return communities matching *topic* (case-insensitive substring match).

        A community matches when *topic* occurs in its summary or in the text
        of any of its facts found in ``graph.nodes``. Facts without text are
        treated as empty text. Runs ``detect(graph)`` first when no result is
        cached yet; otherwise the cached result is searched as-is.
        """
        if self._last_result is None:
            self.detect(graph)
        result = self._last_result
        if result is None:
            return []

        topic_lower = topic.lower()
        matched = []
        for comm in result.communities:
            if topic_lower in comm.summary.lower():
                matched.append(comm)
                continue
            for fid in comm.fact_ids:
                fact = graph.nodes.get(fid)
                # A fact may carry no text; treat that as an empty string.
                if fact and topic_lower in (fact.text or "").lower():
                    matched.append(comm)
                    break

        return matched
|