crprotocol 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (153) hide show
  1. crp/__init__.py +126 -0
  2. crp/__main__.py +8 -0
  3. crp/_typing.py +27 -0
  4. crp/_version.py +5 -0
  5. crp/adapters.py +31 -0
  6. crp/advanced/__init__.py +40 -0
  7. crp/advanced/auto_ingest.py +400 -0
  8. crp/advanced/cqs.py +235 -0
  9. crp/advanced/cross_window.py +477 -0
  10. crp/advanced/curator.py +265 -0
  11. crp/advanced/feedback.py +146 -0
  12. crp/advanced/hierarchical.py +211 -0
  13. crp/advanced/meta_learning.py +401 -0
  14. crp/advanced/parallel.py +98 -0
  15. crp/advanced/review_cycle.py +329 -0
  16. crp/advanced/scale_mode.py +129 -0
  17. crp/advanced/source_grounding.py +207 -0
  18. crp/ckf/__init__.py +35 -0
  19. crp/ckf/community.py +377 -0
  20. crp/ckf/fabric.py +445 -0
  21. crp/ckf/gc.py +175 -0
  22. crp/ckf/graph_walk.py +87 -0
  23. crp/ckf/merge.py +133 -0
  24. crp/ckf/pattern_query.py +122 -0
  25. crp/ckf/pubsub.py +128 -0
  26. crp/ckf/semantic.py +207 -0
  27. crp/cli/__init__.py +7 -0
  28. crp/cli/main.py +329 -0
  29. crp/cli/sidecar.py +929 -0
  30. crp/cli/startup.py +272 -0
  31. crp/continuation/__init__.py +103 -0
  32. crp/continuation/completion.py +348 -0
  33. crp/continuation/degradation.py +157 -0
  34. crp/continuation/document_map.py +160 -0
  35. crp/continuation/flow.py +109 -0
  36. crp/continuation/gap.py +419 -0
  37. crp/continuation/manager.py +484 -0
  38. crp/continuation/quality_monitor.py +179 -0
  39. crp/continuation/stitch.py +419 -0
  40. crp/continuation/trigger.py +142 -0
  41. crp/continuation/voice.py +157 -0
  42. crp/core/__init__.py +69 -0
  43. crp/core/batch.py +77 -0
  44. crp/core/circuit_breaker.py +116 -0
  45. crp/core/config.py +377 -0
  46. crp/core/context_tools.py +540 -0
  47. crp/core/dispatch_router.py +3977 -0
  48. crp/core/errors.py +128 -0
  49. crp/core/extraction_facade.py +384 -0
  50. crp/core/facilitator.py +713 -0
  51. crp/core/idempotency.py +215 -0
  52. crp/core/orchestrator.py +1435 -0
  53. crp/core/relay_strategies.py +613 -0
  54. crp/core/security_manager.py +140 -0
  55. crp/core/session.py +134 -0
  56. crp/core/task_intent.py +36 -0
  57. crp/core/window.py +363 -0
  58. crp/envelope/__init__.py +30 -0
  59. crp/envelope/builder.py +288 -0
  60. crp/envelope/decomposer.py +236 -0
  61. crp/envelope/formatter.py +168 -0
  62. crp/envelope/packer.py +211 -0
  63. crp/envelope/reranker.py +209 -0
  64. crp/envelope/scoring.py +310 -0
  65. crp/extraction/__init__.py +45 -0
  66. crp/extraction/complexity.py +96 -0
  67. crp/extraction/contradiction.py +132 -0
  68. crp/extraction/pipeline.py +360 -0
  69. crp/extraction/quality_gate.py +237 -0
  70. crp/extraction/stage1_regex.py +173 -0
  71. crp/extraction/stage2_statistical.py +244 -0
  72. crp/extraction/stage3_gliner.py +210 -0
  73. crp/extraction/stage4_uie.py +183 -0
  74. crp/extraction/stage5_discourse.py +175 -0
  75. crp/extraction/stage6_llm.py +178 -0
  76. crp/extraction/structured_output.py +219 -0
  77. crp/extraction/types.py +299 -0
  78. crp/license_guard.py +722 -0
  79. crp/observability/__init__.py +30 -0
  80. crp/observability/audit.py +118 -0
  81. crp/observability/events.py +233 -0
  82. crp/observability/metrics.py +264 -0
  83. crp/observability/quality.py +135 -0
  84. crp/observability/structured_logging.py +81 -0
  85. crp/observability/telemetry.py +117 -0
  86. crp/provenance/__init__.py +314 -0
  87. crp/provenance/_embeddings.py +97 -0
  88. crp/provenance/_types.py +378 -0
  89. crp/provenance/attribution_scorer.py +252 -0
  90. crp/provenance/claim_detector.py +229 -0
  91. crp/provenance/contradiction_detector.py +243 -0
  92. crp/provenance/distortion_detector.py +397 -0
  93. crp/provenance/entailment_verifier.py +358 -0
  94. crp/provenance/fabrication_detector.py +203 -0
  95. crp/provenance/hallucination_scorer.py +320 -0
  96. crp/provenance/omission_analyzer.py +106 -0
  97. crp/provenance/provenance_chain.py +205 -0
  98. crp/provenance/report_generator.py +440 -0
  99. crp/providers/__init__.py +43 -0
  100. crp/providers/anthropic.py +270 -0
  101. crp/providers/base.py +135 -0
  102. crp/providers/custom.py +63 -0
  103. crp/providers/diagnostic.py +251 -0
  104. crp/providers/llamacpp.py +224 -0
  105. crp/providers/manager.py +139 -0
  106. crp/providers/ollama.py +243 -0
  107. crp/providers/openai.py +628 -0
  108. crp/providers/tokenizers.py +48 -0
  109. crp/py.typed +0 -0
  110. crp/resources/__init__.py +53 -0
  111. crp/resources/adaptive_allocator.py +525 -0
  112. crp/resources/cost_model.py +388 -0
  113. crp/resources/overhead_manager.py +217 -0
  114. crp/resources/resource_manager.py +262 -0
  115. crp/schemas/__init__.py +20 -0
  116. crp/schemas/cost-estimate.json +33 -0
  117. crp/schemas/crp-error.json +43 -0
  118. crp/schemas/envelope-preview.json +40 -0
  119. crp/schemas/persisted-state-header.json +27 -0
  120. crp/schemas/quality-report.json +94 -0
  121. crp/schemas/session-handle.json +33 -0
  122. crp/schemas/session-status.json +57 -0
  123. crp/schemas/stream-event.json +18 -0
  124. crp/schemas/task-intent.json +42 -0
  125. crp/security/__init__.py +93 -0
  126. crp/security/audit_trail.py +392 -0
  127. crp/security/binding.py +192 -0
  128. crp/security/compliance.py +813 -0
  129. crp/security/consent.py +593 -0
  130. crp/security/embedding_defense.py +161 -0
  131. crp/security/encryption.py +202 -0
  132. crp/security/injection.py +335 -0
  133. crp/security/integrity.py +267 -0
  134. crp/security/privacy.py +662 -0
  135. crp/security/quarantine.py +249 -0
  136. crp/security/rbac.py +221 -0
  137. crp/security/validation.py +164 -0
  138. crp/state/__init__.py +31 -0
  139. crp/state/cold_storage.py +258 -0
  140. crp/state/compaction.py +263 -0
  141. crp/state/critical_state.py +104 -0
  142. crp/state/event_log.py +313 -0
  143. crp/state/fact.py +189 -0
  144. crp/state/serialization.py +189 -0
  145. crp/state/session_cleanup.py +77 -0
  146. crp/state/snapshot.py +290 -0
  147. crp/state/warm_store.py +346 -0
  148. crprotocol-2.0.0.dist-info/METADATA +1295 -0
  149. crprotocol-2.0.0.dist-info/RECORD +153 -0
  150. crprotocol-2.0.0.dist-info/WHEEL +4 -0
  151. crprotocol-2.0.0.dist-info/entry_points.txt +2 -0
  152. crprotocol-2.0.0.dist-info/licenses/LICENSE.md +170 -0
  153. crprotocol-2.0.0.dist-info/licenses/NOTICE +18 -0
crp/ckf/community.py ADDED
@@ -0,0 +1,377 @@
1
+ # Copyright © 2025 Constantinos Vidiniotis. All rights reserved.
2
+ # Licensed under Elastic License 2.0 — see LICENSE.md for details.
3
+ """CKF Mode 4: Community detection — Leiden cluster summaries (§3.8).
4
+
5
+ Batch community detection per window. Incremental update for <10% change,
6
+ full rebuild for ≥30%. Falls back to connected components when leidenalg
7
+ is unavailable.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import logging
13
+ from dataclasses import dataclass, field
14
+ from typing import Any
15
+
16
+ from crp.extraction.types import FactGraph
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
+ # ---------------------------------------------------------------------------
21
+ # Optional dependency check
22
+ # ---------------------------------------------------------------------------
23
+
24
# Lazily populated handles to the optional graph libraries.  They stay
# ``None`` until ``_check_leiden`` has probed for them, and remain ``None``
# when either import fails.
_IGRAPH: Any = None
_LEIDENALG: Any = None
# Set once the import probe has completed, so it runs at most once.
_CHECKED = False


def _check_leiden() -> bool:
    """Report whether both ``igraph`` and ``leidenalg`` can be imported.

    The first call attempts the two imports and caches the modules in the
    module-level ``_IGRAPH`` / ``_LEIDENALG`` slots; later calls only read
    the cached state.  An ``ImportError`` from either import leaves both
    slots ``None``.

    Returns:
        ``True`` when both modules are cached, ``False`` otherwise.
    """
    global _IGRAPH, _LEIDENALG, _CHECKED  # noqa: PLW0603
    if _CHECKED:
        return not (_IGRAPH is None or _LEIDENALG is None)

    try:
        import igraph  # type: ignore[import-untyped]
        import leidenalg  # type: ignore[import-untyped]
    except ImportError:
        _IGRAPH = _LEIDENALG = None
    else:
        # Publish the handles only when *both* imports succeeded.
        _IGRAPH, _LEIDENALG = igraph, leidenalg
    _CHECKED = True

    return not (_IGRAPH is None or _LEIDENALG is None)
44
+
45
+
46
+ # ---------------------------------------------------------------------------
47
+ # Data types
48
+ # ---------------------------------------------------------------------------
49
+
50
+
51
@dataclass
class Community:
    """One detected group of facts, identified by an integer id."""

    # Identifier assigned by the detector that produced this community.
    community_id: int
    # Ids of the facts that belong to this community.
    fact_ids: list[str] = field(default_factory=list)
    # Short text digest of the community; empty when none was built.
    summary: str = ""
    # Id of the fact chosen as representative; empty when none was chosen.
    centroid_id: str = ""
    # Defaults to 0.0; the detectors in this module do not set it.
    coherence: float = 0.0

    @property
    def size(self) -> int:
        """Number of facts currently listed in ``fact_ids``."""
        member_ids = self.fact_ids
        return len(member_ids)
64
+
65
+
66
@dataclass
class CommunityResult:
    """Outcome of one community-detection pass."""

    # Detected communities.
    communities: list[Community] = field(default_factory=list)
    # Reverse index: fact id -> id of the community that contains it.
    fact_to_community: dict[str, int] = field(default_factory=dict)
    # True when the Leiden path produced this result.
    used_leiden: bool = False
    # Modularity reported by the Leiden partition; left at 0.0 otherwise.
    modularity: float = 0.0
74
+
75
+
76
+ # ---------------------------------------------------------------------------
77
+ # Leiden detection (full)
78
+ # ---------------------------------------------------------------------------
79
+
80
+
81
def _leiden_detect(graph: FactGraph) -> CommunityResult:
    """Partition *graph* with Leiden (modularity objective).

    Relies on the module-level ``_IGRAPH`` / ``_LEIDENALG`` handles having
    been populated by ``_check_leiden``.  The fact graph is treated as
    undirected; self-loops and edges whose endpoints are not nodes of the
    graph are left out.

    Returns:
        A ``CommunityResult`` with ``used_leiden=True`` and the partition's
        modularity, or an empty result when the graph has no nodes.
    """
    if not graph.nodes:
        return CommunityResult()

    igraph_mod, leiden_mod = _IGRAPH, _LEIDENALG

    # igraph addresses vertices by position, so fix an ordering of fact ids.
    ordered_ids = list(graph.nodes.keys())
    position = {fact_id: pos for pos, fact_id in enumerate(ordered_ids)}

    pairs = []
    for link in graph.edges:
        a = position.get(link.source_id)
        b = position.get(link.target_id)
        if a is None or b is None or a == b:
            continue
        pairs.append((a, b))

    network = igraph_mod.Graph(n=len(ordered_ids), directed=False)
    if pairs:
        network.add_edges(pairs)

    partition = leiden_mod.find_partition(
        network, leiden_mod.ModularityVertexPartition
    )

    # partition.membership[i] is the community label of vertex i.
    members: dict[int, list[str]] = {}
    assignment: dict[str, int] = {}
    for fact_id, label in zip(ordered_ids, partition.membership):
        members.setdefault(label, []).append(fact_id)
        assignment[fact_id] = label

    built = []
    for label, ids in sorted(members.items()):
        representative = _pick_centroid(graph, ids)
        digest = _summarize_community(graph, ids)
        built.append(
            Community(
                community_id=label,
                fact_ids=ids,
                summary=digest,
                centroid_id=representative,
            )
        )

    return CommunityResult(
        communities=built,
        fact_to_community=assignment,
        used_leiden=True,
        modularity=partition.modularity,
    )
134
+
135
+
136
+ # ---------------------------------------------------------------------------
137
+ # Fallback: connected components
138
+ # ---------------------------------------------------------------------------
139
+
140
+
141
def _connected_components_detect(graph: FactGraph) -> CommunityResult:
    """Fallback detection: one community per connected component.

    Edges are treated as undirected, and edges with an endpoint that is not
    a node of the graph are ignored.  Components are discovered with an
    explicit stack (last-in, first-out) and numbered from 0 in the order
    their first node appears in ``graph.nodes``.

    Returns:
        A ``CommunityResult`` with ``used_leiden=False``, or an empty result
        when the graph has no nodes.
    """
    if not graph.nodes:
        return CommunityResult()

    # Symmetric adjacency restricted to known nodes.
    links: dict[str, set[str]] = {node_id: set() for node_id in graph.nodes}
    for edge in graph.edges:
        a, b = edge.source_id, edge.target_id
        if a not in links or b not in links:
            continue
        links[a].add(b)
        links[b].add(a)

    seen: set[str] = set()
    found: list[Community] = []
    owner: dict[str, int] = {}

    for seed in graph.nodes:
        if seed in seen:
            continue

        members: list[str] = []
        pending = [seed]
        while pending:
            current = pending.pop()
            if current in seen:
                # Already reached through another path.
                continue
            seen.add(current)
            members.append(current)
            pending.extend(
                other for other in links.get(current, ()) if other not in seen
            )

        # Ids are handed out sequentially, so the next id is the count so far.
        label = len(found)
        representative = _pick_centroid(graph, members)
        digest = _summarize_community(graph, members)
        owner.update((fact_id, label) for fact_id in members)
        found.append(
            Community(
                community_id=label,
                fact_ids=members,
                summary=digest,
                centroid_id=representative,
            )
        )

    return CommunityResult(
        communities=found,
        fact_to_community=owner,
        used_leiden=False,
    )
193
+
194
+
195
+ # ---------------------------------------------------------------------------
196
+ # Helpers
197
+ # ---------------------------------------------------------------------------
198
+
199
+
200
def _pick_centroid(graph: FactGraph, fact_ids: list[str]) -> str:
    """Return the id of the most confident fact among *fact_ids*.

    Ids that do not resolve to a (truthy) node are skipped, and a missing
    confidence counts as 0.0.  On ties the earliest id in *fact_ids* wins.
    Returns ``""`` when no candidate beats the initial floor of -1.0.
    """
    lookup = graph.nodes
    winner, top_score = "", -1.0
    for candidate in fact_ids:
        entry = lookup.get(candidate)
        if not entry:
            continue
        score = entry.confidence or 0.0
        if score > top_score:
            winner, top_score = candidate, score
    return winner
210
+
211
+
212
def _summarize_community(graph: FactGraph, fact_ids: list[str]) -> str:
    """Join the texts of the three most confident facts with ``"; "``.

    Ids that do not resolve to a (truthy) node are ignored and a missing
    confidence counts as 0.0.  The top three are selected first; any of
    them with empty text is then dropped, so the result may contain fewer
    than three parts or be empty.
    """
    resolved = (graph.nodes.get(fact_id) for fact_id in fact_ids)
    present = [entry for entry in resolved if entry]
    # Negated key gives a stable descending order by confidence.
    leaders = sorted(present, key=lambda entry: -(entry.confidence or 0.0))[:3]
    if len(leaders) == 0:
        return ""
    parts = [entry.text for entry in leaders if entry.text]
    return "; ".join(parts)
224
+
225
+
226
+ # ---------------------------------------------------------------------------
227
+ # Incremental update manager
228
+ # ---------------------------------------------------------------------------
229
+
230
# Change-ratio cut-offs consulted by CommunityDetector.detect.
INCREMENTAL_THRESHOLD = 0.10  # ratio below this → incremental update
FULL_REBUILD_THRESHOLD = 0.30  # ratio at or above this → full rebuild
233
+
234
+
235
class CommunityDetector:
    """Stateful community detection with a cheap path for small changes.

    The detector remembers the result and the node/edge counts of its last
    run.  On each :meth:`detect` call it compares the new counts with the
    remembered ones: a change ratio below ``INCREMENTAL_THRESHOLD`` patches
    the previous result, anything else triggers a full recomputation.
    """

    def __init__(self) -> None:
        # Result of the most recent detect() call; None until the first run.
        self._last_result: CommunityResult | None = None
        # Graph size at the most recent detect() call.
        self._last_node_count: int = 0
        self._last_edge_count: int = 0

    def detect(self, graph: FactGraph) -> CommunityResult:
        """Detect communities in *graph* and remember the outcome.

        Args:
            graph: Fact graph exposing ``nodes`` (mapping of fact id to
                fact) and ``edges`` (items with ``source_id``/``target_id``).

        Returns:
            The new ``CommunityResult``; it is also stored for the next call.
        """
        node_count = len(graph.nodes)
        edge_count = len(graph.edges)

        if self._last_result is None:
            # Nothing to build on yet.
            result = self._full_detect(graph)
        else:
            change_ratio = self._compute_change_ratio(node_count, edge_count)
            if change_ratio >= FULL_REBUILD_THRESHOLD:
                result = self._full_detect(graph)
            elif change_ratio < INCREMENTAL_THRESHOLD:
                result = self._incremental_update(graph)
            else:
                # The band between the two thresholds is also rebuilt in full.
                result = self._full_detect(graph)

        self._last_result = result
        self._last_node_count = node_count
        self._last_edge_count = edge_count
        return result

    def _compute_change_ratio(self, node_count: int, edge_count: int) -> float:
        """Return the larger relative change in node count or edge count.

        Only the counts are compared, so equal numbers of additions and
        removals cancel out.  Returns 1.0 when the previous run saw no nodes.
        """
        if self._last_node_count == 0:
            return 1.0
        node_delta = abs(node_count - self._last_node_count)
        edge_delta = abs(edge_count - self._last_edge_count)
        return max(
            node_delta / max(self._last_node_count, 1),
            # max(..., 1) avoids dividing by zero when there were no edges.
            edge_delta / max(self._last_edge_count, 1),
        )

    def _full_detect(self, graph: FactGraph) -> CommunityResult:
        """Recompute from scratch: Leiden when available, else components.

        Any exception raised by the Leiden path is logged with its traceback
        and the connected-components fallback is used instead.
        """
        if _check_leiden():
            try:
                return _leiden_detect(graph)
            except Exception:  # noqa: BLE001
                # Best-effort fallback; keep the traceback so the failure
                # can still be diagnosed from the logs.
                logger.warning(
                    "Leiden detection failed, falling back to connected components",
                    exc_info=True,
                )
        return _connected_components_detect(graph)

    def _incremental_update(self, graph: FactGraph) -> CommunityResult:
        """Patch the previous result instead of recomputing it.

        Facts that disappeared are dropped.  Each new fact joins the
        community that most of its already-assigned neighbours belong to,
        or starts a new community when it has none.  New facts are handled
        in sorted id order so the outcome does not depend on set iteration
        order.  Existing assignments are never revisited.

        Returns:
            The previous result object itself when the node set is
            unchanged; otherwise a new ``CommunityResult`` (``modularity``
            is left at its default because no partition is recomputed).
        """
        prev = self._last_result
        if prev is None:
            return self._full_detect(graph)

        current_ids = set(graph.nodes)
        known_ids = set(prev.fact_to_community)
        new_nodes = current_ids - known_ids
        removed_nodes = known_ids - current_ids

        if not new_nodes and not removed_nodes:
            return prev

        # Carry over surviving assignments.
        fact_to_comm = {
            fid: cid
            for fid, cid in prev.fact_to_community.items()
            if fid not in removed_nodes
        }

        # Collect each new node's neighbours in a single pass over the edges
        # (in edge order, duplicates kept so parallel edges count twice).
        peers_of: dict[str, list[str]] = {fid: [] for fid in new_nodes}
        for edge in graph.edges:
            src, tgt = edge.source_id, edge.target_id
            if src == tgt:
                continue  # a self-loop says nothing about neighbours
            if src in peers_of:
                peers_of[src].append(tgt)
            if tgt in peers_of:
                peers_of[tgt].append(src)

        next_cid = max(fact_to_comm.values(), default=-1) + 1
        for fid in sorted(new_nodes):
            # Tally the communities of neighbours that are assigned so far;
            # this includes new nodes processed earlier in this loop.
            votes: dict[int, int] = {}
            for peer in peers_of[fid]:
                if peer in fact_to_comm:
                    cid = fact_to_comm[peer]
                    votes[cid] = votes.get(cid, 0) + 1

            if votes:
                # Ties go to the community encountered first.
                fact_to_comm[fid] = max(votes, key=votes.__getitem__)
            else:
                # No assigned neighbour: open a fresh community.
                fact_to_comm[fid] = next_cid
                next_cid += 1

        # Regroup facts by community and refresh centroid and summary.
        grouped: dict[int, list[str]] = {}
        for fid, cid in fact_to_comm.items():
            grouped.setdefault(cid, []).append(fid)

        communities = []
        for cid, fids in sorted(grouped.items()):
            centroid = _pick_centroid(graph, fids)
            summary = _summarize_community(graph, fids)
            communities.append(
                Community(
                    community_id=cid,
                    fact_ids=fids,
                    summary=summary,
                    centroid_id=centroid,
                )
            )

        return CommunityResult(
            communities=communities,
            fact_to_community=fact_to_comm,
            used_leiden=prev.used_leiden,
        )

    def community_summary(self, graph: FactGraph, topic: str) -> list[Community]:
        """Return the communities that mention *topic*.

        Matching is a case-insensitive substring test against the community
        summary first, then against the text of each member fact.  Facts
        without text are skipped.  Detection is run on *graph* first when no
        result has been stored yet; otherwise the stored result is used.
        """
        if self._last_result is None:
            self.detect(graph)
        if self._last_result is None:
            return []

        topic_lower = topic.lower()
        matched = []
        for comm in self._last_result.communities:
            if topic_lower in comm.summary.lower():
                matched.append(comm)
                continue
            for fid in comm.fact_ids:
                fact = graph.nodes.get(fid)
                # A fact may carry no text; treat that as an empty string.
                if fact and topic_lower in (fact.text or "").lower():
                    matched.append(comm)
                    break

        return matched