crprotocol 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (153) hide show
  1. crp/__init__.py +126 -0
  2. crp/__main__.py +8 -0
  3. crp/_typing.py +27 -0
  4. crp/_version.py +5 -0
  5. crp/adapters.py +31 -0
  6. crp/advanced/__init__.py +40 -0
  7. crp/advanced/auto_ingest.py +400 -0
  8. crp/advanced/cqs.py +235 -0
  9. crp/advanced/cross_window.py +477 -0
  10. crp/advanced/curator.py +265 -0
  11. crp/advanced/feedback.py +146 -0
  12. crp/advanced/hierarchical.py +211 -0
  13. crp/advanced/meta_learning.py +401 -0
  14. crp/advanced/parallel.py +98 -0
  15. crp/advanced/review_cycle.py +329 -0
  16. crp/advanced/scale_mode.py +129 -0
  17. crp/advanced/source_grounding.py +207 -0
  18. crp/ckf/__init__.py +35 -0
  19. crp/ckf/community.py +377 -0
  20. crp/ckf/fabric.py +445 -0
  21. crp/ckf/gc.py +175 -0
  22. crp/ckf/graph_walk.py +87 -0
  23. crp/ckf/merge.py +133 -0
  24. crp/ckf/pattern_query.py +122 -0
  25. crp/ckf/pubsub.py +128 -0
  26. crp/ckf/semantic.py +207 -0
  27. crp/cli/__init__.py +7 -0
  28. crp/cli/main.py +329 -0
  29. crp/cli/sidecar.py +929 -0
  30. crp/cli/startup.py +272 -0
  31. crp/continuation/__init__.py +103 -0
  32. crp/continuation/completion.py +348 -0
  33. crp/continuation/degradation.py +157 -0
  34. crp/continuation/document_map.py +160 -0
  35. crp/continuation/flow.py +109 -0
  36. crp/continuation/gap.py +419 -0
  37. crp/continuation/manager.py +484 -0
  38. crp/continuation/quality_monitor.py +179 -0
  39. crp/continuation/stitch.py +419 -0
  40. crp/continuation/trigger.py +142 -0
  41. crp/continuation/voice.py +157 -0
  42. crp/core/__init__.py +69 -0
  43. crp/core/batch.py +77 -0
  44. crp/core/circuit_breaker.py +116 -0
  45. crp/core/config.py +377 -0
  46. crp/core/context_tools.py +540 -0
  47. crp/core/dispatch_router.py +3977 -0
  48. crp/core/errors.py +128 -0
  49. crp/core/extraction_facade.py +384 -0
  50. crp/core/facilitator.py +713 -0
  51. crp/core/idempotency.py +215 -0
  52. crp/core/orchestrator.py +1435 -0
  53. crp/core/relay_strategies.py +613 -0
  54. crp/core/security_manager.py +140 -0
  55. crp/core/session.py +134 -0
  56. crp/core/task_intent.py +36 -0
  57. crp/core/window.py +363 -0
  58. crp/envelope/__init__.py +30 -0
  59. crp/envelope/builder.py +288 -0
  60. crp/envelope/decomposer.py +236 -0
  61. crp/envelope/formatter.py +168 -0
  62. crp/envelope/packer.py +211 -0
  63. crp/envelope/reranker.py +209 -0
  64. crp/envelope/scoring.py +310 -0
  65. crp/extraction/__init__.py +45 -0
  66. crp/extraction/complexity.py +96 -0
  67. crp/extraction/contradiction.py +132 -0
  68. crp/extraction/pipeline.py +360 -0
  69. crp/extraction/quality_gate.py +237 -0
  70. crp/extraction/stage1_regex.py +173 -0
  71. crp/extraction/stage2_statistical.py +244 -0
  72. crp/extraction/stage3_gliner.py +210 -0
  73. crp/extraction/stage4_uie.py +183 -0
  74. crp/extraction/stage5_discourse.py +175 -0
  75. crp/extraction/stage6_llm.py +178 -0
  76. crp/extraction/structured_output.py +219 -0
  77. crp/extraction/types.py +299 -0
  78. crp/license_guard.py +722 -0
  79. crp/observability/__init__.py +30 -0
  80. crp/observability/audit.py +118 -0
  81. crp/observability/events.py +233 -0
  82. crp/observability/metrics.py +264 -0
  83. crp/observability/quality.py +135 -0
  84. crp/observability/structured_logging.py +81 -0
  85. crp/observability/telemetry.py +117 -0
  86. crp/provenance/__init__.py +314 -0
  87. crp/provenance/_embeddings.py +97 -0
  88. crp/provenance/_types.py +378 -0
  89. crp/provenance/attribution_scorer.py +252 -0
  90. crp/provenance/claim_detector.py +229 -0
  91. crp/provenance/contradiction_detector.py +243 -0
  92. crp/provenance/distortion_detector.py +397 -0
  93. crp/provenance/entailment_verifier.py +358 -0
  94. crp/provenance/fabrication_detector.py +203 -0
  95. crp/provenance/hallucination_scorer.py +320 -0
  96. crp/provenance/omission_analyzer.py +106 -0
  97. crp/provenance/provenance_chain.py +205 -0
  98. crp/provenance/report_generator.py +440 -0
  99. crp/providers/__init__.py +43 -0
  100. crp/providers/anthropic.py +270 -0
  101. crp/providers/base.py +135 -0
  102. crp/providers/custom.py +63 -0
  103. crp/providers/diagnostic.py +251 -0
  104. crp/providers/llamacpp.py +224 -0
  105. crp/providers/manager.py +139 -0
  106. crp/providers/ollama.py +243 -0
  107. crp/providers/openai.py +628 -0
  108. crp/providers/tokenizers.py +48 -0
  109. crp/py.typed +0 -0
  110. crp/resources/__init__.py +53 -0
  111. crp/resources/adaptive_allocator.py +525 -0
  112. crp/resources/cost_model.py +388 -0
  113. crp/resources/overhead_manager.py +217 -0
  114. crp/resources/resource_manager.py +262 -0
  115. crp/schemas/__init__.py +20 -0
  116. crp/schemas/cost-estimate.json +33 -0
  117. crp/schemas/crp-error.json +43 -0
  118. crp/schemas/envelope-preview.json +40 -0
  119. crp/schemas/persisted-state-header.json +27 -0
  120. crp/schemas/quality-report.json +94 -0
  121. crp/schemas/session-handle.json +33 -0
  122. crp/schemas/session-status.json +57 -0
  123. crp/schemas/stream-event.json +18 -0
  124. crp/schemas/task-intent.json +42 -0
  125. crp/security/__init__.py +93 -0
  126. crp/security/audit_trail.py +392 -0
  127. crp/security/binding.py +192 -0
  128. crp/security/compliance.py +813 -0
  129. crp/security/consent.py +593 -0
  130. crp/security/embedding_defense.py +161 -0
  131. crp/security/encryption.py +202 -0
  132. crp/security/injection.py +335 -0
  133. crp/security/integrity.py +267 -0
  134. crp/security/privacy.py +662 -0
  135. crp/security/quarantine.py +249 -0
  136. crp/security/rbac.py +221 -0
  137. crp/security/validation.py +164 -0
  138. crp/state/__init__.py +31 -0
  139. crp/state/cold_storage.py +258 -0
  140. crp/state/compaction.py +263 -0
  141. crp/state/critical_state.py +104 -0
  142. crp/state/event_log.py +313 -0
  143. crp/state/fact.py +189 -0
  144. crp/state/serialization.py +189 -0
  145. crp/state/session_cleanup.py +77 -0
  146. crp/state/snapshot.py +290 -0
  147. crp/state/warm_store.py +346 -0
  148. crprotocol-2.0.0.dist-info/METADATA +1295 -0
  149. crprotocol-2.0.0.dist-info/RECORD +153 -0
  150. crprotocol-2.0.0.dist-info/WHEEL +4 -0
  151. crprotocol-2.0.0.dist-info/entry_points.txt +2 -0
  152. crprotocol-2.0.0.dist-info/licenses/LICENSE.md +170 -0
  153. crprotocol-2.0.0.dist-info/licenses/NOTICE +18 -0
crp/advanced/cqs.py ADDED
@@ -0,0 +1,235 @@
1
+ # Copyright © 2025 Constantinos Vidiniotis. All rights reserved.
2
+ # Licensed under Elastic License 2.0 — see LICENSE.md for details.
3
+ """CQS — Context Quality Signaling, detect LLM context hunger (§12).
4
+
5
+ Three signal types: hedging, reference_miss, repetition.
6
+ Preserves Model Ignorance (Axiom 4): signals are detected structurally
7
+ from generation output, never by injecting meta-protocol into the LLM.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import re
13
+ from collections import Counter
14
+ from dataclasses import dataclass, field
15
+ from typing import Any
16
+
17
+ # ---------------------------------------------------------------------------
18
+ # Constants (§12)
19
+ # ---------------------------------------------------------------------------
20
+
21
# Hedging: minimum number of pattern hits before a signal is emitted, and the
# divisor that scales the hit count into a 0.0–1.0 strength.
HEDGING_THRESHOLD = 3
HEDGING_STRENGTH_DIVISOR = 5

# Reference miss: same two roles, applied to placeholder / back-reference hits.
PLACEHOLDER_THRESHOLD = 2
PLACEHOLDER_STRENGTH_DIVISOR = 3

# Minimum occurrences for an n-gram to be reported as repeated.
REPETITION_THRESHOLD = 3

# Enrichment budget (in tokens) contributed by each detected signal type.
CQS_HEDGING_BUDGET = 2000
CQS_REFERENCE_MISS_BUDGET = 3000
CQS_REPETITION_BUDGET = 2000

# Abandon-and-redispatch applies when the strongest signal reaches
# REDISPATCH_STRENGTH while fewer than REDISPATCH_TOKEN_LIMIT tokens have
# been generated.
REDISPATCH_STRENGTH = 0.8
REDISPATCH_TOKEN_LIMIT = 500
33
+
34
+
35
+ # ---------------------------------------------------------------------------
36
+ # Detection patterns
37
+ # ---------------------------------------------------------------------------
38
+
39
# Phrases that indicate the model is hedging for lack of context.
# All patterns are compiled case-insensitively and contain only
# non-capturing groups, so ``findall`` yields the full matched text.
_HEDGING_PATTERNS: list[re.Pattern[str]] = [
    re.compile(source, re.IGNORECASE)
    for source in (
        r"it is unclear whether",
        r"without (?:more|additional|further) information",
        r"cannot (?:determine|confirm|verify)",
        r"(?:may|might|could) (?:be|have)",
        r"insufficient (?:data|evidence|context)",
        r"further (?:analysis|investigation) (?:is )?needed",
        r"based on (?:limited|available) (?:information|data|context)",
    )
]

# Placeholders and back-references to material the text points at
# ("as discussed earlier", "see section 3", "[TODO]", ...).
_REFERENCE_MISS_PATTERNS: list[re.Pattern[str]] = [
    re.compile(source, re.IGNORECASE)
    for source in (
        r"\[(?:need|missing|TODO|TBD|citation needed)\]",
        r"as (?:discussed|mentioned|noted|shown|described) (?:earlier|previously|above|before)",
        r"(?:per|according to|referring to) the (?:previous|prior|earlier) (?:analysis|section|findings)",
        r"(?:see|refer to) (?:section|chapter|part) \d+",
    )
]
59
+
60
+
61
+ # ---------------------------------------------------------------------------
62
+ # Data types
63
+ # ---------------------------------------------------------------------------
64
+
65
+
66
@dataclass
class ContextHungerSignal:
    """One context-hunger signal detected in LLM generation output.

    Attributes:
        signal_type: One of "hedging", "reference_miss" or "repetition".
        strength: Signal strength in the range 0.0–1.0.
        topic: Text snippet or phrase the signal relates to ("" if none).
        window_id: Identifier of the window the output came from.
        token_offset: Token offset associated with the signal (defaults to 0).
        details: Free-form extra data about the detection.
    """

    signal_type: str
    strength: float = 0.0
    topic: str = ""
    window_id: str = ""
    token_offset: int = 0
    details: dict[str, Any] = field(default_factory=dict)
76
+
77
+
78
@dataclass
class CQSResponse:
    """Outcome of CQS processing for a set of detected signals.

    Attributes:
        action: One of "abandon_and_redispatch", "enrich_next" or "none".
        signals: The signals the decision was based on.
        enrichment_budget: Token budget for enrichment.
        enrichment_topics: Topics to enrich with.
    """

    action: str = "enrich_next"
    signals: list[ContextHungerSignal] = field(default_factory=list)
    enrichment_budget: int = 0
    enrichment_topics: list[str] = field(default_factory=list)
86
+
87
+
88
+ # ---------------------------------------------------------------------------
89
+ # CQSDetector
90
+ # ---------------------------------------------------------------------------
91
+
92
+
93
class CQSDetector:
    """Detect implicit context hunger from LLM generation output.

    Detection is purely structural: regex hit counts for hedging and
    reference-miss phrases, plus repeated word n-grams in the text.
    """

    @staticmethod
    def _tally(
        text: str,
        patterns: list[re.Pattern[str]],
        topic_of: Any,
    ) -> tuple[int, str]:
        """Count hits over *patterns* and derive a topic from the first hit.

        Returns ``(total_hits, topic)``. The topic is taken from the first
        pattern, in list order, whose first hit yields a non-empty snippet;
        it is "" when no pattern produces one.
        """
        total = 0
        topic = ""
        for pattern in patterns:
            hits = pattern.findall(text)
            total += len(hits)
            if hits and not topic:
                topic = topic_of(text, hits[0])
        return total, topic

    def detect_context_hunger(
        self,
        generation_text: str,
        window_id: str = "",
        tokens_generated: int | None = None,
    ) -> list[ContextHungerSignal]:
        """Scan generation output for context hunger signals.

        Args:
            generation_text: Text produced by the LLM.
            window_id: Identifier copied onto every emitted signal.
            tokens_generated: Accepted for interface compatibility; this
                implementation does not read it.

        Returns:
            Detected signals in the order hedging, reference_miss,
            repetition. The list may be empty.
        """
        detected: list[ContextHungerSignal] = []

        # --- Type 1: Hedging ---
        hedge_hits, hedge_topic = CQSDetector._tally(
            generation_text, _HEDGING_PATTERNS, _extract_uncertain_topic
        )
        if hedge_hits >= HEDGING_THRESHOLD:
            detected.append(
                ContextHungerSignal(
                    signal_type="hedging",
                    strength=min(hedge_hits / HEDGING_STRENGTH_DIVISOR, 1.0),
                    topic=hedge_topic,
                    window_id=window_id,
                    details={"hedging_count": hedge_hits},
                )
            )

        # --- Type 2: Reference miss ---
        miss_hits, miss_topic = CQSDetector._tally(
            generation_text, _REFERENCE_MISS_PATTERNS, _extract_referenced_topic
        )
        if miss_hits >= PLACEHOLDER_THRESHOLD:
            detected.append(
                ContextHungerSignal(
                    signal_type="reference_miss",
                    strength=min(miss_hits / PLACEHOLDER_STRENGTH_DIVISOR, 1.0),
                    topic=miss_topic,
                    window_id=window_id,
                    details={"placeholder_count": miss_hits},
                )
            )

        # --- Type 3: Repetition ---
        repeats = _detect_repetition(generation_text)
        if repeats:
            detected.append(
                ContextHungerSignal(
                    signal_type="repetition",
                    # Strength grows with the number of distinct repeated
                    # phrases and saturates at three.
                    strength=min(len(repeats) / 3, 1.0),
                    # Most frequent phrase; the first one wins on ties.
                    topic=max(repeats, key=repeats.get),
                    window_id=window_id,
                    details={"repeated_items": dict(repeats)},
                )
            )

        return detected

    def respond_to_context_hunger(
        self,
        signals: list[ContextHungerSignal],
        tokens_generated: int = 0,
    ) -> CQSResponse:
        """Determine the action to take for the detected signals.

        §12.4: If max(strength) >= 0.8 AND tokens < 500 → abandon + redispatch.
        Otherwise → enrich next window. With no signals the action is "none".

        Args:
            signals: Signals returned by ``detect_context_hunger``.
            tokens_generated: Number of tokens generated so far.

        Returns:
            A ``CQSResponse`` carrying the action, the signals, the summed
            per-type enrichment budget and the non-empty signal topics.
        """
        if not signals:
            return CQSResponse(action="none")

        # Per-type budget; unrecognised signal types contribute nothing.
        budget_by_type = {
            "hedging": CQS_HEDGING_BUDGET,
            "reference_miss": CQS_REFERENCE_MISS_BUDGET,
            "repetition": CQS_REPETITION_BUDGET,
        }
        total_budget = sum(
            budget_by_type.get(sig.signal_type, 0) for sig in signals
        )
        strongest = max(sig.strength for sig in signals)
        topic_list = [sig.topic for sig in signals if sig.topic]

        should_redispatch = (
            strongest >= REDISPATCH_STRENGTH
            and tokens_generated < REDISPATCH_TOKEN_LIMIT
        )
        return CQSResponse(
            action="abandon_and_redispatch" if should_redispatch else "enrich_next",
            signals=signals,
            enrichment_budget=total_budget,
            enrichment_topics=topic_list,
        )
198
+
199
+
200
+ # ---------------------------------------------------------------------------
201
+ # Helper functions
202
+ # ---------------------------------------------------------------------------
203
+
204
+
205
def _extract_uncertain_topic(text: str, match: str) -> str:
    """Extract the topic surrounding a hedging match.

    Locates the first case-insensitive occurrence of *match* in *text* and
    returns it with up to 50 characters of context on each side, stripped
    of leading and trailing whitespace.

    Args:
        text: Full generation output that was scanned.
        match: The matched hedging phrase to locate in *text*.

    Returns:
        The context snippet, or "" when *match* does not occur in *text*.
    """
    # Search the original text instead of text.lower(): lowercasing can
    # change the string length (e.g. U+0130 lowercases to two code points),
    # which would make the index point at the wrong place in *text*.
    found = re.search(re.escape(match), text, re.IGNORECASE)
    if found is None:
        return ""
    start = max(0, found.start() - 50)
    end = min(len(text), found.end() + 50)
    return text[start:end].strip()
213
+
214
+
215
def _extract_referenced_topic(text: str, match: str) -> str:
    """Extract the topic surrounding a reference miss.

    Locates the first case-insensitive occurrence of *match* in *text* and
    returns it with up to 50 characters of context on each side, stripped
    of leading and trailing whitespace.

    Args:
        text: Full generation output that was scanned.
        match: The matched placeholder or back-reference to locate in *text*.

    Returns:
        The context snippet, or "" when *match* does not occur in *text*.
    """
    # Search the original text instead of text.lower(): lowercasing can
    # change the string length (e.g. U+0130 lowercases to two code points),
    # which would make the index point at the wrong place in *text*.
    found = re.search(re.escape(match), text, re.IGNORECASE)
    if found is None:
        return ""
    start = max(0, found.start() - 50)
    end = min(len(text), found.end() + 50)
    return text[start:end].strip()
223
+
224
+
225
def _detect_repetition(text: str) -> dict[str, int]:
    """Return word n-grams that occur at least REPETITION_THRESHOLD times.

    The text is split on whitespace and every 2-, 3- and 4-word sequence is
    counted case-insensitively. Sequences of 8 characters or fewer
    (including the joining spaces) are ignored as too short to be
    meaningful.

    Returns:
        Mapping of lower-cased n-gram to its occurrence count, in first-seen
        order; empty when nothing repeats often enough.
    """
    tokens = text.split()
    counts: Counter[str] = Counter()
    for size in (2, 3, 4):
        last_start = len(tokens) - size
        for start in range(last_start + 1):
            phrase = " ".join(tokens[start:start + size])
            if len(phrase) <= 8:
                # Skip short fragments.
                continue
            counts[phrase.lower()] += 1
    return {
        phrase: hits
        for phrase, hits in counts.items()
        if hits >= REPETITION_THRESHOLD
    }