crprotocol 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- crp/__init__.py +126 -0
- crp/__main__.py +8 -0
- crp/_typing.py +27 -0
- crp/_version.py +5 -0
- crp/adapters.py +31 -0
- crp/advanced/__init__.py +40 -0
- crp/advanced/auto_ingest.py +400 -0
- crp/advanced/cqs.py +235 -0
- crp/advanced/cross_window.py +477 -0
- crp/advanced/curator.py +265 -0
- crp/advanced/feedback.py +146 -0
- crp/advanced/hierarchical.py +211 -0
- crp/advanced/meta_learning.py +401 -0
- crp/advanced/parallel.py +98 -0
- crp/advanced/review_cycle.py +329 -0
- crp/advanced/scale_mode.py +129 -0
- crp/advanced/source_grounding.py +207 -0
- crp/ckf/__init__.py +35 -0
- crp/ckf/community.py +377 -0
- crp/ckf/fabric.py +445 -0
- crp/ckf/gc.py +175 -0
- crp/ckf/graph_walk.py +87 -0
- crp/ckf/merge.py +133 -0
- crp/ckf/pattern_query.py +122 -0
- crp/ckf/pubsub.py +128 -0
- crp/ckf/semantic.py +207 -0
- crp/cli/__init__.py +7 -0
- crp/cli/main.py +329 -0
- crp/cli/sidecar.py +929 -0
- crp/cli/startup.py +272 -0
- crp/continuation/__init__.py +103 -0
- crp/continuation/completion.py +348 -0
- crp/continuation/degradation.py +157 -0
- crp/continuation/document_map.py +160 -0
- crp/continuation/flow.py +109 -0
- crp/continuation/gap.py +419 -0
- crp/continuation/manager.py +484 -0
- crp/continuation/quality_monitor.py +179 -0
- crp/continuation/stitch.py +419 -0
- crp/continuation/trigger.py +142 -0
- crp/continuation/voice.py +157 -0
- crp/core/__init__.py +69 -0
- crp/core/batch.py +77 -0
- crp/core/circuit_breaker.py +116 -0
- crp/core/config.py +377 -0
- crp/core/context_tools.py +540 -0
- crp/core/dispatch_router.py +3977 -0
- crp/core/errors.py +128 -0
- crp/core/extraction_facade.py +384 -0
- crp/core/facilitator.py +713 -0
- crp/core/idempotency.py +215 -0
- crp/core/orchestrator.py +1435 -0
- crp/core/relay_strategies.py +613 -0
- crp/core/security_manager.py +140 -0
- crp/core/session.py +134 -0
- crp/core/task_intent.py +36 -0
- crp/core/window.py +363 -0
- crp/envelope/__init__.py +30 -0
- crp/envelope/builder.py +288 -0
- crp/envelope/decomposer.py +236 -0
- crp/envelope/formatter.py +168 -0
- crp/envelope/packer.py +211 -0
- crp/envelope/reranker.py +209 -0
- crp/envelope/scoring.py +310 -0
- crp/extraction/__init__.py +45 -0
- crp/extraction/complexity.py +96 -0
- crp/extraction/contradiction.py +132 -0
- crp/extraction/pipeline.py +360 -0
- crp/extraction/quality_gate.py +237 -0
- crp/extraction/stage1_regex.py +173 -0
- crp/extraction/stage2_statistical.py +244 -0
- crp/extraction/stage3_gliner.py +210 -0
- crp/extraction/stage4_uie.py +183 -0
- crp/extraction/stage5_discourse.py +175 -0
- crp/extraction/stage6_llm.py +178 -0
- crp/extraction/structured_output.py +219 -0
- crp/extraction/types.py +299 -0
- crp/license_guard.py +722 -0
- crp/observability/__init__.py +30 -0
- crp/observability/audit.py +118 -0
- crp/observability/events.py +233 -0
- crp/observability/metrics.py +264 -0
- crp/observability/quality.py +135 -0
- crp/observability/structured_logging.py +81 -0
- crp/observability/telemetry.py +117 -0
- crp/provenance/__init__.py +314 -0
- crp/provenance/_embeddings.py +97 -0
- crp/provenance/_types.py +378 -0
- crp/provenance/attribution_scorer.py +252 -0
- crp/provenance/claim_detector.py +229 -0
- crp/provenance/contradiction_detector.py +243 -0
- crp/provenance/distortion_detector.py +397 -0
- crp/provenance/entailment_verifier.py +358 -0
- crp/provenance/fabrication_detector.py +203 -0
- crp/provenance/hallucination_scorer.py +320 -0
- crp/provenance/omission_analyzer.py +106 -0
- crp/provenance/provenance_chain.py +205 -0
- crp/provenance/report_generator.py +440 -0
- crp/providers/__init__.py +43 -0
- crp/providers/anthropic.py +270 -0
- crp/providers/base.py +135 -0
- crp/providers/custom.py +63 -0
- crp/providers/diagnostic.py +251 -0
- crp/providers/llamacpp.py +224 -0
- crp/providers/manager.py +139 -0
- crp/providers/ollama.py +243 -0
- crp/providers/openai.py +628 -0
- crp/providers/tokenizers.py +48 -0
- crp/py.typed +0 -0
- crp/resources/__init__.py +53 -0
- crp/resources/adaptive_allocator.py +525 -0
- crp/resources/cost_model.py +388 -0
- crp/resources/overhead_manager.py +217 -0
- crp/resources/resource_manager.py +262 -0
- crp/schemas/__init__.py +20 -0
- crp/schemas/cost-estimate.json +33 -0
- crp/schemas/crp-error.json +43 -0
- crp/schemas/envelope-preview.json +40 -0
- crp/schemas/persisted-state-header.json +27 -0
- crp/schemas/quality-report.json +94 -0
- crp/schemas/session-handle.json +33 -0
- crp/schemas/session-status.json +57 -0
- crp/schemas/stream-event.json +18 -0
- crp/schemas/task-intent.json +42 -0
- crp/security/__init__.py +93 -0
- crp/security/audit_trail.py +392 -0
- crp/security/binding.py +192 -0
- crp/security/compliance.py +813 -0
- crp/security/consent.py +593 -0
- crp/security/embedding_defense.py +161 -0
- crp/security/encryption.py +202 -0
- crp/security/injection.py +335 -0
- crp/security/integrity.py +267 -0
- crp/security/privacy.py +662 -0
- crp/security/quarantine.py +249 -0
- crp/security/rbac.py +221 -0
- crp/security/validation.py +164 -0
- crp/state/__init__.py +31 -0
- crp/state/cold_storage.py +258 -0
- crp/state/compaction.py +263 -0
- crp/state/critical_state.py +104 -0
- crp/state/event_log.py +313 -0
- crp/state/fact.py +189 -0
- crp/state/serialization.py +189 -0
- crp/state/session_cleanup.py +77 -0
- crp/state/snapshot.py +290 -0
- crp/state/warm_store.py +346 -0
- crprotocol-2.0.0.dist-info/METADATA +1295 -0
- crprotocol-2.0.0.dist-info/RECORD +153 -0
- crprotocol-2.0.0.dist-info/WHEEL +4 -0
- crprotocol-2.0.0.dist-info/entry_points.txt +2 -0
- crprotocol-2.0.0.dist-info/licenses/LICENSE.md +170 -0
- crprotocol-2.0.0.dist-info/licenses/NOTICE +18 -0
crp/advanced/cqs.py
ADDED
|
@@ -0,0 +1,235 @@
|
|
|
1
|
+
# Copyright © 2025 Constantinos Vidiniotis. All rights reserved.
|
|
2
|
+
# Licensed under Elastic License 2.0 — see LICENSE.md for details.
|
|
3
|
+
"""CQS — Context Quality Signaling, detect LLM context hunger (§12).
|
|
4
|
+
|
|
5
|
+
Three signal types: hedging, reference_miss, repetition.
|
|
6
|
+
Preserves Model Ignorance (Axiom 4): signals are detected structurally
|
|
7
|
+
from generation output, never by injecting meta-protocol into the LLM.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import re
|
|
13
|
+
from collections import Counter
|
|
14
|
+
from dataclasses import dataclass, field
|
|
15
|
+
from typing import Any
|
|
16
|
+
|
|
17
|
+
# ---------------------------------------------------------------------------
|
|
18
|
+
# Constants (§12)
|
|
19
|
+
# ---------------------------------------------------------------------------
|
|
20
|
+
|
|
21
|
+
# Hedging: a signal is raised once this many hedging phrases are matched;
# its strength is count / divisor, capped at 1.0.
HEDGING_THRESHOLD = 3
HEDGING_STRENGTH_DIVISOR = 5

# Reference miss: same scheme, applied to placeholder / dangling-reference hits.
PLACEHOLDER_THRESHOLD = 2
PLACEHOLDER_STRENGTH_DIVISOR = 3

# Repetition: a phrase must occur at least this often to be reported.
REPETITION_THRESHOLD = 3

# Enrichment budget (in tokens) contributed by each signal type.
CQS_HEDGING_BUDGET = 2_000
CQS_REFERENCE_MISS_BUDGET = 3_000
CQS_REPETITION_BUDGET = 2_000

# Abandon-and-redispatch applies when the strongest signal is at least
# REDISPATCH_STRENGTH and fewer than REDISPATCH_TOKEN_LIMIT tokens were generated.
REDISPATCH_STRENGTH = 0.8
REDISPATCH_TOKEN_LIMIT = 500
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
# ---------------------------------------------------------------------------
|
|
36
|
+
# Detection patterns
|
|
37
|
+
# ---------------------------------------------------------------------------
|
|
38
|
+
|
|
39
|
+
# Case-insensitive phrases counted as hedging by the detector.
_HEDGING_PATTERNS: list[re.Pattern[str]] = [
    re.compile(source, flags=re.IGNORECASE)
    for source in (
        r"it is unclear whether",
        r"without (?:more|additional|further) information",
        r"cannot (?:determine|confirm|verify)",
        r"(?:may|might|could) (?:be|have)",
        r"insufficient (?:data|evidence|context)",
        r"further (?:analysis|investigation) (?:is )?needed",
        r"based on (?:limited|available) (?:information|data|context)",
    )
]
|
|
50
|
+
|
|
51
|
+
# Case-insensitive placeholders and back-references counted as reference misses.
_REFERENCE_MISS_PATTERNS: list[re.Pattern[str]] = [
    re.compile(source, flags=re.IGNORECASE)
    for source in (
        r"\[(?:need|missing|TODO|TBD|citation needed)\]",
        r"as (?:discussed|mentioned|noted|shown|described) (?:earlier|previously|above|before)",
        r"(?:per|according to|referring to) the (?:previous|prior|earlier) (?:analysis|section|findings)",
        r"(?:see|refer to) (?:section|chapter|part) \d+",
    )
]
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
# ---------------------------------------------------------------------------
|
|
62
|
+
# Data types
|
|
63
|
+
# ---------------------------------------------------------------------------
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
@dataclass
class ContextHungerSignal:
    """One context-hunger observation derived from generated text.

    Only ``signal_type`` is required; every other field has a neutral default.
    """

    # One of "hedging", "reference_miss" or "repetition".
    signal_type: str
    # Normalised intensity in the range 0.0-1.0.
    strength: float = 0.0
    # Text excerpt or phrase the signal relates to; empty when unknown.
    topic: str = ""
    # Identifier of the window the text came from; empty when not supplied.
    window_id: str = ""
    token_offset: int = 0
    # Free-form extras such as raw counts; each instance gets its own dict.
    details: dict[str, Any] = field(default_factory=dict)
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
@dataclass
class CQSResponse:
    """Outcome of CQS processing: the chosen action plus enrichment details."""

    # One of "abandon_and_redispatch", "enrich_next" or "none".
    action: str = "enrich_next"
    # Signals that led to this response; each instance gets its own list.
    signals: list[ContextHungerSignal] = field(default_factory=list)
    # Token budget for enrichment.
    enrichment_budget: int = 0
    # Topics to enrich; each instance gets its own list.
    enrichment_topics: list[str] = field(default_factory=list)
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
# ---------------------------------------------------------------------------
|
|
89
|
+
# CQSDetector
|
|
90
|
+
# ---------------------------------------------------------------------------
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
class CQSDetector:
    """Spot context hunger by inspecting text an LLM has already produced."""

    @staticmethod
    def _tally(
        patterns: list[re.Pattern[str]],
        text: str,
        topic_of: Any,
    ) -> tuple[int, str]:
        """Count all hits of ``patterns`` in ``text`` and pick a topic.

        The topic is taken from the first hit of the first pattern (in list
        order) for which ``topic_of`` yields a non-empty string.
        """
        total = 0
        topic = ""
        for pattern in patterns:
            hits = pattern.findall(text)
            total += len(hits)
            if hits and not topic:
                topic = topic_of(text, hits[0])
        return total, topic

    def detect_context_hunger(
        self,
        generation_text: str,
        window_id: str = "",
        tokens_generated: int | None = None,
    ) -> list[ContextHungerSignal]:
        """Inspect ``generation_text`` and report each hunger signal found.

        Three checks run in a fixed order (hedging, reference miss,
        repetition) and each contributes at most one signal, so the result
        holds between zero and three entries. ``tokens_generated`` is
        accepted but not consulted here.
        """
        found: list[ContextHungerSignal] = []

        # Type 1: hedging phrases.
        hedge_total, hedge_topic = self._tally(
            _HEDGING_PATTERNS, generation_text, _extract_uncertain_topic
        )
        if hedge_total >= HEDGING_THRESHOLD:
            found.append(
                ContextHungerSignal(
                    signal_type="hedging",
                    strength=min(hedge_total / HEDGING_STRENGTH_DIVISOR, 1.0),
                    topic=hedge_topic,
                    window_id=window_id,
                    details={"hedging_count": hedge_total},
                )
            )

        # Type 2: placeholders and dangling back-references.
        miss_total, miss_topic = self._tally(
            _REFERENCE_MISS_PATTERNS, generation_text, _extract_referenced_topic
        )
        if miss_total >= PLACEHOLDER_THRESHOLD:
            found.append(
                ContextHungerSignal(
                    signal_type="reference_miss",
                    strength=min(miss_total / PLACEHOLDER_STRENGTH_DIVISOR, 1.0),
                    topic=miss_topic,
                    window_id=window_id,
                    details={"placeholder_count": miss_total},
                )
            )

        # Type 3: repeated phrases; the most frequent one becomes the topic.
        recurring = _detect_repetition(generation_text)
        if recurring:
            found.append(
                ContextHungerSignal(
                    signal_type="repetition",
                    strength=min(len(recurring) / 3, 1.0),
                    topic=max(recurring, key=recurring.__getitem__),
                    window_id=window_id,
                    details={"repeated_items": dict(recurring)},
                )
            )

        return found

    def respond_to_context_hunger(
        self,
        signals: list[ContextHungerSignal],
        tokens_generated: int = 0,
    ) -> CQSResponse:
        """Choose the follow-up action for a set of detected signals.

        §12.4: when the strongest signal reaches the redispatch strength AND
        fewer tokens than the redispatch limit were generated, the action is
        "abandon_and_redispatch"; otherwise it is "enrich_next". With no
        signals the action is "none". The enrichment budget is the sum of the
        per-type budgets of all signals.
        """
        if not signals:
            return CQSResponse(action="none")

        budget_by_type = {
            "hedging": CQS_HEDGING_BUDGET,
            "reference_miss": CQS_REFERENCE_MISS_BUDGET,
            "repetition": CQS_REPETITION_BUDGET,
        }
        peak_strength = max(signal.strength for signal in signals)
        # Unrecognised signal types contribute nothing to the budget.
        total_budget = sum(
            budget_by_type.get(signal.signal_type, 0) for signal in signals
        )
        redispatch = (
            peak_strength >= REDISPATCH_STRENGTH
            and tokens_generated < REDISPATCH_TOKEN_LIMIT
        )

        return CQSResponse(
            action="abandon_and_redispatch" if redispatch else "enrich_next",
            signals=signals,
            enrichment_budget=total_budget,
            enrichment_topics=[signal.topic for signal in signals if signal.topic],
        )
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
# ---------------------------------------------------------------------------
|
|
201
|
+
# Helper functions
|
|
202
|
+
# ---------------------------------------------------------------------------
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
def _context_around(text: str, match: str, radius: int = 50) -> str:
    """Return ``match`` plus up to ``radius`` characters on either side.

    The search is case-insensitive and runs against ``text`` itself, so the
    offsets always line up with the string being sliced. Searching a
    lower-cased copy does not guarantee that, because ``str.lower()`` can
    change a string's length (for example "İ" becomes two code points).

    Args:
        text: Text to take the excerpt from.
        match: Literal phrase to locate; it is not treated as a regex.
        radius: Number of context characters kept on each side of the phrase.

    Returns:
        The excerpt with surrounding whitespace stripped, or "" when the
        phrase does not occur in ``text``.
    """
    found = re.search(re.escape(match), text, re.IGNORECASE)
    if found is None:
        return ""
    start = max(0, found.start() - radius)
    end = min(len(text), found.end() + radius)
    return text[start:end].strip()


def _extract_uncertain_topic(text: str, match: str) -> str:
    """Extract the text surrounding the first occurrence of a hedging match.

    Returns "" when ``match`` is not present in ``text``.
    """
    return _context_around(text, match)


def _extract_referenced_topic(text: str, match: str) -> str:
    """Extract the text surrounding the first occurrence of a reference miss.

    Returns "" when ``match`` is not present in ``text``.
    """
    return _context_around(text, match)
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
def _detect_repetition(
    text: str,
    threshold: int | None = None,
    min_chars: int = 8,
) -> dict[str, int]:
    """Find 2-4 word phrases that recur in ``text``.

    Phrases are built from whitespace-separated words (punctuation stays
    attached) and counted case-insensitively. Overlapping windows are all
    counted, so a repeated long phrase also reports its shorter sub-phrases.

    Args:
        text: Text to scan.
        threshold: Minimum occurrence count for a phrase to be reported.
            ``None`` (the default) uses ``REPETITION_THRESHOLD``.
        min_chars: Phrases must be strictly longer than this many characters
            to be counted; shorter fragments are skipped.

    Returns:
        Mapping of lower-cased phrase to occurrence count, in first-seen
        order, limited to phrases that meet ``threshold``.
    """
    if threshold is None:
        threshold = REPETITION_THRESHOLD

    tokens = text.split()
    counts: Counter[str] = Counter()
    for size in (2, 3, 4):
        for start in range(len(tokens) - size + 1):
            phrase = " ".join(tokens[start:start + size])
            # Length is checked on the phrase as written, before lower-casing.
            if len(phrase) > min_chars:
                counts[phrase.lower()] += 1
    return {phrase: seen for phrase, seen in counts.items() if seen >= threshold}
|