crprotocol 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- crp/__init__.py +126 -0
- crp/__main__.py +8 -0
- crp/_typing.py +27 -0
- crp/_version.py +5 -0
- crp/adapters.py +31 -0
- crp/advanced/__init__.py +40 -0
- crp/advanced/auto_ingest.py +400 -0
- crp/advanced/cqs.py +235 -0
- crp/advanced/cross_window.py +477 -0
- crp/advanced/curator.py +265 -0
- crp/advanced/feedback.py +146 -0
- crp/advanced/hierarchical.py +211 -0
- crp/advanced/meta_learning.py +401 -0
- crp/advanced/parallel.py +98 -0
- crp/advanced/review_cycle.py +329 -0
- crp/advanced/scale_mode.py +129 -0
- crp/advanced/source_grounding.py +207 -0
- crp/ckf/__init__.py +35 -0
- crp/ckf/community.py +377 -0
- crp/ckf/fabric.py +445 -0
- crp/ckf/gc.py +175 -0
- crp/ckf/graph_walk.py +87 -0
- crp/ckf/merge.py +133 -0
- crp/ckf/pattern_query.py +122 -0
- crp/ckf/pubsub.py +128 -0
- crp/ckf/semantic.py +207 -0
- crp/cli/__init__.py +7 -0
- crp/cli/main.py +329 -0
- crp/cli/sidecar.py +929 -0
- crp/cli/startup.py +272 -0
- crp/continuation/__init__.py +103 -0
- crp/continuation/completion.py +348 -0
- crp/continuation/degradation.py +157 -0
- crp/continuation/document_map.py +160 -0
- crp/continuation/flow.py +109 -0
- crp/continuation/gap.py +419 -0
- crp/continuation/manager.py +484 -0
- crp/continuation/quality_monitor.py +179 -0
- crp/continuation/stitch.py +419 -0
- crp/continuation/trigger.py +142 -0
- crp/continuation/voice.py +157 -0
- crp/core/__init__.py +69 -0
- crp/core/batch.py +77 -0
- crp/core/circuit_breaker.py +116 -0
- crp/core/config.py +377 -0
- crp/core/context_tools.py +540 -0
- crp/core/dispatch_router.py +3977 -0
- crp/core/errors.py +128 -0
- crp/core/extraction_facade.py +384 -0
- crp/core/facilitator.py +713 -0
- crp/core/idempotency.py +215 -0
- crp/core/orchestrator.py +1435 -0
- crp/core/relay_strategies.py +613 -0
- crp/core/security_manager.py +140 -0
- crp/core/session.py +134 -0
- crp/core/task_intent.py +36 -0
- crp/core/window.py +363 -0
- crp/envelope/__init__.py +30 -0
- crp/envelope/builder.py +288 -0
- crp/envelope/decomposer.py +236 -0
- crp/envelope/formatter.py +168 -0
- crp/envelope/packer.py +211 -0
- crp/envelope/reranker.py +209 -0
- crp/envelope/scoring.py +310 -0
- crp/extraction/__init__.py +45 -0
- crp/extraction/complexity.py +96 -0
- crp/extraction/contradiction.py +132 -0
- crp/extraction/pipeline.py +360 -0
- crp/extraction/quality_gate.py +237 -0
- crp/extraction/stage1_regex.py +173 -0
- crp/extraction/stage2_statistical.py +244 -0
- crp/extraction/stage3_gliner.py +210 -0
- crp/extraction/stage4_uie.py +183 -0
- crp/extraction/stage5_discourse.py +175 -0
- crp/extraction/stage6_llm.py +178 -0
- crp/extraction/structured_output.py +219 -0
- crp/extraction/types.py +299 -0
- crp/license_guard.py +722 -0
- crp/observability/__init__.py +30 -0
- crp/observability/audit.py +118 -0
- crp/observability/events.py +233 -0
- crp/observability/metrics.py +264 -0
- crp/observability/quality.py +135 -0
- crp/observability/structured_logging.py +81 -0
- crp/observability/telemetry.py +117 -0
- crp/provenance/__init__.py +314 -0
- crp/provenance/_embeddings.py +97 -0
- crp/provenance/_types.py +378 -0
- crp/provenance/attribution_scorer.py +252 -0
- crp/provenance/claim_detector.py +229 -0
- crp/provenance/contradiction_detector.py +243 -0
- crp/provenance/distortion_detector.py +397 -0
- crp/provenance/entailment_verifier.py +358 -0
- crp/provenance/fabrication_detector.py +203 -0
- crp/provenance/hallucination_scorer.py +320 -0
- crp/provenance/omission_analyzer.py +106 -0
- crp/provenance/provenance_chain.py +205 -0
- crp/provenance/report_generator.py +440 -0
- crp/providers/__init__.py +43 -0
- crp/providers/anthropic.py +270 -0
- crp/providers/base.py +135 -0
- crp/providers/custom.py +63 -0
- crp/providers/diagnostic.py +251 -0
- crp/providers/llamacpp.py +224 -0
- crp/providers/manager.py +139 -0
- crp/providers/ollama.py +243 -0
- crp/providers/openai.py +628 -0
- crp/providers/tokenizers.py +48 -0
- crp/py.typed +0 -0
- crp/resources/__init__.py +53 -0
- crp/resources/adaptive_allocator.py +525 -0
- crp/resources/cost_model.py +388 -0
- crp/resources/overhead_manager.py +217 -0
- crp/resources/resource_manager.py +262 -0
- crp/schemas/__init__.py +20 -0
- crp/schemas/cost-estimate.json +33 -0
- crp/schemas/crp-error.json +43 -0
- crp/schemas/envelope-preview.json +40 -0
- crp/schemas/persisted-state-header.json +27 -0
- crp/schemas/quality-report.json +94 -0
- crp/schemas/session-handle.json +33 -0
- crp/schemas/session-status.json +57 -0
- crp/schemas/stream-event.json +18 -0
- crp/schemas/task-intent.json +42 -0
- crp/security/__init__.py +93 -0
- crp/security/audit_trail.py +392 -0
- crp/security/binding.py +192 -0
- crp/security/compliance.py +813 -0
- crp/security/consent.py +593 -0
- crp/security/embedding_defense.py +161 -0
- crp/security/encryption.py +202 -0
- crp/security/injection.py +335 -0
- crp/security/integrity.py +267 -0
- crp/security/privacy.py +662 -0
- crp/security/quarantine.py +249 -0
- crp/security/rbac.py +221 -0
- crp/security/validation.py +164 -0
- crp/state/__init__.py +31 -0
- crp/state/cold_storage.py +258 -0
- crp/state/compaction.py +263 -0
- crp/state/critical_state.py +104 -0
- crp/state/event_log.py +313 -0
- crp/state/fact.py +189 -0
- crp/state/serialization.py +189 -0
- crp/state/session_cleanup.py +77 -0
- crp/state/snapshot.py +290 -0
- crp/state/warm_store.py +346 -0
- crprotocol-2.0.0.dist-info/METADATA +1295 -0
- crprotocol-2.0.0.dist-info/RECORD +153 -0
- crprotocol-2.0.0.dist-info/WHEEL +4 -0
- crprotocol-2.0.0.dist-info/entry_points.txt +2 -0
- crprotocol-2.0.0.dist-info/licenses/LICENSE.md +170 -0
- crprotocol-2.0.0.dist-info/licenses/NOTICE +18 -0
|
@@ -0,0 +1,243 @@
|
|
|
1
|
+
# Copyright © 2025 Constantinos Vidiniotis. All rights reserved.
|
|
2
|
+
# Licensed under Elastic License 2.0 — see LICENSE.md for details.
|
|
3
|
+
"""Contradiction Detector — catch self-contradictions in LLM output.
|
|
4
|
+
|
|
5
|
+
Within the same dispatch window:
|
|
6
|
+
Claim 2: "The system is secure."
|
|
7
|
+
Claim 7: "The system has critical vulnerabilities."
|
|
8
|
+
|
|
9
|
+
Across windows (if prior claims supplied):
|
|
10
|
+
Window 1: "Revenue increased 10%."
|
|
11
|
+
Window 3: "Revenue declined significantly."
|
|
12
|
+
|
|
13
|
+
This module detects contradictions through three signals:
|
|
14
|
+
1. NEGATION conflicts — same content words + negation flip
|
|
15
|
+
2. NUMBER conflicts — same entity referenced with different values
|
|
16
|
+
3. SEMANTIC conflicts — high similarity + opposing sentiment signals
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from __future__ import annotations
|
|
20
|
+
|
|
21
|
+
import re
|
|
22
|
+
|
|
23
|
+
from ._types import (
|
|
24
|
+
ClaimAttribution,
|
|
25
|
+
ClaimType,
|
|
26
|
+
ContradictionResult,
|
|
27
|
+
)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
# ---------------------------------------------------------------------------
|
|
31
|
+
# Internal helpers
|
|
32
|
+
# ---------------------------------------------------------------------------
|
|
33
|
+
|
|
34
|
+
_NEGATION_WORDS = frozenset({
|
|
35
|
+
"not", "no", "never", "neither", "nor", "none",
|
|
36
|
+
"doesn't", "don't", "didn't", "isn't", "aren't", "wasn't",
|
|
37
|
+
"weren't", "won't", "wouldn't", "shouldn't", "couldn't",
|
|
38
|
+
"can't", "cannot", "hasn't", "haven't", "hadn't",
|
|
39
|
+
})
|
|
40
|
+
|
|
41
|
+
_POSITIVE_SIGNALS = frozenset({
|
|
42
|
+
"increased", "grew", "improved", "succeeded", "safe",
|
|
43
|
+
"secure", "stable", "reliable", "positive", "effective",
|
|
44
|
+
"beneficial", "strong", "healthy", "profitable", "rising",
|
|
45
|
+
"gaining", "expanding", "accelerating",
|
|
46
|
+
})
|
|
47
|
+
|
|
48
|
+
_NEGATIVE_SIGNALS = frozenset({
|
|
49
|
+
"decreased", "declined", "worsened", "failed", "unsafe",
|
|
50
|
+
"insecure", "unstable", "unreliable", "negative", "ineffective",
|
|
51
|
+
"harmful", "weak", "unhealthy", "unprofitable", "falling",
|
|
52
|
+
"losing", "contracting", "decelerating", "vulnerable",
|
|
53
|
+
"critical", "severe", "dangerous",
|
|
54
|
+
})
|
|
55
|
+
|
|
56
|
+
# Numbers in context: extract (subject_words, number_value) pairs
|
|
57
|
+
_NUM_RE = re.compile(r"(\d[\d,]*(?:\.\d+)?)\s*%?")
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def _content_words(text: str) -> set[str]:
|
|
61
|
+
"""Extract non-stopword content words (lowered)."""
|
|
62
|
+
stop = {
|
|
63
|
+
"a", "an", "the", "and", "or", "but", "in", "on", "at", "to",
|
|
64
|
+
"for", "of", "is", "it", "are", "was", "were", "be", "been",
|
|
65
|
+
"being", "have", "has", "had", "this", "that", "these", "those",
|
|
66
|
+
"with", "from", "by", "as", "will", "would",
|
|
67
|
+
}
|
|
68
|
+
return {
|
|
69
|
+
w for w in re.findall(r"[a-z]+", text.lower())
|
|
70
|
+
if w not in stop and len(w) > 2
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def _has_negation(text: str) -> bool:
    """Return True when *text* contains at least one negation word.

    The text is lower-cased and tokenised into runs of letters and ASCII
    apostrophes, then matched against ``_NEGATION_WORDS``.
    """
    # Generated text may spell contractions with a typographic apostrophe
    # (U+2019, "doesn’t"); fold it to ASCII so the contraction stays one
    # token instead of being split into "doesn" + "t".
    normalised = text.lower().replace("\u2019", "'")
    tokens = set(re.findall(r"[a-z']+", normalised))
    return bool(tokens & _NEGATION_WORDS)
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def _sentiment_signals(text: str) -> tuple[set[str], set[str]]:
    """Return ``(positive, negative)`` signal words found in *text*.

    Tokens are lower-cased runs of ``a-z``; each returned set holds the
    tokens that appear in ``_POSITIVE_SIGNALS`` / ``_NEGATIVE_SIGNALS``.
    """
    tokens = set(re.findall(r"[a-z]+", text.lower()))
    positive = {tok for tok in tokens if tok in _POSITIVE_SIGNALS}
    negative = {tok for tok in tokens if tok in _NEGATIVE_SIGNALS}
    return positive, negative
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def _extract_numbers(text: str) -> list[str]:
    """Return every number in *text*, in order, with commas removed.

    Values stay as strings (``"1,250.5"`` becomes ``"1250.5"``); a trailing
    ``%`` is matched by ``_NUM_RE`` but is not part of the captured value.
    """
    numbers: list[str] = []
    for match in _NUM_RE.finditer(text):
        digits = match.group(1)
        numbers.append(digits.replace(",", ""))
    return numbers
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
# ---------------------------------------------------------------------------
|
|
92
|
+
# Public API
|
|
93
|
+
# ---------------------------------------------------------------------------
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def detect_contradictions(
    attributions: list[ClaimAttribution],
    *,
    prior_claims: list[str] | None = None,
    content_overlap_threshold: float = 0.30,
) -> list[ContradictionResult]:
    """Find contradicting claim pairs in the current window.

    Only attributions typed ``FACTUAL_CLAIM`` or ``HEDGE`` take part. Every
    unordered pair of those claims is handed to ``_check_pair``, which
    reports at most one contradiction per pair (NEGATION, NUMBER_CONFLICT
    or SEMANTIC). When ``prior_claims`` is non-empty, each current claim is
    additionally compared with every prior claim text.

    Args:
        attributions: Scored claim attributions for the current window.
        prior_claims: Optional claim texts from earlier windows. In the
            results they are identified by negative indices: the first
            prior claim is ``-1``, the second ``-2``, and so on.
        content_overlap_threshold: Minimum Jaccard overlap of content
            words for two claims to count as discussing the same topic.

    Returns:
        Intra-window contradictions first (in pair order), followed by
        cross-window contradictions.
    """
    candidates = [
        attr for attr in attributions
        if attr.claim_type in (ClaimType.FACTUAL_CLAIM, ClaimType.HEDGE)
    ]
    found: list[ContradictionResult] = []

    # Pass 1: each unordered pair inside the current window, earlier claim first.
    for position, first in enumerate(candidates):
        for second in candidates[position + 1:]:
            hit = _check_pair(
                first.claim_index, first.claim_text,
                second.claim_index, second.claim_text,
                content_overlap_threshold,
            )
            if hit:
                found.append(hit)

    if not prior_claims:
        return found

    # Pass 2: current claims against claims carried over from prior windows.
    for current in candidates:
        for offset, earlier_text in enumerate(prior_claims, start=1):
            hit = _check_pair(
                current.claim_index, current.claim_text,
                -offset, earlier_text,  # negative index marks a prior-window claim
                content_overlap_threshold,
            )
            if hit:
                found.append(hit)

    return found
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def _check_pair(
    idx_a: int, text_a: str,
    idx_b: int, text_b: str,
    overlap_threshold: float,
) -> ContradictionResult | None:
    """Check one pair of claims for a contradiction.

    The pair is only examined when the Jaccard overlap of their content
    words reaches ``overlap_threshold``. Three checks then run in order and
    the first that fires is returned:

    1. NEGATION (severity 0.85) — exactly one claim contains a negation.
    2. NUMBER_CONFLICT (severity 0.75) — both claims contain numbers and
       share no numeric value.
    3. SEMANTIC (severity 0.70) — one claim carries positive signal words
       while the other carries negative ones.

    Args:
        idx_a: Index reported for the first claim.
        text_a: Text of the first claim.
        idx_b: Index reported for the second claim.
        text_b: Text of the second claim.
        overlap_threshold: Minimum Jaccard overlap to treat the claims as
            being about the same topic.

    Returns:
        A ContradictionResult (claim texts truncated to 200 characters), or
        None when the claims are unrelated or no check fires.
    """
    from decimal import Decimal, InvalidOperation

    words_a = _content_words(text_a)
    words_b = _content_words(text_b)
    if not words_a or not words_b:
        return None

    shared_words = words_a & words_b
    # Both sets are non-empty at this point, so the union is never empty.
    overlap = len(shared_words) / len(words_a | words_b)
    if overlap < overlap_threshold:
        return None  # Different topics — can't contradict

    def _conflict(kind: str, severity: float, detail: str) -> ContradictionResult:
        return ContradictionResult(
            claim_a_index=idx_a,
            claim_a_text=text_a[:200],
            claim_b_index=idx_b,
            claim_b_text=text_b[:200],
            contradiction_type=kind,
            severity=severity,
            detail=detail,
        )

    def _numeric_value(raw: str) -> object:
        # Compare by value so "10" and "10.0" count as the same number;
        # fall back to the raw string if it cannot be parsed.
        try:
            return Decimal(raw)
        except InvalidOperation:
            return raw

    # --- Check 1: Negation flip ---
    if _has_negation(text_a) != _has_negation(text_b):
        return _conflict(
            "NEGATION",
            0.85,
            f"Claims share {len(shared_words)} content words but "
            f"one contains negation and the other does not",
        )

    # --- Check 2: Number conflict ---
    nums_a = _extract_numbers(text_a)
    nums_b = _extract_numbers(text_b)
    if nums_a and nums_b:
        values_a = {_numeric_value(n) for n in nums_a}
        values_b = {_numeric_value(n) for n in nums_b}
        # A conflict requires that the claims have no numeric value in common.
        if values_a.isdisjoint(values_b):
            return _conflict(
                "NUMBER_CONFLICT",
                0.75,
                f"Claims about same topic use different numbers: "
                f"{', '.join(sorted(set(nums_a))[:3])} vs "
                f"{', '.join(sorted(set(nums_b))[:3])}",
            )

    # --- Check 3: Semantic opposition ---
    pos_a, neg_sig_a = _sentiment_signals(text_a)
    pos_b, neg_sig_b = _sentiment_signals(text_b)

    # One claim positive, the other negative, and the positive one is not
    # itself carrying negative signals.
    a_positive_b_negative = bool(pos_a and neg_sig_b and not neg_sig_a)
    b_positive_a_negative = bool(pos_b and neg_sig_a and not neg_sig_b)
    if a_positive_b_negative or b_positive_a_negative:
        return _conflict(
            "SEMANTIC",
            0.70,
            f"Claims about same topic have opposing sentiment: "
            f"positive signals {sorted(pos_a | pos_b)[:3]}, "
            f"negative signals {sorted(neg_sig_a | neg_sig_b)[:3]}",
        )

    return None
|
|
@@ -0,0 +1,397 @@
|
|
|
1
|
+
# Copyright © 2025 Constantinos Vidiniotis. All rights reserved.
|
|
2
|
+
# Licensed under Elastic License 2.0 — see LICENSE.md for details.
|
|
3
|
+
"""Distortion Detector — catch when grounded claims misrepresent source facts.
|
|
4
|
+
|
|
5
|
+
The most dangerous failure in AI attribution: a claim is scored as
|
|
6
|
+
CONTEXT_GROUNDED (high similarity to a source fact) but the model has
|
|
7
|
+
subtly CHANGED a key detail — a number, a negation, a qualifier.
|
|
8
|
+
The auditor sees "grounded, confidence 0.89" and trusts it. But the
|
|
9
|
+
claim is wrong.
|
|
10
|
+
|
|
11
|
+
This module catches six lexical distortion types (plus embedding-based SEMANTIC_DRIFT when embeddings are available):
|
|
12
|
+
- NUMBER_CHANGED: "10%" → "15%"
|
|
13
|
+
- NEGATION_FLIP: "is safe" → "is not safe"
|
|
14
|
+
- QUALIFIER_DROPPED: "approximately 10" → "10" (false precision)
|
|
15
|
+
- QUALIFIER_ADDED: "10" → "always 10" (over-generalisation)
|
|
16
|
+
- SCOPE_CHANGED: "in Q3" → "annually"
|
|
17
|
+
- ENTITY_SUBSTITUTED: "Company A" → "Company B"
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
from __future__ import annotations
|
|
21
|
+
|
|
22
|
+
import re
|
|
23
|
+
from collections.abc import Sequence
|
|
24
|
+
|
|
25
|
+
from crp.envelope.packer import PackedFact
|
|
26
|
+
|
|
27
|
+
from ._embeddings import cosine_similarity as _emb_cosine
|
|
28
|
+
from ._embeddings import encode_texts as _encode_texts
|
|
29
|
+
from ._types import (
|
|
30
|
+
AttributionType,
|
|
31
|
+
ClaimAttribution,
|
|
32
|
+
DistortionResult,
|
|
33
|
+
DistortionType,
|
|
34
|
+
)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
# ---------------------------------------------------------------------------
|
|
38
|
+
# Internal: entity extractors
|
|
39
|
+
# ---------------------------------------------------------------------------
|
|
40
|
+
|
|
41
|
+
# Numbers: integers, decimals, with optional leading $ or trailing %
|
|
42
|
+
_NUM_RE = re.compile(
|
|
43
|
+
r"(?<![a-zA-Z])" # not preceded by letter
|
|
44
|
+
r"\$?\s*" # optional $
|
|
45
|
+
r"(\d[\d,]*(?:\.\d+)?)" # the number itself
|
|
46
|
+
r"\s*%?" # optional %
|
|
47
|
+
)
|
|
48
|
+
|
|
49
|
+
# Negation words
|
|
50
|
+
_NEGATION_WORDS = frozenset({
|
|
51
|
+
"not", "no", "never", "neither", "nor", "none", "nobody",
|
|
52
|
+
"nothing", "nowhere", "hardly", "scarcely", "barely",
|
|
53
|
+
"doesn't", "don't", "didn't", "isn't", "aren't", "wasn't",
|
|
54
|
+
"weren't", "won't", "wouldn't", "shouldn't", "couldn't",
|
|
55
|
+
"can't", "cannot", "hasn't", "haven't", "hadn't",
|
|
56
|
+
})
|
|
57
|
+
|
|
58
|
+
# Qualifier words that add hedging / precision / universality
|
|
59
|
+
_HEDGE_QUALIFIERS = frozenset({
|
|
60
|
+
"approximately", "roughly", "about", "around", "nearly",
|
|
61
|
+
"possibly", "possibly", "perhaps", "maybe", "likely",
|
|
62
|
+
"probably", "potentially", "estimated", "up to",
|
|
63
|
+
"might", "could", "may", "suggest", "suggests",
|
|
64
|
+
})
|
|
65
|
+
_CERTAINTY_QUALIFIERS = frozenset({
|
|
66
|
+
"exactly", "precisely", "always", "never", "definitely",
|
|
67
|
+
"certainly", "absolutely", "guaranteed", "invariably",
|
|
68
|
+
"exclusively", "solely", "only",
|
|
69
|
+
})
|
|
70
|
+
|
|
71
|
+
# Scope modifiers
|
|
72
|
+
_SCOPE_PATTERNS = re.compile(
|
|
73
|
+
r"\b("
|
|
74
|
+
r"in\s+Q[1-4]|per\s+quarter|quarterly"
|
|
75
|
+
r"|annually|per\s+year|yearly|year-over-year|yoy"
|
|
76
|
+
r"|monthly|per\s+month|week(?:ly)?|daily"
|
|
77
|
+
r"|globally|worldwide|nationally|regionally|locally"
|
|
78
|
+
r"|all\s+(?:users?|customers?|clients?|regions?)"
|
|
79
|
+
r"|some\s+(?:users?|customers?|clients?|regions?)"
|
|
80
|
+
r")\b",
|
|
81
|
+
re.IGNORECASE,
|
|
82
|
+
)
|
|
83
|
+
|
|
84
|
+
# Proper nouns: capitalized words (excluding common sentence starters)
|
|
85
|
+
_PROPER_NOUN_RE = re.compile(r"\b([A-Z][a-z]+(?:\s+[A-Z][a-z]+)*)\b")
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def _extract_numbers(text: str) -> list[str]:
    """Return the numbers found in *text*, in order, as comma-free strings.

    ``_NUM_RE`` captures only the digits, so a leading ``$`` or trailing
    ``%`` is not part of the returned value.
    """
    numbers: list[str] = []
    for captured in _NUM_RE.findall(text):
        numbers.append(captured.replace(",", ""))
    return numbers
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def _extract_negations(text: str) -> set[str]:
    """Return the negation words present in *text*.

    The text is lower-cased and tokenised into runs of letters and ASCII
    apostrophes; the result is the subset of tokens that appear in
    ``_NEGATION_WORDS``.
    """
    # Fold the typographic apostrophe (U+2019, "isn’t") to ASCII so that
    # contractions remain a single token and are recognised.
    normalised = text.lower().replace("\u2019", "'")
    tokens = set(re.findall(r"[a-z']+", normalised))
    return tokens & _NEGATION_WORDS
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def _extract_qualifiers(text: str) -> tuple[set[str], set[str]]:
    """Return (hedge_qualifiers_found, certainty_qualifiers_found).

    Matching is case-insensitive and on whole words: a qualifier counts
    only when it is delimited by word boundaries, so "may" is not found in
    "mayor" or "maybe", "only" is not found in "commonly", and "could" is
    not found in "couldn't".
    """
    lowered = text.lower()

    def _mentioned(qualifier: str) -> bool:
        # Multi-word qualifiers ("up to") accept any run of whitespace
        # between their words.
        body = r"\s+".join(re.escape(part) for part in qualifier.split())
        return re.search(rf"\b{body}\b", lowered) is not None

    hedges = {q for q in _HEDGE_QUALIFIERS if _mentioned(q)}
    certs = {q for q in _CERTAINTY_QUALIFIERS if _mentioned(q)}
    return hedges, certs
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def _extract_scopes(text: str) -> list[str]:
    """Return every scope modifier matched by ``_SCOPE_PATTERNS`` in *text*.

    Matches are returned lower-cased, in order of appearance, with
    duplicates kept.
    """
    scopes: list[str] = []
    for match in _SCOPE_PATTERNS.finditer(text):
        scopes.append(match.group(1).lower())
    return scopes
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def _extract_proper_nouns(text: str) -> set[str]:
    """Return the distinct capitalised word runs found in *text*.

    Both single capitalised words and multi-word runs matched by
    ``_PROPER_NOUN_RE`` are returned, with their original casing.
    """
    found: set[str] = set()
    for match in _PROPER_NOUN_RE.finditer(text):
        found.add(match.group(0))
    return found
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
# ---------------------------------------------------------------------------
|
|
119
|
+
# Public API
|
|
120
|
+
# ---------------------------------------------------------------------------
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def _build_distortion(
    attr: ClaimAttribution,
    fact_id: str,
    fact_text: str,
    distortion_type: DistortionType,
    severity: float,
    detail: str,
    claim_value: str,
    fact_value: str,
) -> DistortionResult:
    """Assemble a DistortionResult with the truncated previews every check shares."""
    return DistortionResult(
        claim_index=attr.claim_index,
        claim_text=attr.claim_text[:200],
        source_fact_id=fact_id,
        source_fact_preview=fact_text[:120],
        distortion_type=distortion_type,
        severity=severity,
        detail=detail,
        claim_value=claim_value,
        fact_value=fact_value,
    )


def _changed_numbers(
    claim_nums: list[str],
    fact_nums: list[str],
) -> list[tuple[str, str, float]]:
    """Pair claim numbers missing from the fact with a same-magnitude fact number.

    Returns ``(claim_number, fact_number, severity)`` tuples. Numbers are
    compared by value, so "10" and "10.0" are the same number, and each
    distinct claim number is reported at most once.
    """
    parsed_fact: list[tuple[str, float]] = []
    for fn in fact_nums:
        try:
            parsed_fact.append((fn, float(fn)))
        except ValueError:
            continue
    fact_values = {fv for _, fv in parsed_fact}

    changes: list[tuple[str, str, float]] = []
    # dict.fromkeys de-duplicates while keeping first-seen order.
    for cn in dict.fromkeys(claim_nums):
        try:
            cv = float(cn)
        except ValueError:
            continue
        if cv in fact_values:
            continue  # the fact states this value — not a change
        for fn, fv in parsed_fact:
            # Same order of magnitude (ratio strictly between 0.1 and 10)
            # but a different value: likely the number that was altered.
            if fv != 0 and 0.1 < abs(cv / fv) < 10.0:
                severity = min(abs(cv - fv) / max(abs(fv), 1e-9), 1.0)
                changes.append((cn, fn, round(severity, 2)))
                break
    return changes


def detect_distortions(
    attributions: list[ClaimAttribution],
    packed_facts: list[PackedFact],
) -> list[DistortionResult]:
    """Detect distortions in context-grounded claims.

    Each CONTEXT_GROUNDED or MIXED attribution that has at least one
    attributed fact is compared with the first entry of its
    ``attributed_facts`` (treated as the top source fact). The lexical
    checks run in this order and each may add results:

    1. NUMBER_CHANGED — a claim number absent from the fact sits within one
       order of magnitude of a fact number (severity = relative difference,
       capped at 1.0).
    2. NEGATION_FLIP — exactly one side contains a negation word (0.90).
    3. QUALIFIER_DROPPED — the fact hedges, the claim has no hedge (0.60).
       QUALIFIER_ADDED — the claim adds a certainty qualifier (0.55).
    4. SCOPE_CHANGED — both state a scope and the scopes differ (0.70).
    5. ENTITY_SUBSTITUTED — the claim both introduces and omits proper
       nouns relative to the fact (0.80).

    Finally ``_semantic_drift_check`` is run over all attributions and may
    append SEMANTIC_DRIFT results.

    Args:
        attributions: Scored claim attributions from attribution_scorer.
        packed_facts: All envelope facts, used to look up full fact text by
            ``fact_id``; the attributed fact's preview is the fallback.

    Returns:
        List of DistortionResult — one per detected distortion. An empty
        list means no distortions were found.
    """
    # Build fact lookup by ID for O(1) access
    fact_lookup: dict[str, str] = {pf.fact_id: pf.text for pf in packed_facts}

    results: list[DistortionResult] = []

    for attr in attributions:
        # Only check grounded/mixed claims — these are the "trusted" ones
        if attr.attribution_type not in (
            AttributionType.CONTEXT_GROUNDED,
            AttributionType.MIXED,
        ):
            continue
        if not attr.attributed_facts:
            continue

        top_fact = attr.attributed_facts[0]
        fact_id = top_fact.fact_id
        fact_text = fact_lookup.get(fact_id, top_fact.fact_text_preview)
        claim_text = attr.claim_text

        # --- Check 1: Number changes ---
        for cn, fn, severity in _changed_numbers(
            _extract_numbers(claim_text), _extract_numbers(fact_text)
        ):
            results.append(_build_distortion(
                attr, fact_id, fact_text,
                DistortionType.NUMBER_CHANGED,
                severity,
                f"Claim uses '{cn}' but source fact "
                f"uses '{fn}'",
                cn,
                fn,
            ))

        # --- Check 2: Negation flip ---
        claim_negs = _extract_negations(claim_text)
        fact_negs = _extract_negations(fact_text)
        if claim_negs and not fact_negs:
            listed = ", ".join(sorted(claim_negs)[:3])
            results.append(_build_distortion(
                attr, fact_id, fact_text,
                DistortionType.NEGATION_FLIP,
                0.90,
                f"Claim contains negation ({listed}) "
                f"but source fact does not",
                listed,
                "(no negation)",
            ))
        elif fact_negs and not claim_negs:
            listed = ", ".join(sorted(fact_negs)[:3])
            results.append(_build_distortion(
                attr, fact_id, fact_text,
                DistortionType.NEGATION_FLIP,
                0.90,
                f"Source fact contains negation ({listed}) "
                f"but claim does not",
                "(no negation)",
                listed,
            ))

        # --- Check 3: Qualifier changes ---
        claim_hedges, claim_certs = _extract_qualifiers(claim_text)
        fact_hedges, fact_certs = _extract_qualifiers(fact_text)

        # Fact has hedge qualifier but claim dropped it → false precision.
        # Only fires when the claim carries no hedge at all.
        dropped = fact_hedges - claim_hedges
        if dropped and not claim_hedges:
            listed = ", ".join(sorted(dropped)[:3])
            results.append(_build_distortion(
                attr, fact_id, fact_text,
                DistortionType.QUALIFIER_DROPPED,
                0.60,
                f"Source fact qualifies with '{listed}' "
                f"but claim states without qualification",
                "(unqualified)",
                listed,
            ))

        # Claim adds certainty qualifier not in fact → over-generalisation
        added_certs = claim_certs - fact_certs
        if added_certs:
            listed = ", ".join(sorted(added_certs)[:3])
            results.append(_build_distortion(
                attr, fact_id, fact_text,
                DistortionType.QUALIFIER_ADDED,
                0.55,
                f"Claim adds certainty qualifier '{listed}' "
                f"not present in source fact",
                listed,
                "(no such qualifier)",
            ))

        # --- Check 4: Scope change ---
        claim_scope_set = set(_extract_scopes(claim_text))
        fact_scope_set = set(_extract_scopes(fact_text))
        if claim_scope_set and fact_scope_set and claim_scope_set != fact_scope_set:
            claim_scope = ", ".join(sorted(claim_scope_set))
            fact_scope = ", ".join(sorted(fact_scope_set))
            results.append(_build_distortion(
                attr, fact_id, fact_text,
                DistortionType.SCOPE_CHANGED,
                0.70,
                f"Claim scope '{claim_scope}' "
                f"differs from fact scope '{fact_scope}'",
                claim_scope,
                fact_scope,
            ))

        # --- Check 5: Entity substitution ---
        claim_entities = _extract_proper_nouns(claim_text)
        fact_entities = _extract_proper_nouns(fact_text)
        if claim_entities and fact_entities:
            new_entities = claim_entities - fact_entities
            missing_entities = fact_entities - claim_entities
            # A swap needs both: something added AND something removed
            if new_entities and missing_entities:
                introduced = ", ".join(sorted(new_entities)[:3])
                omitted = ", ".join(sorted(missing_entities)[:3])
                results.append(_build_distortion(
                    attr, fact_id, fact_text,
                    DistortionType.ENTITY_SUBSTITUTED,
                    0.80,
                    f"Claim introduces entities "
                    f"'{introduced}' while fact has "
                    f"'{omitted}'",
                    introduced,
                    omitted,
                ))

    # --- Check 6: Semantic drift (P-3) ---
    # Embedding-based check for meaning-level distortions that the lexical
    # checks above cannot catch; appends to ``results`` in place.
    _semantic_drift_check(attributions, fact_lookup, results)

    return results
|
|
326
|
+
|
|
327
|
+
|
|
328
|
+
def _semantic_drift_check(
|
|
329
|
+
attributions: list[ClaimAttribution],
|
|
330
|
+
fact_lookup: dict[str, str],
|
|
331
|
+
results: list[DistortionResult],
|
|
332
|
+
*,
|
|
333
|
+
_embedder_override: object = None,
|
|
334
|
+
drift_threshold: float = 0.65,
|
|
335
|
+
) -> None:
|
|
336
|
+
"""Detect semantic drift using dense embeddings.
|
|
337
|
+
|
|
338
|
+
Claims that are CONTEXT_GROUNDED but have low semantic similarity
|
|
339
|
+
to their source fact (below drift_threshold) may be paraphrasing
|
|
340
|
+
in a way that changes meaning — something regex can't catch.
|
|
341
|
+
|
|
342
|
+
Only triggers when embeddings are available; degrades silently.
|
|
343
|
+
"""
|
|
344
|
+
# Collect grounded claim-fact pairs
|
|
345
|
+
pairs: list[tuple[ClaimAttribution, str, str]] = []
|
|
346
|
+
for attr in attributions:
|
|
347
|
+
if attr.attribution_type not in (
|
|
348
|
+
AttributionType.CONTEXT_GROUNDED,
|
|
349
|
+
AttributionType.MIXED,
|
|
350
|
+
):
|
|
351
|
+
continue
|
|
352
|
+
if not attr.attributed_facts:
|
|
353
|
+
continue
|
|
354
|
+
top_fact = attr.attributed_facts[0]
|
|
355
|
+
fact_text = fact_lookup.get(top_fact.fact_id, top_fact.fact_text_preview)
|
|
356
|
+
pairs.append((attr, top_fact.fact_id, fact_text))
|
|
357
|
+
|
|
358
|
+
if not pairs:
|
|
359
|
+
return
|
|
360
|
+
|
|
361
|
+
# Batch-encode all claims and facts together
|
|
362
|
+
all_texts = []
|
|
363
|
+
for attr, _, fact_text in pairs:
|
|
364
|
+
all_texts.append(attr.claim_text)
|
|
365
|
+
all_texts.append(fact_text)
|
|
366
|
+
|
|
367
|
+
embs = _encode_texts(all_texts, _model_override=_embedder_override)
|
|
368
|
+
if embs is None:
|
|
369
|
+
return # Embeddings unavailable — silent degradation
|
|
370
|
+
|
|
371
|
+
for i, (attr, fact_id, fact_text) in enumerate(pairs):
|
|
372
|
+
claim_emb = embs[i * 2]
|
|
373
|
+
fact_emb = embs[i * 2 + 1]
|
|
374
|
+
sim = max(0.0, _emb_cosine(claim_emb, fact_emb))
|
|
375
|
+
|
|
376
|
+
if sim < drift_threshold:
|
|
377
|
+
# Already caught by regex checks? Skip if same claim_index
|
|
378
|
+
# already has a distortion result to avoid double-flagging
|
|
379
|
+
already_flagged = any(
|
|
380
|
+
r.claim_index == attr.claim_index for r in results
|
|
381
|
+
)
|
|
382
|
+
if not already_flagged:
|
|
383
|
+
results.append(DistortionResult(
|
|
384
|
+
claim_index=attr.claim_index,
|
|
385
|
+
claim_text=attr.claim_text[:200],
|
|
386
|
+
source_fact_id=fact_id,
|
|
387
|
+
source_fact_preview=fact_text[:120],
|
|
388
|
+
distortion_type=DistortionType.SEMANTIC_DRIFT,
|
|
389
|
+
severity=round(1.0 - sim, 2),
|
|
390
|
+
detail=(
|
|
391
|
+
f"Semantic similarity {sim:.2f} below threshold "
|
|
392
|
+
f"{drift_threshold:.2f} — possible meaning-level "
|
|
393
|
+
f"distortion not caught by lexical checks"
|
|
394
|
+
),
|
|
395
|
+
claim_value=f"similarity={sim:.2f}",
|
|
396
|
+
fact_value=f"threshold={drift_threshold:.2f}",
|
|
397
|
+
))
|