crprotocol 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (153) hide show
  1. crp/__init__.py +126 -0
  2. crp/__main__.py +8 -0
  3. crp/_typing.py +27 -0
  4. crp/_version.py +5 -0
  5. crp/adapters.py +31 -0
  6. crp/advanced/__init__.py +40 -0
  7. crp/advanced/auto_ingest.py +400 -0
  8. crp/advanced/cqs.py +235 -0
  9. crp/advanced/cross_window.py +477 -0
  10. crp/advanced/curator.py +265 -0
  11. crp/advanced/feedback.py +146 -0
  12. crp/advanced/hierarchical.py +211 -0
  13. crp/advanced/meta_learning.py +401 -0
  14. crp/advanced/parallel.py +98 -0
  15. crp/advanced/review_cycle.py +329 -0
  16. crp/advanced/scale_mode.py +129 -0
  17. crp/advanced/source_grounding.py +207 -0
  18. crp/ckf/__init__.py +35 -0
  19. crp/ckf/community.py +377 -0
  20. crp/ckf/fabric.py +445 -0
  21. crp/ckf/gc.py +175 -0
  22. crp/ckf/graph_walk.py +87 -0
  23. crp/ckf/merge.py +133 -0
  24. crp/ckf/pattern_query.py +122 -0
  25. crp/ckf/pubsub.py +128 -0
  26. crp/ckf/semantic.py +207 -0
  27. crp/cli/__init__.py +7 -0
  28. crp/cli/main.py +329 -0
  29. crp/cli/sidecar.py +929 -0
  30. crp/cli/startup.py +272 -0
  31. crp/continuation/__init__.py +103 -0
  32. crp/continuation/completion.py +348 -0
  33. crp/continuation/degradation.py +157 -0
  34. crp/continuation/document_map.py +160 -0
  35. crp/continuation/flow.py +109 -0
  36. crp/continuation/gap.py +419 -0
  37. crp/continuation/manager.py +484 -0
  38. crp/continuation/quality_monitor.py +179 -0
  39. crp/continuation/stitch.py +419 -0
  40. crp/continuation/trigger.py +142 -0
  41. crp/continuation/voice.py +157 -0
  42. crp/core/__init__.py +69 -0
  43. crp/core/batch.py +77 -0
  44. crp/core/circuit_breaker.py +116 -0
  45. crp/core/config.py +377 -0
  46. crp/core/context_tools.py +540 -0
  47. crp/core/dispatch_router.py +3977 -0
  48. crp/core/errors.py +128 -0
  49. crp/core/extraction_facade.py +384 -0
  50. crp/core/facilitator.py +713 -0
  51. crp/core/idempotency.py +215 -0
  52. crp/core/orchestrator.py +1435 -0
  53. crp/core/relay_strategies.py +613 -0
  54. crp/core/security_manager.py +140 -0
  55. crp/core/session.py +134 -0
  56. crp/core/task_intent.py +36 -0
  57. crp/core/window.py +363 -0
  58. crp/envelope/__init__.py +30 -0
  59. crp/envelope/builder.py +288 -0
  60. crp/envelope/decomposer.py +236 -0
  61. crp/envelope/formatter.py +168 -0
  62. crp/envelope/packer.py +211 -0
  63. crp/envelope/reranker.py +209 -0
  64. crp/envelope/scoring.py +310 -0
  65. crp/extraction/__init__.py +45 -0
  66. crp/extraction/complexity.py +96 -0
  67. crp/extraction/contradiction.py +132 -0
  68. crp/extraction/pipeline.py +360 -0
  69. crp/extraction/quality_gate.py +237 -0
  70. crp/extraction/stage1_regex.py +173 -0
  71. crp/extraction/stage2_statistical.py +244 -0
  72. crp/extraction/stage3_gliner.py +210 -0
  73. crp/extraction/stage4_uie.py +183 -0
  74. crp/extraction/stage5_discourse.py +175 -0
  75. crp/extraction/stage6_llm.py +178 -0
  76. crp/extraction/structured_output.py +219 -0
  77. crp/extraction/types.py +299 -0
  78. crp/license_guard.py +722 -0
  79. crp/observability/__init__.py +30 -0
  80. crp/observability/audit.py +118 -0
  81. crp/observability/events.py +233 -0
  82. crp/observability/metrics.py +264 -0
  83. crp/observability/quality.py +135 -0
  84. crp/observability/structured_logging.py +81 -0
  85. crp/observability/telemetry.py +117 -0
  86. crp/provenance/__init__.py +314 -0
  87. crp/provenance/_embeddings.py +97 -0
  88. crp/provenance/_types.py +378 -0
  89. crp/provenance/attribution_scorer.py +252 -0
  90. crp/provenance/claim_detector.py +229 -0
  91. crp/provenance/contradiction_detector.py +243 -0
  92. crp/provenance/distortion_detector.py +397 -0
  93. crp/provenance/entailment_verifier.py +358 -0
  94. crp/provenance/fabrication_detector.py +203 -0
  95. crp/provenance/hallucination_scorer.py +320 -0
  96. crp/provenance/omission_analyzer.py +106 -0
  97. crp/provenance/provenance_chain.py +205 -0
  98. crp/provenance/report_generator.py +440 -0
  99. crp/providers/__init__.py +43 -0
  100. crp/providers/anthropic.py +270 -0
  101. crp/providers/base.py +135 -0
  102. crp/providers/custom.py +63 -0
  103. crp/providers/diagnostic.py +251 -0
  104. crp/providers/llamacpp.py +224 -0
  105. crp/providers/manager.py +139 -0
  106. crp/providers/ollama.py +243 -0
  107. crp/providers/openai.py +628 -0
  108. crp/providers/tokenizers.py +48 -0
  109. crp/py.typed +0 -0
  110. crp/resources/__init__.py +53 -0
  111. crp/resources/adaptive_allocator.py +525 -0
  112. crp/resources/cost_model.py +388 -0
  113. crp/resources/overhead_manager.py +217 -0
  114. crp/resources/resource_manager.py +262 -0
  115. crp/schemas/__init__.py +20 -0
  116. crp/schemas/cost-estimate.json +33 -0
  117. crp/schemas/crp-error.json +43 -0
  118. crp/schemas/envelope-preview.json +40 -0
  119. crp/schemas/persisted-state-header.json +27 -0
  120. crp/schemas/quality-report.json +94 -0
  121. crp/schemas/session-handle.json +33 -0
  122. crp/schemas/session-status.json +57 -0
  123. crp/schemas/stream-event.json +18 -0
  124. crp/schemas/task-intent.json +42 -0
  125. crp/security/__init__.py +93 -0
  126. crp/security/audit_trail.py +392 -0
  127. crp/security/binding.py +192 -0
  128. crp/security/compliance.py +813 -0
  129. crp/security/consent.py +593 -0
  130. crp/security/embedding_defense.py +161 -0
  131. crp/security/encryption.py +202 -0
  132. crp/security/injection.py +335 -0
  133. crp/security/integrity.py +267 -0
  134. crp/security/privacy.py +662 -0
  135. crp/security/quarantine.py +249 -0
  136. crp/security/rbac.py +221 -0
  137. crp/security/validation.py +164 -0
  138. crp/state/__init__.py +31 -0
  139. crp/state/cold_storage.py +258 -0
  140. crp/state/compaction.py +263 -0
  141. crp/state/critical_state.py +104 -0
  142. crp/state/event_log.py +313 -0
  143. crp/state/fact.py +189 -0
  144. crp/state/serialization.py +189 -0
  145. crp/state/session_cleanup.py +77 -0
  146. crp/state/snapshot.py +290 -0
  147. crp/state/warm_store.py +346 -0
  148. crprotocol-2.0.0.dist-info/METADATA +1295 -0
  149. crprotocol-2.0.0.dist-info/RECORD +153 -0
  150. crprotocol-2.0.0.dist-info/WHEEL +4 -0
  151. crprotocol-2.0.0.dist-info/entry_points.txt +2 -0
  152. crprotocol-2.0.0.dist-info/licenses/LICENSE.md +170 -0
  153. crprotocol-2.0.0.dist-info/licenses/NOTICE +18 -0
@@ -0,0 +1,320 @@
1
+ # Copyright © 2025 Constantinos Vidiniotis. All rights reserved.
2
+ # Licensed under Elastic License 2.0 — see LICENSE.md for details.
3
+ """Hallucination Risk Scorer — per-claim composite risk assessment (§7.14.3).
4
+
5
+ **WHY THIS EXISTS**
6
+
7
+ An auditor reviewing AI output asks ONE question:
8
+
9
+ "How likely is it that THIS claim is a hallucination?"
10
+
11
+ Currently they must mentally fuse:
12
+ - Attribution score (was it grounded?)
13
+ - Fidelity score (was the source distorted?)
14
+ - Entailment verdict (does NLI confirm semantic support?)
15
+ - Claim specificity (is this a precise claim that's dangerous if wrong?)
16
+
17
+ This module fuses those four signals into ONE auditable risk score per
18
+ claim, with a clear risk level (LOW / MEDIUM / HIGH / CRITICAL) and a
19
+ list of human-readable risk factors explaining WHY.
20
+
21
+ **RISK FORMULA**
22
+
23
+ risk = 1.0 - (w_a * attribution + w_f * fidelity + w_e * entailment + w_s * (1 - specificity))
24
+
25
+ Where:
26
+ - attribution: top_score from DPE (0-1, higher = better sourced)
27
+ - fidelity: 1.0 if no distortions/fabrications for this claim, else degraded
28
+ - entailment: P(ENTAILED) from NLI (0-1, higher = semantically confirmed)
29
+ - specificity: density of specific entities in the claim (higher = riskier)
30
+ - w_a, w_f, w_e, w_s: configurable weights (default 0.30, 0.25, 0.30, 0.15)
31
+
32
+ Risk levels:
33
+ - risk < 0.25 → LOW
34
+ - risk < 0.50 → MEDIUM
35
+ - risk < 0.75 → HIGH
36
+ - risk ≥ 0.75 → CRITICAL
37
+ """
38
+
39
+ from __future__ import annotations
40
+
41
+ import re
42
+ from collections.abc import Sequence
43
+
44
+ from ._types import (
45
+ AttributionType,
46
+ ClaimAttribution,
47
+ ClaimRiskAssessment,
48
+ ClaimType,
49
+ DistortionResult,
50
+ EntailmentLabel,
51
+ EntailmentResult,
52
+ FabricationResult,
53
+ FidelityReport,
54
+ HallucinationRisk,
55
+ HallucinationRiskReport,
56
+ ProvenanceConfig,
57
+ )
58
+
59
+ # ---------------------------------------------------------------------------
60
+ # Claim specificity analysis
61
+ # ---------------------------------------------------------------------------
62
+
63
# Patterns for the concrete details that make a claim costly to get wrong:
# bare numbers / percentages, years and quarters, capitalised names, and
# quantities followed by a unit.
_NUMBER_RE = re.compile(r"\b\d[\d,]*(?:\.\d+)?\s*%?\b")
_DATE_RE = re.compile(
    r"\b(?:(?:19|20)\d{2}|Q[1-4]\s+\d{4}|"
    r"(?:January|February|March|April|May|June|July|August|"
    r"September|October|November|December)\s+\d{4})\b",
    flags=re.IGNORECASE,
)
_PROPER_NOUN_RE = re.compile(r"\b([A-Z][a-z]+(?:\s+[A-Z][a-z]+)*)\b")
_MEASUREMENT_RE = re.compile(
    r"\b\d+(?:\.\d+)?\s*(?:mg|kg|ml|cm|mm|km|lb|oz|GB|MB|TB|ms|MHz|GHz)\b",
    flags=re.IGNORECASE,
)


def compute_specificity(claim_text: str) -> float:
    """Rate how specific a claim is on a 0.0 (vague) to 1.0 (very specific) scale.

    A precise statement such as "Revenue grew 23.4% in Q3 2024 according to
    Deloitte" does far more damage when wrong than "Performance improved.",
    so specificity is used as a risk amplifier by the scorer.

    The score is the total number of pattern hits (numbers, dates, proper
    nouns, measurements) divided by 5 and capped at 1.0.  Each pattern is
    counted independently, so text matched by more than one pattern
    contributes more than once (a year such as "2024" is both a number and
    a date).

    Args:
        claim_text: The claim sentence to analyse.

    Returns:
        Specificity in the range [0.0, 1.0].
    """
    detectors = (_NUMBER_RE, _DATE_RE, _PROPER_NOUN_RE, _MEASUREMENT_RE)
    hit_total = sum(len(pattern.findall(claim_text)) for pattern in detectors)
    # Five or more hits saturate the score.
    return min(1.0, hit_total / 5.0)
94
+
95
+
96
+ # ---------------------------------------------------------------------------
97
+ # Per-claim fidelity signal
98
+ # ---------------------------------------------------------------------------
99
+
100
+
101
def _claim_fidelity_signal(
    claim_index: int,
    fidelity: FidelityReport | None,
) -> tuple[float, list[str]]:
    """Derive the fidelity signal and its explanations for one claim.

    Starts from a perfect score of 1.0 and deducts a flat 0.20 for every
    distortion and 0.15 for every fabrication recorded against
    ``claim_index``.  The result is floored at 0.0.

    Args:
        claim_index: Index of the claim being assessed.
        fidelity: Fidelity report to inspect, or ``None`` when the fidelity
            layer did not run.

    Returns:
        ``(fidelity_score, risk_factors)``.  ``(1.0, [])`` when no report is
        supplied or nothing in it refers to this claim.
    """
    if fidelity is None:
        return 1.0, []

    # (penalty, explanation) pairs — distortions first, then fabrications,
    # so the explanations come out in the same order they are deducted.
    findings = [
        (0.20, f"Distortion: {item.distortion_type.value} (sev={item.severity:.2f})")
        for item in fidelity.distortions
        if item.claim_index == claim_index
    ]
    findings += [
        (0.15, f"Fabrication: {item.entity_type.value} '{item.fabricated_entity}'")
        for item in fidelity.fabrications
        if item.claim_index == claim_index
    ]

    remaining = 1.0
    for penalty, _ in findings:
        remaining -= penalty

    return max(0.0, remaining), [explanation for _, explanation in findings]
128
+
129
+
130
+ # ---------------------------------------------------------------------------
131
+ # Public API
132
+ # ---------------------------------------------------------------------------
133
+
134
+
135
# A key signal below this value is treated as collapsed and forces the claim
# to at least HIGH risk, regardless of the weighted average.
_CRITICAL_SIGNAL_FLOOR = 0.15


def _attribution_signal(attr: ClaimAttribution) -> tuple[float, list[str]]:
    """Return the attribution safety signal for a claim and why it was reduced."""
    signal = attr.top_score
    kind = attr.attribution_type
    if kind == AttributionType.PARAMETRIC:
        return max(0.0, signal - 0.30), ["Parametric knowledge — not grounded in context"]
    if kind == AttributionType.UNCERTAIN:
        return 0.0, ["Uncertain attribution — source unknown"]
    if kind == AttributionType.MIXED:
        return signal * 0.80, ["Mixed attribution — partially parametric"]
    return signal, []


def _entailment_signal(
    ent: EntailmentResult | None,
    attribution_signal: float,
) -> tuple[float, list[str]]:
    """Return the entailment safety signal for a claim and why it was reduced."""
    if ent is None:
        # No entailment data — use half of the attribution signal as a proxy.
        return attribution_signal * 0.5, []
    if ent.label == EntailmentLabel.CONTRADICTION:
        return 0.0, [
            f"NLI CONTRADICTION (P={ent.contradiction_score:.2f}) — "
            f"claim semantically conflicts with source fact"
        ]
    if ent.label == EntailmentLabel.NEUTRAL:
        # Partial credit for neutral.
        return 0.3, ["NLI neutral — claim not semantically supported"]
    return ent.entailment_score, []


def _risk_level_for(risk_score: float) -> HallucinationRisk:
    """Map a 0-1 risk score onto the LOW / MEDIUM / HIGH / CRITICAL bands."""
    if risk_score >= 0.75:
        return HallucinationRisk.CRITICAL
    if risk_score >= 0.50:
        return HallucinationRisk.HIGH
    if risk_score >= 0.25:
        return HallucinationRisk.MEDIUM
    return HallucinationRisk.LOW


def score_hallucination_risk(
    attributions: list[ClaimAttribution],
    *,
    fidelity: FidelityReport | None = None,
    entailment_results: list[EntailmentResult] | None = None,
    config: ProvenanceConfig | None = None,
) -> HallucinationRiskReport:
    """Score hallucination risk for every claim in the output.

    Four signals are combined per factual or hedge claim:

    1. **Attribution** — ``top_score``, reduced for PARAMETRIC / MIXED
       attribution and zeroed for UNCERTAIN.
    2. **Fidelity** — 1.0 minus flat penalties for distortions and
       fabrications recorded against the claim.
    3. **Entailment** — the NLI entailment score; 0.0 on contradiction,
       0.3 on neutral, and half the attribution signal when no NLI result
       exists for the claim.
    4. **Specificity** — density of numbers, dates, names and measurements;
       higher specificity lowers the safety score.

    ``risk = 1 - weighted_safety`` is clamped to [0, 1] and rounded to four
    decimals.  If attribution, fidelity or entailment falls below 0.15 the
    risk is raised to at least 0.50 (HIGH), because a single collapsed
    signal can be masked by the weighted average.

    Claims of any other type are not scored; they are reported as LOW with
    a risk score of 0.0 and are excluded from the mean.

    Args:
        attributions: Scored claim attributions from the DPE pipeline.
        fidelity: FidelityReport from the fidelity verification layer.
        entailment_results: EntailmentResults from the entailment verifier.
            When several results share a ``claim_index`` the last one wins.
        config: ProvenanceConfig with risk weight configuration; a default
            instance is used when omitted.

    Returns:
        HallucinationRiskReport with per-claim assessments and aggregates.
        An empty report is returned when risk scoring is disabled.
    """
    cfg = config or ProvenanceConfig()
    if not cfg.risk_scoring_enabled:
        return HallucinationRiskReport()

    ent_lookup: dict[int, EntailmentResult] = {
        er.claim_index: er for er in (entailment_results or ())
    }

    assessments: list[ClaimRiskAssessment] = []
    # Risk scores of claims that were actually scored.  Tracked explicitly so
    # that a scored claim with a risk of exactly 0.0 still counts towards the
    # mean (filtering on ``risk_score > 0.0`` would silently drop it and
    # inflate the average).
    scored_risks: list[float] = []

    for attr in attributions:
        # Only factual and hedge claims carry hallucination risk.
        if attr.claim_type not in (ClaimType.FACTUAL_CLAIM, ClaimType.HEDGE):
            assessments.append(ClaimRiskAssessment(
                claim_index=attr.claim_index,
                claim_text=attr.claim_text[:200],
                risk_level=HallucinationRisk.LOW,
                risk_score=0.0,
                risk_factors=["Non-factual claim — low inherent risk"],
            ))
            continue

        # --- Signal 1: Attribution (higher = safer) ---
        attribution_signal, risk_factors = _attribution_signal(attr)

        # --- Signal 2: Fidelity (higher = safer) ---
        fidelity_signal, fidelity_factors = _claim_fidelity_signal(
            attr.claim_index, fidelity,
        )
        risk_factors.extend(fidelity_factors)

        # --- Signal 3: Entailment (higher = safer) ---
        entailment_signal, entailment_factors = _entailment_signal(
            ent_lookup.get(attr.claim_index), attribution_signal,
        )
        risk_factors.extend(entailment_factors)

        # --- Signal 4: Specificity (higher = riskier) ---
        specificity = compute_specificity(attr.claim_text)
        if specificity > 0.6:
            risk_factors.append(f"Highly specific claim (specificity={specificity:.2f})")

        # --- Composite risk score ---
        # WEIGHT RATIONALE (G-3):
        # - attribution (0.30): primary grounding signal — whether the claim
        #   can be traced to envelope facts.  Highest weight because
        #   ungrounded claims are the root cause of hallucinations.
        # - entailment (0.30): equal to attribution because semantic
        #   verification catches meaning-level drift that attribution
        #   scoring alone cannot.
        # - fidelity (0.25): lexical verification layer — slightly lower
        #   because it is surface-level and entailment covers semantics.
        # - specificity (0.15): risk amplifier — specific claims are more
        #   dangerous if wrong, but specificity alone is not evidence of
        #   hallucination.
        safety = (
            cfg.risk_weight_attribution * attribution_signal
            + cfg.risk_weight_fidelity * fidelity_signal
            + cfg.risk_weight_entailment * entailment_signal
            + cfg.risk_weight_specificity * (1.0 - specificity)
        )
        risk_score = round(max(0.0, min(1.0, 1.0 - safety)), 4)

        # CRITICAL SIGNAL OVERRIDE (G-3): only the first collapsed signal
        # (attribution, then fidelity, then entailment) is reported.
        collapsed = next(
            (
                (signal_name, signal_val)
                for signal_name, signal_val in (
                    ("attribution", attribution_signal),
                    ("fidelity", fidelity_signal),
                    ("entailment", entailment_signal),
                )
                if signal_val < _CRITICAL_SIGNAL_FLOOR
            ),
            None,
        )
        if collapsed is not None:
            signal_name, signal_val = collapsed
            risk_score = max(risk_score, 0.50)  # Floor = HIGH
            risk_factors.append(
                f"Critical signal override: {signal_name}={signal_val:.2f} < {_CRITICAL_SIGNAL_FLOOR}"
            )

        scored_risks.append(risk_score)
        assessments.append(ClaimRiskAssessment(
            claim_index=attr.claim_index,
            claim_text=attr.claim_text[:200],
            risk_level=_risk_level_for(risk_score),
            risk_score=risk_score,
            attribution_signal=round(attribution_signal, 4),
            fidelity_signal=round(fidelity_signal, 4),
            entailment_signal=round(entailment_signal, 4),
            specificity_signal=round(specificity, 4),
            risk_factors=risk_factors if risk_factors else ["No risk factors identified"],
        ))

    # --- Window-level aggregates ---
    high_count = sum(1 for a in assessments if a.risk_level == HallucinationRisk.HIGH)
    critical_count = sum(1 for a in assessments if a.risk_level == HallucinationRisk.CRITICAL)
    mean_risk = sum(scored_risks) / len(scored_risks) if scored_risks else 0.0

    # Any CRITICAL or HIGH claim escalates the whole window; otherwise the
    # mean risk of the scored claims decides between MEDIUM and LOW.
    if critical_count > 0:
        window_level = HallucinationRisk.CRITICAL
    elif high_count > 0:
        window_level = HallucinationRisk.HIGH
    elif mean_risk >= 0.25:
        window_level = HallucinationRisk.MEDIUM
    else:
        window_level = HallucinationRisk.LOW

    return HallucinationRiskReport(
        assessments=assessments,
        high_risk_count=high_count,
        critical_risk_count=critical_count,
        mean_risk_score=round(mean_risk, 4),
        window_risk_level=window_level,
    )
@@ -0,0 +1,106 @@
1
+ # Copyright © 2025 Constantinos Vidiniotis. All rights reserved.
2
+ # Licensed under Elastic License 2.0 — see LICENSE.md for details.
3
+ """Omission Analyzer — detect when the model silently ignores important facts.
4
+
5
+ 15 high-priority facts went into the envelope. The model used 4 and
6
+ ignored 11. If Fact #3 was "Product has a known safety defect" and the
7
+ model never mentioned it — that is a **material omission**.
8
+
9
+ This module identifies which envelope facts received NO attribution
10
+ from any output claim and ranks them by importance (original packing
11
+ score). High-importance omissions are flagged for manual review.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ from collections.abc import Sequence
17
+
18
+ from crp.envelope.packer import PackedFact
19
+
20
+ from ._types import (
21
+ ClaimAttribution,
22
+ OmissionResult,
23
+ OmissionSeverity,
24
+ )
25
+
26
+
27
+ # ---------------------------------------------------------------------------
28
+ # Public API
29
+ # ---------------------------------------------------------------------------
30
+
31
+
32
def analyze_omissions(
    attributions: list[ClaimAttribution],
    packed_facts: Sequence[PackedFact],
    *,
    attribution_floor: float = 0.20,
) -> list[OmissionResult]:
    """Report the envelope facts that no output claim drew on.

    A fact counts as omitted when the best composite attribution score it
    received from any claim is below ``attribution_floor``.  Each omitted
    fact is given a severity from its packing score relative to the other
    packed facts, and the list is returned with the highest-scoring
    (most important) facts first.

    Args:
        attributions: Scored claim attributions from the attribution scorer.
        packed_facts: Every fact that was packed into the envelope.
        attribution_floor: Facts whose best composite score is below this
            value are treated as "not used" (default 0.20).

    Returns:
        OmissionResult entries ordered by fact relevance score, descending.
        Empty when there are no packed facts or every fact was used.
    """
    if not packed_facts:
        return []

    # Best composite score each packed fact received from any claim.
    best_by_fact: dict[str, float] = dict.fromkeys(
        (fact.fact_id for fact in packed_facts), 0.0
    )
    for claim in attributions:
        for scored in claim.attributed_facts:
            current = best_by_fact.get(scored.fact_id)
            if current is not None and scored.composite_score > current:
                best_by_fact[scored.fact_id] = scored.composite_score

    # Severity cut-offs taken from the packing scores, highest first.
    # NOTE(review): the cut-off is the score at rank ``total // 4`` (0-based)
    # and the comparison below is ``>=``, so up to ``total // 4 + 1`` facts
    # (more with ties) can be CRITICAL — e.g. 2 of 4.  Confirm this is the
    # intended quartile boundary.
    ranked = sorted((fact.score for fact in packed_facts), reverse=True)
    total = len(ranked)
    critical_cutoff = ranked[total // 4] if total >= 4 else ranked[0]
    high_cutoff = ranked[total // 2] if total >= 2 else ranked[0]

    def _severity_for(packing_score: float) -> OmissionSeverity:
        if packing_score >= critical_cutoff:
            return OmissionSeverity.CRITICAL
        if packing_score >= high_cutoff:
            return OmissionSeverity.HIGH
        if packing_score > 0.0:
            return OmissionSeverity.MEDIUM
        return OmissionSeverity.LOW

    omitted: list[OmissionResult] = []
    for fact in packed_facts:
        best = best_by_fact.get(fact.fact_id, 0.0)
        if best >= attribution_floor:
            continue  # Fact was adequately used

        omitted.append(OmissionResult(
            fact_id=fact.fact_id,
            fact_text_preview=fact.text[:120],
            fact_relevance_score=round(fact.score, 4),
            max_attribution_score=round(best, 4),
            severity=_severity_for(fact.score),
        ))

    # Most important omissions first; ties keep envelope order.
    return sorted(omitted, key=lambda item: item.fact_relevance_score, reverse=True)
@@ -0,0 +1,205 @@
1
+ # Copyright © 2025 Constantinos Vidiniotis. All rights reserved.
2
+ # Licensed under Elastic License 2.0 — see LICENSE.md for details.
3
+ """Provenance Chain Builder — link claims → facts → windows → tasks (§7.14.3).
4
+
5
+ Constructs full provenance chains from attribution results, tracing each claim
6
+ back through the CRP pipeline:
7
+
8
+ Claim → attributed Fact → source Window → Envelope → original Task
9
+
10
+ Also enriches FactScore objects with fact metadata (source_window_id,
11
+ extraction_stage) when a WarmStateStore or fact lookup is available.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ from typing import Any
17
+
18
+ from crp.envelope.packer import PackedFact
19
+
20
+ from ._types import (
21
+ AttributionType,
22
+ ClaimAttribution,
23
+ ProvenanceChain,
24
+ ProvenanceLink,
25
+ )
26
+
27
+
28
+ # ---------------------------------------------------------------------------
29
+ # Fact metadata enrichment
30
+ # ---------------------------------------------------------------------------
31
+
32
+
33
def enrich_fact_metadata(
    attributions: list[ClaimAttribution],
    fact_metadata: dict[str, dict[str, Any]],
) -> None:
    """Enrich FactScore entries with fact provenance metadata (in-place).

    Only values that are actually supplied are written.  A fact whose id is
    missing from ``fact_metadata``, or whose entry lacks a given key, keeps
    whatever ``fact_source_window`` / ``fact_extraction_stage`` it already
    had, so repeated or partial enrichment never erases earlier results.

    Args:
        attributions: List of claim attributions to enrich.
        fact_metadata: Dict mapping fact_id → {
            "source_window_id": str,
            "extraction_stage": int,
            "confidence": float,
            ...
        }.  Only ``source_window_id`` and ``extraction_stage`` are read.
    """
    if not fact_metadata:
        return  # Nothing to apply.

    for attr in attributions:
        for fs in attr.attributed_facts:
            meta = fact_metadata.get(fs.fact_id)
            if not meta:
                continue  # Unknown fact — leave existing values untouched.
            if "source_window_id" in meta:
                fs.fact_source_window = meta["source_window_id"]
            if "extraction_stage" in meta:
                fs.fact_extraction_stage = meta["extraction_stage"]
53
+
54
+
55
+ # ---------------------------------------------------------------------------
56
+ # Chain construction
57
+ # ---------------------------------------------------------------------------
58
+
59
+
60
def build_provenance_chain(
    attribution: ClaimAttribution,
    *,
    session_id: str = "",
    window_id: str = "",
    envelope_saturation: float = 0.0,
    envelope_facts_included: int = 0,
    task_input_preview: str = "",
) -> ProvenanceChain:
    """Assemble the provenance chain for one claim attribution.

    Links are emitted in this order:

        claim → fact → window → envelope → task

    The fact link describes the first entry of ``attributed_facts`` and is
    only present for CONTEXT_GROUNDED or MIXED claims that have at least one
    attributed fact; the window link follows only when that fact records a
    source window.  PARAMETRIC claims get a placeholder fact link instead.
    Any other case produces no fact-level link.

    Args:
        attribution: Scored claim attribution from attribution_scorer.
        session_id: Current session ID.
        window_id: Current window ID.
        envelope_saturation: Envelope saturation ratio.
        envelope_facts_included: Number of facts in the envelope.
        task_input_preview: Preview of the task input; truncated to 120
            characters in the task link.

    Returns:
        ProvenanceChain holding the claim text (first 200 characters), the
        claim index, the attribution type and the ordered links.
    """
    kind = attribution.attribution_type

    # Level 1: the claim itself — always present.
    chain: list[ProvenanceLink] = [
        ProvenanceLink(
            level="claim",
            label=f"Claim #{attribution.claim_index}: {attribution.claim_type.value}",
            detail={
                "claim_text": attribution.claim_text[:200],
                "claim_type": attribution.claim_type.value,
                "attribution_type": kind.value,
                "confidence": attribution.confidence,
            },
        )
    ]

    grounded_kinds = (AttributionType.CONTEXT_GROUNDED, AttributionType.MIXED)
    has_fact_support = kind in grounded_kinds and bool(attribution.attributed_facts)

    if has_fact_support:
        # Level 2: the first attributed fact.
        lead = attribution.attributed_facts[0]
        chain.append(ProvenanceLink(
            level="fact",
            label=f"Fact {lead.fact_id[:8]}... (score: {lead.composite_score:.2f})",
            detail={
                "fact_id": lead.fact_id,
                "fact_preview": lead.fact_text_preview,
                "composite_score": lead.composite_score,
                "semantic_similarity": lead.semantic_similarity,
                "lexical_overlap": lead.lexical_overlap,
                "source_window": lead.fact_source_window,
                "extraction_stage": lead.fact_extraction_stage,
            },
        ))

        # Level 3: the window that fact was extracted from, when recorded.
        origin_window = lead.fact_source_window
        if origin_window:
            chain.append(ProvenanceLink(
                level="window",
                label=f"Window {origin_window[:8]}... (stage {lead.fact_extraction_stage})",
                detail={
                    "window_id": origin_window,
                    "extraction_stage": lead.fact_extraction_stage,
                },
            ))
    elif kind == AttributionType.PARAMETRIC:
        # No context fact to point at — record that explicitly.
        chain.append(ProvenanceLink(
            level="fact",
            label="No supporting context fact (likely parametric knowledge)",
            detail={
                "attribution_type": "PARAMETRIC",
                "top_score": attribution.top_score,
                "note": "Claim appears to originate from model training data, "
                        "not from provided context.",
            },
        ))

    # Level 4: envelope context.
    envelope_link = ProvenanceLink(
        level="envelope",
        label=f"Envelope ({envelope_facts_included} facts, "
              f"saturation: {envelope_saturation:.0%})",
        detail={
            "window_id": window_id,
            "facts_included": envelope_facts_included,
            "saturation": round(envelope_saturation, 4),
        },
    )

    # Level 5: task input / session.
    task_link = ProvenanceLink(
        level="task",
        label=f"Session {session_id[:8]}..." if session_id else "Session",
        detail={
            "session_id": session_id,
            "task_preview": task_input_preview[:120],
        },
    )

    chain.extend((envelope_link, task_link))

    return ProvenanceChain(
        claim_text=attribution.claim_text[:200],
        claim_index=attribution.claim_index,
        attribution_type=kind,
        links=chain,
    )
171
+
172
+
173
def build_all_chains(
    attributions: list[ClaimAttribution],
    *,
    session_id: str = "",
    window_id: str = "",
    envelope_saturation: float = 0.0,
    envelope_facts_included: int = 0,
    task_input_preview: str = "",
) -> list[ProvenanceChain]:
    """Build one provenance chain per claim attribution.

    The session, window, envelope and task context is identical for every
    claim, so it is collected once and forwarded unchanged to
    ``build_provenance_chain``.

    Args:
        attributions: All claim attributions from scorer.
        session_id: Current session ID.
        window_id: Current window ID.
        envelope_saturation: Envelope saturation ratio.
        envelope_facts_included: Facts in envelope.
        task_input_preview: Preview of the task input.

    Returns:
        List of ProvenanceChain objects, in the same order as
        ``attributions`` (one per attribution).
    """
    shared_context = {
        "session_id": session_id,
        "window_id": window_id,
        "envelope_saturation": envelope_saturation,
        "envelope_facts_included": envelope_facts_included,
        "task_input_preview": task_input_preview,
    }
    return [
        build_provenance_chain(attribution, **shared_context)
        for attribution in attributions
    ]