crprotocol 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- crp/__init__.py +126 -0
- crp/__main__.py +8 -0
- crp/_typing.py +27 -0
- crp/_version.py +5 -0
- crp/adapters.py +31 -0
- crp/advanced/__init__.py +40 -0
- crp/advanced/auto_ingest.py +400 -0
- crp/advanced/cqs.py +235 -0
- crp/advanced/cross_window.py +477 -0
- crp/advanced/curator.py +265 -0
- crp/advanced/feedback.py +146 -0
- crp/advanced/hierarchical.py +211 -0
- crp/advanced/meta_learning.py +401 -0
- crp/advanced/parallel.py +98 -0
- crp/advanced/review_cycle.py +329 -0
- crp/advanced/scale_mode.py +129 -0
- crp/advanced/source_grounding.py +207 -0
- crp/ckf/__init__.py +35 -0
- crp/ckf/community.py +377 -0
- crp/ckf/fabric.py +445 -0
- crp/ckf/gc.py +175 -0
- crp/ckf/graph_walk.py +87 -0
- crp/ckf/merge.py +133 -0
- crp/ckf/pattern_query.py +122 -0
- crp/ckf/pubsub.py +128 -0
- crp/ckf/semantic.py +207 -0
- crp/cli/__init__.py +7 -0
- crp/cli/main.py +329 -0
- crp/cli/sidecar.py +929 -0
- crp/cli/startup.py +272 -0
- crp/continuation/__init__.py +103 -0
- crp/continuation/completion.py +348 -0
- crp/continuation/degradation.py +157 -0
- crp/continuation/document_map.py +160 -0
- crp/continuation/flow.py +109 -0
- crp/continuation/gap.py +419 -0
- crp/continuation/manager.py +484 -0
- crp/continuation/quality_monitor.py +179 -0
- crp/continuation/stitch.py +419 -0
- crp/continuation/trigger.py +142 -0
- crp/continuation/voice.py +157 -0
- crp/core/__init__.py +69 -0
- crp/core/batch.py +77 -0
- crp/core/circuit_breaker.py +116 -0
- crp/core/config.py +377 -0
- crp/core/context_tools.py +540 -0
- crp/core/dispatch_router.py +3977 -0
- crp/core/errors.py +128 -0
- crp/core/extraction_facade.py +384 -0
- crp/core/facilitator.py +713 -0
- crp/core/idempotency.py +215 -0
- crp/core/orchestrator.py +1435 -0
- crp/core/relay_strategies.py +613 -0
- crp/core/security_manager.py +140 -0
- crp/core/session.py +134 -0
- crp/core/task_intent.py +36 -0
- crp/core/window.py +363 -0
- crp/envelope/__init__.py +30 -0
- crp/envelope/builder.py +288 -0
- crp/envelope/decomposer.py +236 -0
- crp/envelope/formatter.py +168 -0
- crp/envelope/packer.py +211 -0
- crp/envelope/reranker.py +209 -0
- crp/envelope/scoring.py +310 -0
- crp/extraction/__init__.py +45 -0
- crp/extraction/complexity.py +96 -0
- crp/extraction/contradiction.py +132 -0
- crp/extraction/pipeline.py +360 -0
- crp/extraction/quality_gate.py +237 -0
- crp/extraction/stage1_regex.py +173 -0
- crp/extraction/stage2_statistical.py +244 -0
- crp/extraction/stage3_gliner.py +210 -0
- crp/extraction/stage4_uie.py +183 -0
- crp/extraction/stage5_discourse.py +175 -0
- crp/extraction/stage6_llm.py +178 -0
- crp/extraction/structured_output.py +219 -0
- crp/extraction/types.py +299 -0
- crp/license_guard.py +722 -0
- crp/observability/__init__.py +30 -0
- crp/observability/audit.py +118 -0
- crp/observability/events.py +233 -0
- crp/observability/metrics.py +264 -0
- crp/observability/quality.py +135 -0
- crp/observability/structured_logging.py +81 -0
- crp/observability/telemetry.py +117 -0
- crp/provenance/__init__.py +314 -0
- crp/provenance/_embeddings.py +97 -0
- crp/provenance/_types.py +378 -0
- crp/provenance/attribution_scorer.py +252 -0
- crp/provenance/claim_detector.py +229 -0
- crp/provenance/contradiction_detector.py +243 -0
- crp/provenance/distortion_detector.py +397 -0
- crp/provenance/entailment_verifier.py +358 -0
- crp/provenance/fabrication_detector.py +203 -0
- crp/provenance/hallucination_scorer.py +320 -0
- crp/provenance/omission_analyzer.py +106 -0
- crp/provenance/provenance_chain.py +205 -0
- crp/provenance/report_generator.py +440 -0
- crp/providers/__init__.py +43 -0
- crp/providers/anthropic.py +270 -0
- crp/providers/base.py +135 -0
- crp/providers/custom.py +63 -0
- crp/providers/diagnostic.py +251 -0
- crp/providers/llamacpp.py +224 -0
- crp/providers/manager.py +139 -0
- crp/providers/ollama.py +243 -0
- crp/providers/openai.py +628 -0
- crp/providers/tokenizers.py +48 -0
- crp/py.typed +0 -0
- crp/resources/__init__.py +53 -0
- crp/resources/adaptive_allocator.py +525 -0
- crp/resources/cost_model.py +388 -0
- crp/resources/overhead_manager.py +217 -0
- crp/resources/resource_manager.py +262 -0
- crp/schemas/__init__.py +20 -0
- crp/schemas/cost-estimate.json +33 -0
- crp/schemas/crp-error.json +43 -0
- crp/schemas/envelope-preview.json +40 -0
- crp/schemas/persisted-state-header.json +27 -0
- crp/schemas/quality-report.json +94 -0
- crp/schemas/session-handle.json +33 -0
- crp/schemas/session-status.json +57 -0
- crp/schemas/stream-event.json +18 -0
- crp/schemas/task-intent.json +42 -0
- crp/security/__init__.py +93 -0
- crp/security/audit_trail.py +392 -0
- crp/security/binding.py +192 -0
- crp/security/compliance.py +813 -0
- crp/security/consent.py +593 -0
- crp/security/embedding_defense.py +161 -0
- crp/security/encryption.py +202 -0
- crp/security/injection.py +335 -0
- crp/security/integrity.py +267 -0
- crp/security/privacy.py +662 -0
- crp/security/quarantine.py +249 -0
- crp/security/rbac.py +221 -0
- crp/security/validation.py +164 -0
- crp/state/__init__.py +31 -0
- crp/state/cold_storage.py +258 -0
- crp/state/compaction.py +263 -0
- crp/state/critical_state.py +104 -0
- crp/state/event_log.py +313 -0
- crp/state/fact.py +189 -0
- crp/state/serialization.py +189 -0
- crp/state/session_cleanup.py +77 -0
- crp/state/snapshot.py +290 -0
- crp/state/warm_store.py +346 -0
- crprotocol-2.0.0.dist-info/METADATA +1295 -0
- crprotocol-2.0.0.dist-info/RECORD +153 -0
- crprotocol-2.0.0.dist-info/WHEEL +4 -0
- crprotocol-2.0.0.dist-info/entry_points.txt +2 -0
- crprotocol-2.0.0.dist-info/licenses/LICENSE.md +170 -0
- crprotocol-2.0.0.dist-info/licenses/NOTICE +18 -0
|
@@ -0,0 +1,477 @@
|
|
|
1
|
+
# Copyright © 2025 Constantinos Vidiniotis. All rights reserved.
|
|
2
|
+
# Licensed under Elastic License 2.0 — see LICENSE.md for details.
|
|
3
|
+
"""Cross-window validation — 3-tier consistency checks (§13).
|
|
4
|
+
|
|
5
|
+
Tier 1: Extraction-based (always, zero LLM cost)
|
|
6
|
+
Tier 2: LLM-targeted (2B+ models)
|
|
7
|
+
Tier 3: Full LLM review (7B+ models)
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import math
|
|
13
|
+
import re
|
|
14
|
+
from collections.abc import Callable
|
|
15
|
+
from dataclasses import dataclass, field
|
|
16
|
+
from typing import Any
|
|
17
|
+
|
|
18
|
+
# ---------------------------------------------------------------------------
|
|
19
|
+
# Constants
|
|
20
|
+
# ---------------------------------------------------------------------------
|
|
21
|
+
|
|
22
|
+
# Tier 1 embedding check: a pair of facts is flagged when cosine similarity is
# strictly above the first threshold AND normalized edit distance is strictly
# above the second.
EMBEDDING_CONTRADICTION_SIM = 0.85
EMBEDDING_CONTRADICTION_EDIT = 0.3

# Default cadence (in window indices) for each validation tier; used as the
# defaults of ReviewCycleConfig.tier_N_interval.
TIER_1_INTERVAL = 5
TIER_2_INTERVAL = 10
TIER_3_INTERVAL = 20

# Default for ReviewCycleConfig.max_correction_windows, which caps how many
# issues are sent to the LLM in "correct" mode.
MAX_CORRECTION_WINDOWS = 3
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
# ---------------------------------------------------------------------------
|
|
31
|
+
# Data types
|
|
32
|
+
# ---------------------------------------------------------------------------
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@dataclass
class ConsistencyIssue:
    """One inconsistency reported by a validation tier."""

    # Category tag, e.g. "numerical_contradiction", "semantic_contradiction",
    # "undefined_reference", "structural_gap".
    issue_type: str
    # Human-readable explanation of the problem.
    description: str = ""
    # One of "low", "medium", "high".
    severity: str = "medium"
    # Window indices the issue relates to, when known.
    windows: list[int] | None = None
    # Set to True once a later tier has confirmed the issue.
    confirmed: bool = False
    # References to the facts involved (the checks in this module store
    # positions in the input fact list).
    facts: list[Any] | None = None
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
@dataclass
class ValidationResult:
    """Issues produced by one run of a validation tier."""

    # Which tier (1, 2 or 3) produced this result.
    tier: int = 1
    # Issues reported by that tier; empty when nothing was found.
    issues: list[ConsistencyIssue] = field(default_factory=list)
    timestamp: float = 0.0
    window_range: tuple[int, int] = (0, 0)

    @property
    def has_issues(self) -> bool:
        """Return True when at least one issue was recorded."""
        return bool(self.issues)

    @property
    def high_severity_count(self) -> int:
        """Return how many recorded issues have severity ``"high"``."""
        return len([entry for entry in self.issues if entry.severity == "high"])
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
@dataclass
class ReviewCycleConfig:
    """Settings that control when and how review cycles run."""

    # Master switch for review cycles.
    enabled: bool = True

    # Tier 1 cadence, in window indices.
    tier_1_interval: int = TIER_1_INTERVAL

    # Tier 2 switch and cadence.
    tier_2_enabled: bool = True
    tier_2_interval: int = TIER_2_INTERVAL

    # Tier 3 switch, cadence and minimum capability level.
    tier_3_enabled: bool = True
    tier_3_interval: int = TIER_3_INTERVAL
    tier_3_min_model_capability: int = 3

    # "flag" records issues as blockers; "correct" asks the LLM to resolve them.
    correction_mode: str = "flag"
    # Upper bound on the number of issues handled per call in "correct" mode.
    max_correction_windows: int = MAX_CORRECTION_WINDOWS
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
# ---------------------------------------------------------------------------
|
|
82
|
+
# Tier 1 — Extraction-based (ALWAYS, zero LLM cost)
|
|
83
|
+
# ---------------------------------------------------------------------------
|
|
84
|
+
|
|
85
|
+
# Regex for extracting numbers with context
|
|
86
|
+
_NUMBER_PATTERN = re.compile(r"(\b\d+(?:\.\d+)?(?:\s*(?:%|percent|GB|MB|KB|ms|seconds?|minutes?|hours?))?)\b")
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def _extract_numbers(text: str) -> list[tuple[str, str]]:
|
|
90
|
+
"""Extract numbers + surrounding context from text."""
|
|
91
|
+
results: list[tuple[str, str]] = []
|
|
92
|
+
for m in _NUMBER_PATTERN.finditer(text):
|
|
93
|
+
start = max(0, m.start() - 30)
|
|
94
|
+
end = min(len(text), m.end() + 30)
|
|
95
|
+
context = text[start:end].strip()
|
|
96
|
+
results.append((m.group(1), context))
|
|
97
|
+
return results
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def _normalized_edit_distance(a: str, b: str) -> float:
|
|
101
|
+
"""Levenshtein edit distance normalized to [0, 1]."""
|
|
102
|
+
if not a and not b:
|
|
103
|
+
return 0.0
|
|
104
|
+
if not a or not b:
|
|
105
|
+
return 1.0
|
|
106
|
+
n, m = len(a), len(b)
|
|
107
|
+
dp = list(range(m + 1))
|
|
108
|
+
for i in range(1, n + 1):
|
|
109
|
+
prev = dp[0]
|
|
110
|
+
dp[0] = i
|
|
111
|
+
for j in range(1, m + 1):
|
|
112
|
+
tmp = dp[j]
|
|
113
|
+
if a[i - 1] == b[j - 1]:
|
|
114
|
+
dp[j] = prev
|
|
115
|
+
else:
|
|
116
|
+
dp[j] = 1 + min(prev, dp[j], dp[j - 1])
|
|
117
|
+
prev = tmp
|
|
118
|
+
return dp[m] / max(n, m)
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
class CrossWindowValidator:
    """Three-tier cross-window consistency validation.

    Tier 1 works on extracted facts only (regex and optional embeddings, no
    LLM call). Tier 2 asks the LLM a YES/NO question per flagged issue.
    Tier 3 sends the accumulated output to the LLM for a free-form review.

    Args:
        dispatch_fn: Called as ``dispatch_fn(prompt, text)``; must return a
            tuple whose first element is the model's text output. ``text``
            is the accumulated output for Tier 3 and ``""`` elsewhere.
            ``None`` disables every LLM-backed step.
        embedding_fn: Maps a text to an embedding vector. ``None`` disables
            the embedding-based Tier 1 check.
        config: Review-cycle settings; a default ``ReviewCycleConfig`` is
            created when omitted.
    """

    # Sort rank used by apply_corrections; unknown severities sort last.
    _SEVERITY_RANK = {"high": 0, "medium": 1, "low": 2}

    def __init__(
        self,
        dispatch_fn: Callable[[str, str], tuple[str, Any]] | None = None,
        embedding_fn: Callable[[str], list[float]] | None = None,
        config: ReviewCycleConfig | None = None,
    ) -> None:
        self._dispatch_fn = dispatch_fn
        self._embedding_fn = embedding_fn
        self.config = config or ReviewCycleConfig()
        # Cached result of assess_review_capability(); None until probed.
        self._model_capability: int | None = None

    # ------------------------------------------------------------------
    # Shared helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _fact_text(fact: dict[str, Any]) -> str:
        """Return the fact's ``"text"`` value, or ``""`` when missing or None."""
        return fact.get("text") or ""

    @staticmethod
    def _says_yes(output: str) -> bool:
        """Return True when *output* contains YES as a whole word.

        A plain substring test would also accept words such as "yesterday"
        or "eyes".
        """
        return re.search(r"\bYES\b", output.upper()) is not None

    # ------------------------------------------------------------------
    # Tier 1: Extraction-based
    # ------------------------------------------------------------------

    def extraction_based_validation(
        self,
        facts: list[dict[str, Any]],
        window_outputs: list[str] | None = None,
        planned_sections: list[str] | None = None,
    ) -> ValidationResult:
        """Run every zero-LLM-cost check and collect the issues.

        Args:
            facts: Fact dicts; only the ``"text"`` key is read.
            window_outputs: Text produced by each window so far.
            planned_sections: Section names expected to appear in the output.

        Returns:
            A tier-1 ``ValidationResult``. The embedding check runs only when
            an embedding function was supplied, and the structural check only
            when both *planned_sections* and *window_outputs* are non-empty.
        """
        issues: list[ConsistencyIssue] = []
        issues.extend(self._check_numerical_consistency(facts))
        issues.extend(self._check_entity_references(facts))
        if self._embedding_fn:
            issues.extend(self._check_embedding_contradictions(facts))
        if planned_sections and window_outputs:
            issues.extend(
                self._check_structural_completeness(window_outputs, planned_sections)
            )
        return ValidationResult(tier=1, issues=issues)

    def _check_numerical_consistency(
        self, facts: list[dict[str, Any]],
    ) -> list[ConsistencyIssue]:
        """Find differing numbers reported for the same context across facts.

        Numbers are grouped by the sorted set of alphabetic words in their
        surrounding context. A group is reported only when at least two
        facts disagree on the set of numbers they give for it, so a single
        fact such as "between 5 and 10 items" is not a contradiction with
        itself, nor with an identical fact.

        Returns:
            One high-severity issue per conflicting group; ``facts`` holds
            the positions of the facts involved.
        """
        # context key -> fact position -> numbers that fact gives for the key
        groups: dict[str, dict[int, set[str]]] = {}
        for position, fact in enumerate(facts):
            for number, context in _extract_numbers(self._fact_text(fact)):
                words = re.findall(r"[a-zA-Z]+", context.lower())
                key = " ".join(sorted(set(words)))
                # Very short keys carry too little context to group on.
                if len(key) > 5:
                    groups.setdefault(key, {}).setdefault(position, set()).add(number)

        issues: list[ConsistencyIssue] = []
        for key, by_fact in groups.items():
            if len({frozenset(numbers) for numbers in by_fact.values()}) < 2:
                continue
            # Sorted so the description is stable from run to run.
            values = sorted(set().union(*by_fact.values()))
            shown = "{" + ", ".join(repr(value) for value in values) + "}"
            issues.append(ConsistencyIssue(
                issue_type="numerical_contradiction",
                description=f"Conflicting values {shown} for context '{key[:50]}'",
                severity="high",
                facts=list(by_fact),
            ))
        return issues

    def _check_entity_references(
        self, facts: list[dict[str, Any]],
    ) -> list[ConsistencyIssue]:
        """Scan facts for capitalized multi-word entity names.

        NOTE(review): this is a placeholder. Entity names are collected but
        no issue is ever emitted, so the result is always an empty list.
        """
        issues: list[ConsistencyIssue] = []
        seen: set[str] = set()
        for fact in facts:
            names = re.findall(
                r"\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)+\b", self._fact_text(fact),
            )
            seen.update(name.lower() for name in names)
        return issues

    def _check_embedding_contradictions(
        self, facts: list[dict[str, Any]],
    ) -> list[ConsistencyIssue]:
        """Flag fact pairs that embed alike but are worded very differently.

        Only the first 50 facts are compared (the comparison is pairwise).
        A pair is reported when its cosine similarity exceeds
        ``EMBEDDING_CONTRADICTION_SIM`` and its normalized edit distance
        exceeds ``EMBEDDING_CONTRADICTION_EDIT``.

        Returns:
            One high-severity issue per pair; ``facts`` holds the two
            positions in the input list.
        """
        issues: list[ConsistencyIssue] = []
        if not self._embedding_fn or len(facts) < 2:
            return issues

        texts = [self._fact_text(fact) for fact in facts[:50]]
        vectors = [self._embedding_fn(text) for text in texts]

        for i, vector_i in enumerate(vectors):
            for j in range(i + 1, len(vectors)):
                sim = _cosine_sim(vector_i, vectors[j])
                if sim > EMBEDDING_CONTRADICTION_SIM:
                    ed = _normalized_edit_distance(texts[i], texts[j])
                    if ed > EMBEDDING_CONTRADICTION_EDIT:
                        issues.append(ConsistencyIssue(
                            issue_type="semantic_contradiction",
                            description=(
                                f"High similarity ({sim:.2f}) but high edit distance ({ed:.2f}) "
                                f"between facts {i} and {j}"
                            ),
                            severity="high",
                            facts=[i, j],
                        ))
        return issues

    def _check_structural_completeness(
        self,
        window_outputs: list[str],
        planned_sections: list[str],
    ) -> list[ConsistencyIssue]:
        """Report planned sections whose name does not occur in the output.

        Matching is a case-insensitive substring test against all window
        outputs joined by newlines.
        """
        combined = "\n".join(window_outputs).lower()
        return [
            ConsistencyIssue(
                issue_type="structural_gap",
                description=f"Planned section '{section}' not found in output",
                severity="medium",
            )
            for section in planned_sections
            if section.lower() not in combined
        ]

    # ------------------------------------------------------------------
    # Tier 2: LLM-targeted (2B+ models)
    # ------------------------------------------------------------------

    def _contradiction_prompt(
        self,
        issue: ConsistencyIssue,
        facts: list[dict[str, Any]] | None,
    ) -> str:
        """Build the YES/NO prompt for one semantic-contradiction issue.

        When *facts* is given and ``issue.facts`` holds two valid positions
        in it, the two fact texts are quoted so the model can actually
        compare them. Otherwise the issue description is sent.
        """
        question = (
            "Answer ONLY YES or NO: Do the following two statements contradict each other?\n"
        )
        refs = issue.facts
        if (
            facts
            and refs
            and len(refs) == 2
            and all(isinstance(ref, int) and 0 <= ref < len(facts) for ref in refs)
        ):
            first, second = (self._fact_text(facts[ref]) for ref in refs)
            return f"{question}Statement A: {first}\nStatement B: {second}"
        return f"{question}Statement: {issue.description}"

    def targeted_llm_validation(
        self,
        tier1_issues: list[ConsistencyIssue],
        facts: list[dict[str, Any]] | None = None,
    ) -> ValidationResult:
        """Confirm Tier 1 issues with targeted LLM questions.

        Numerical contradictions are always marked confirmed. Semantic
        contradictions are confirmed when the model answers YES; a failed
        call marks them unconfirmed. Other issue types pass through
        untouched. Issues are updated in place.

        Args:
            tier1_issues: Issues produced by Tier 1.
            facts: Optional fact list that Tier 1 was run on. When supplied,
                the texts of the two facts behind a semantic contradiction
                are put into the prompt; it must be the same list so the
                stored positions line up.

        Returns:
            A tier-2 ``ValidationResult`` holding every input issue. Without
            a dispatch function the issues are returned unchanged.
        """
        if not self._dispatch_fn:
            return ValidationResult(tier=2, issues=tier1_issues)

        reviewed: list[ConsistencyIssue] = []
        for issue in tier1_issues:
            if issue.issue_type == "numerical_contradiction":
                # Objective check, no model opinion needed.
                issue.confirmed = True
            elif issue.issue_type == "semantic_contradiction":
                prompt = self._contradiction_prompt(issue, facts)
                try:
                    output, _ = self._dispatch_fn(prompt, "")
                    if self._says_yes(output):
                        issue.confirmed = True
                except Exception:
                    # Best effort: a failed call leaves the issue unconfirmed.
                    issue.confirmed = False
            reviewed.append(issue)

        return ValidationResult(tier=2, issues=reviewed)

    # ------------------------------------------------------------------
    # Tier 3: Full LLM review (7B+ models)
    # ------------------------------------------------------------------

    def full_llm_review(
        self,
        accumulated_output: str,
        task_intent: str,
        top_facts: list[dict[str, Any]] | None = None,
    ) -> ValidationResult:
        """Ask the LLM to review the accumulated output as a whole.

        Args:
            accumulated_output: Output so far; only the first 5000
                characters are sent.
            task_intent: Task description placed in the prompt.
            top_facts: Facts listed in the prompt; at most the first 50.

        Returns:
            A tier-3 ``ValidationResult`` with one confirmed, medium-severity
            ``"llm_review"`` issue per numbered line of the reply. The result
            is empty when there is no dispatch function, the call fails, or
            the reply contains "NO ISSUES FOUND".
        """
        if not self._dispatch_fn:
            return ValidationResult(tier=3, issues=[])

        facts_section = ""
        if top_facts:
            facts_section = "\n".join(
                f"- {self._fact_text(fact)}" for fact in top_facts[:50]
            )

        prompt = (
            "You are a review assistant. Analyze this output for:\n"
            "1. Contradictions\n2. Unsupported claims\n3. Logical inconsistencies\n"
            "4. Missing connections\n5. Argument drift\n\n"
            f"Task: {task_intent}\n\n"
            f"Top facts:\n{facts_section}\n\n"
            "If no issues found, respond: NO ISSUES FOUND.\n"
            "Otherwise, list numbered issues."
        )

        try:
            output, _ = self._dispatch_fn(prompt, accumulated_output[:5000])
        except Exception:
            return ValidationResult(tier=3, issues=[])

        if "NO ISSUES FOUND" in output.upper():
            return ValidationResult(tier=3, issues=[])

        # Keep only lines that start with "<digits>.".
        numbered = (line.strip() for line in output.split("\n"))
        issues = [
            ConsistencyIssue(
                issue_type="llm_review",
                description=line,
                severity="medium",
                confirmed=True,
            )
            for line in numbered
            if re.match(r"\d+\.", line)
        ]
        return ValidationResult(tier=3, issues=issues)

    # ------------------------------------------------------------------
    # Model capability assessment
    # ------------------------------------------------------------------

    def assess_review_capability(self) -> int:
        """Probe the model once and return the highest usable tier (1-3).

        The result is cached, so later calls do not dispatch again.
        """
        if self._model_capability is None:
            self._model_capability = self._probe_capability()
        return self._model_capability

    def _probe_capability(self) -> int:
        """Run the tier-2 and tier-3 probes and return the resulting tier."""
        dispatch = self._dispatch_fn
        if not dispatch:
            return 1

        # Tier 2 probe: binary contradiction question.
        try:
            output, _ = dispatch(
                "Answer YES or NO only: Does 'the server runs on port 80' "
                "contradict 'the service uses port 443'?",
                "",
            )
            if not self._says_yes(output):
                return 1
        except Exception:
            return 1

        # Tier 3 probe: the reply must be a numbered list that mentions the
        # contradiction.
        try:
            output, _ = dispatch(
                "Analyze these two claims and list any contradictions as numbered items:\n"
                "Claim A: All vulnerabilities have been patched.\n"
                "Claim B: CVE-2024-1234 remains exploitable.",
                "",
            )
            lowered = output.lower()
            has_numbered = bool(re.search(r"\d+\.", output))
            has_contradiction = any(
                word in lowered
                for word in ("contradict", "conflict", "inconsist", "exploit")
            )
            if has_numbered and has_contradiction:
                return 3
        except Exception:
            # Deliberate best effort: a failed tier-3 probe keeps tier 2.
            pass

        return 2

    # ------------------------------------------------------------------
    # Correction pipeline
    # ------------------------------------------------------------------

    def apply_corrections(
        self,
        issues: list[ConsistencyIssue],
        task_intent: str = "",
        blockers: list[str] | None = None,
    ) -> list[str]:
        """Flag or correct *issues* according to ``config.correction_mode``.

        Issues are handled from highest to lowest severity.

        * ``"flag"``: when *blockers* is given, one entry per issue is
          appended to it and a matching ``flagged:`` action is recorded;
          without *blockers* nothing is recorded.
        * ``"correct"`` (needs a dispatch function): the first
          ``config.max_correction_windows`` issues are processed. Numerical
          and semantic contradictions are sent to the LLM; other types are
          recorded as ``skipped:``.

        Args:
            issues: Issues to process.
            task_intent: Accepted for interface compatibility; not used.
            blockers: List that receives flagged issues in ``"flag"`` mode.

        Returns:
            Descriptions of the actions taken, in processing order.
        """
        rank = self._SEVERITY_RANK
        ordered = sorted(issues, key=lambda issue: rank.get(issue.severity, 3))
        actions: list[str] = []
        mode = self.config.correction_mode

        if mode == "flag":
            if blockers is not None:
                for issue in ordered:
                    blockers.append(f"[{issue.severity}] {issue.description}")
                    actions.append(f"flagged: {issue.description[:80]}")
            return actions

        if mode == "correct" and self._dispatch_fn:
            for issue in ordered[:self.config.max_correction_windows]:
                if issue.issue_type == "numerical_contradiction":
                    prompt = (
                        f"Resolve this numerical contradiction: {issue.description}. "
                        "Provide the correct value only."
                    )
                    done, failed = "corrected", "correction_failed"
                elif issue.issue_type == "semantic_contradiction":
                    prompt = (
                        f"Resolve this contradiction: {issue.description}. "
                        "Which statement is correct?"
                    )
                    done, failed = "resolved", "resolution_failed"
                else:
                    actions.append(f"skipped: {issue.description[:80]}")
                    continue

                try:
                    output, _ = self._dispatch_fn(prompt, "")
                    actions.append(f"{done}: {issue.description[:80]} → {output[:100]}")
                except Exception:
                    actions.append(f"{failed}: {issue.description[:80]}")

        return actions

    def should_run_tier(self, window_index: int, tier: int) -> bool:
        """Return True when *tier* is due at *window_index*.

        A tier is due when it is enabled (tier 1 has no switch) and the
        window index is a multiple of its configured interval. A
        non-positive interval disables the tier instead of raising
        ``ZeroDivisionError``. Unknown tiers are never due.
        """
        cfg = self.config
        if tier == 1:
            enabled, interval = True, cfg.tier_1_interval
        elif tier == 2:
            enabled, interval = cfg.tier_2_enabled, cfg.tier_2_interval
        elif tier == 3:
            enabled, interval = cfg.tier_3_enabled, cfg.tier_3_interval
        else:
            return False

        if not enabled or interval <= 0:
            return False
        return window_index % interval == 0
|
|
464
|
+
|
|
465
|
+
|
|
466
|
+
# ---------------------------------------------------------------------------
|
|
467
|
+
# Utility
|
|
468
|
+
# ---------------------------------------------------------------------------
|
|
469
|
+
|
|
470
|
+
|
|
471
|
+
def _cosine_sim(a: list[float], b: list[float]) -> float:
|
|
472
|
+
if len(a) != len(b) or not a:
|
|
473
|
+
return 0.0
|
|
474
|
+
dot = sum(x * y for x, y in zip(a, b))
|
|
475
|
+
na = math.sqrt(sum(x * x for x in a))
|
|
476
|
+
nb = math.sqrt(sum(x * x for x in b))
|
|
477
|
+
return dot / (na * nb) if na > 0 and nb > 0 else 0.0
|