groundguard 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,37 @@
1
+ from groundguard.core.verifier import (
2
+ verify,
3
+ averify,
4
+ verify_batch,
5
+ averify_batch,
6
+ verify_batch_async,
7
+ verify_analysis,
8
+ averify_analysis,
9
+ verify_answer,
10
+ averify_answer,
11
+ verify_clause,
12
+ averify_clause,
13
+ verify_structured,
14
+ )
15
+ from groundguard.models.result import (
16
+ GroundingResult,
17
+ ContextualizedClaimUnit,
18
+ VerificationAuditRecord,
19
+ )
20
+ from groundguard.profiles import (
21
+ VerificationProfile,
22
+ STRICT_PROFILE,
23
+ GENERAL_PROFILE,
24
+ RESEARCH_PROFILE,
25
+ )
26
+ from groundguard.circuit_breaker import (
27
+ assert_faithful,
28
+ assert_grounded,
29
+ verify_or_retry,
30
+ GroundingError,
31
+ )
32
+ from groundguard.loaders.accumulator import GroundingAccumulator, SourceAccumulator
33
+ from groundguard.cost_estimate import (
34
+ CostEstimate,
35
+ estimate_verify_analysis_cost,
36
+ estimate_verify_faithfulness_cost,
37
+ )
@@ -0,0 +1,14 @@
1
+ """Shared constants for the groundguard library."""
2
+ from __future__ import annotations
3
+
4
+ import litellm
5
+
6
+ # FIX-02: Unified transient error tuple used by both tier3_evaluation.py and verifier.py.
7
+ # Previously tier3_evaluation.py was missing litellm.exceptions.Timeout, meaning timeouts
8
+ # were not retried by the backoff loop — only caught and re-raised by the orchestrator.
9
+ TRANSIENT_LITELLM_ERRORS = (
10
+ litellm.exceptions.ServiceUnavailableError,
11
+ litellm.exceptions.RateLimitError,
12
+ litellm.exceptions.APIConnectionError,
13
+ litellm.exceptions.Timeout,
14
+ )
groundguard/_log.py ADDED
@@ -0,0 +1,4 @@
1
+ """Library logger."""
2
+ import logging
3
+
4
+ logger = logging.getLogger("groundguard")
@@ -0,0 +1,4 @@
1
+ """Model adapter registry for provider-specific LLM quirk handling."""
2
+ from groundguard.adapters.registry import get_adapter, ModelAdapter
3
+
4
+ __all__ = ["get_adapter", "ModelAdapter"]
@@ -0,0 +1,298 @@
1
+ """Model adapter registry — provider-specific pre/post-processing for litellm calls."""
2
+ from __future__ import annotations
3
+ import re
4
+ from dataclasses import dataclass
5
+ from typing import Any, Callable
6
+
7
+ from groundguard._log import logger
8
+ from groundguard.exceptions import VerificationFailedError
9
+
10
_THINK_TAG_RE = re.compile(r'<think>.*?</think>', re.DOTALL | re.IGNORECASE)
# Start of any closing think-tag variant (</think>, </THINK>, </thinking>, ...).
# Matched case-insensitively against the ORIGINAL string so the offsets it yields
# are valid slice positions (offsets into content.lower() are not — see below).
_THINK_CLOSE_RE = re.compile(r'</think', re.IGNORECASE)
# BUG-01: unanchored search so conversational pre/post-text is ignored.
# Only extract if the fenced content looks like JSON ({...} or [...]).
_MD_FENCE_RE = re.compile(r'```(?:json)?\s*\n?(.*?)\n?\s*```', re.DOTALL)


def _strip_fences(content: str) -> str:
    """Return the JSON payload of the first markdown code fence, if any.

    The search is unanchored, so conversational text before or after the
    fence does not prevent extraction. The fenced text is returned only when
    it looks like JSON (starts with '{' or '['); in every other case the
    original content is returned with surrounding whitespace stripped.
    """
    fence = _MD_FENCE_RE.search(content)
    if fence is not None:
        payload = fence.group(1).strip()
        if payload.startswith(('{', '[')):
            return payload
    return content.strip()


def _strip_think_tags(content: str) -> str:
    """Remove chain-of-thought ``<think>`` blocks and return what follows.

    Strategy, in order:

    1. Locate the LAST closing-tag opener ``</think`` (case-insensitive, so
       ``</THINK>`` and ``</thinking>`` qualify) and drop everything up to and
       including the next ``>``. This tolerates malformed or repeated think
       blocks because only the final closing tag matters.
    2. If there is no usable closing tag, strip well-formed
       ``<think>...</think>`` pairs with a regex.
    3. If nothing was stripped and the text opens with ``<think``, the model
       ran out of tokens mid-thought: the whole string is reasoning, so ""
       is returned to signal "no usable JSON".

    The closing tag is searched in ``content`` itself rather than in
    ``content.lower()``: lowercasing can change the string length (for
    example 'İ' becomes two code points), which would shift the offset and
    cut the result in the wrong place.
    """
    last_close = None
    for last_close in _THINK_CLOSE_RE.finditer(content):
        pass  # keep only the final match
    if last_close is not None:
        # Advance past the tag's closing '>'
        close_bracket = content.find('>', last_close.start())
        if close_bracket != -1:
            return content[close_bracket + 1:].strip()
    # Fallback: regex for well-formed <think>...</think> blocks
    stripped = _THINK_TAG_RE.sub('', content).strip()
    # Unchanged text that opens with <think> means max_tokens was hit
    # mid-thought — the entire content is reasoning, no JSON present.
    if stripped == content.strip() and content.lower().lstrip().startswith('<think'):
        return ""
    return stripped
62
+
63
+
64
@dataclass
class ModelAdapter:
    """
    Bundle of provider-specific hooks applied around a litellm call.

    Fields:
        name:
            Label identifying the adapter.
        build_kwargs(base_kwargs: dict) -> dict:
            Receives the base litellm.completion() kwargs and returns the
            kwargs that will actually be sent. An adapter may add, remove, or
            modify any key (e.g. OPENAI_REASONING_ADAPTER drops 'temperature'
            to avoid API errors on o1/o3/o4/gpt-5 models). The default
            behaviour is to return the kwargs unchanged.
        post_process(response, model) -> str:
            Extracts a normalized content string from a raw LiteLLM response
            object. May raise VerificationFailedError on unrecoverable
            content. The returned string is fed directly into
            Tier3ResponseModel.model_validate_json().
    """

    name: str
    build_kwargs: Callable[[dict], dict]
    post_process: Callable[[Any, str], str]
83
+
84
+
85
+ # ---------------------------------------------------------------------------
86
+ # DEFAULT_ADAPTER — used for all unrecognized models
87
+ # ---------------------------------------------------------------------------
88
def _default_post_process(response: Any, model: str = "") -> str:
    """Return the first choice's message content with markdown fences stripped.

    Missing or empty content is treated as an empty string. The ``model``
    argument is accepted for signature compatibility and is not used.
    """
    raw = response.choices[0].message.content
    if not raw:
        raw = ""
    return _strip_fences(raw)


# Fallback adapter: kwargs are copied unchanged, content is only de-fenced.
DEFAULT_ADAPTER = ModelAdapter(
    name="default",
    post_process=_default_post_process,
    build_kwargs=lambda base: dict(base),
)
98
+
99
+
100
+ # ---------------------------------------------------------------------------
101
+ # OLLAMA_ADAPTER — ollama/ and ollama_chat/ prefixes
102
+ # ---------------------------------------------------------------------------
103
def _ollama_build_kwargs(base: dict) -> dict:
    """Force ollama/ → ollama_chat/ and ensure sufficient context for structured output.

    Two issues this fixes:

    1. litellm routes 'ollama/' to /api/generate, which mishandles structured-output
       responses from thinking-capable models (qwen3, DeepSeek-R1, etc.): the JSON
       schema output lands in the 'thinking' field while 'response' is empty.
       /api/chat correctly splits 'content' (JSON) from 'thinking' (reasoning).

    2. Models with a small default num_ctx (e.g. 4K) exhaust their token budget
       during the thinking phase, leaving nothing for the JSON output. num_ctx
       defaults to 16384 here so thinking-capable models have room to reason AND
       output the full structured response. A caller-supplied num_ctx wins.

    The input dict is never modified: the top-level dict, ``extra_body`` and
    ``extra_body["options"]`` are all copied before defaults are applied, so a
    kwargs template shared between calls keeps its original contents. A
    missing or ``None`` ``extra_body`` / ``options`` is treated as empty.

    Args:
        base: Base litellm.completion() kwargs.

    Returns:
        A new kwargs dict with the model prefix rewritten and Ollama defaults
        (``options.num_ctx``, ``keep_alive``) filled in where absent.
    """
    kwargs = dict(base)
    model = kwargs.get("model", "")
    if model.startswith("ollama/"):
        kwargs["model"] = "ollama_chat/" + model[len("ollama/"):]

    # Copy the nested dicts: dict(base) is shallow, so writing defaults into
    # them directly would leak into the caller's data.
    extra_body = dict(kwargs.get("extra_body") or {})
    options = dict(extra_body.get("options") or {})
    # Ensure enough context for thinking + structured JSON output (16K covers full Tier3 prompts)
    options.setdefault("num_ctx", 16384)
    extra_body["options"] = options
    # keep_alive=300 holds the model in memory for 5 min so sequential calls don't reload
    extra_body.setdefault("keep_alive", 300)
    kwargs["extra_body"] = extra_body
    return kwargs
129
+
130
+
131
def _ollama_post_process(response: Any, model: str = "") -> str:
    """Extract JSON text from a response that may carry think-tag reasoning.

    The message content is cleaned of think tags first. If nothing usable
    remains, ``reasoning_content`` is tried as a fallback, but only when it
    starts with '{'. When neither yields JSON, "" is returned (with a warning)
    instead of raising, so the Tier 3 retry loop can retry (BUG-02). The
    ``model`` argument is not used.
    """
    message = response.choices[0].message
    text = message.content
    if text:
        text = _strip_think_tags(text)
    if text:
        return _strip_fences(text)

    # litellm may drop content if reasoning_content is present — try fallback
    reasoning = (getattr(message, 'reasoning_content', None) or "").strip()
    if not reasoning.startswith('{'):
        # BUG-02: return "" instead of raising so Tier 3 retry loop catches it
        # (ValidationError on empty string) and retries rather than propagating.
        logger.warning(
            "Ollama returned empty content and reasoning_content has no JSON — "
            "returning empty string for retry"
        )
        return ""
    return _strip_fences(reasoning)


# Adapter for the ollama/ and ollama_chat/ prefixes.
OLLAMA_ADAPTER = ModelAdapter(
    name="ollama",
    post_process=_ollama_post_process,
    build_kwargs=_ollama_build_kwargs,
)
158
+
159
+
160
+ # ---------------------------------------------------------------------------
161
+ # NIM_THINKING_ADAPTER — NIM-hosted thinking models (kimi-k2, gpt-oss, etc.)
162
+ # Uses reasoning_content fallback like OLLAMA_ADAPTER but sends no
163
+ # Ollama-specific extra_body fields (options/keep_alive) to the NIM endpoint.
164
+ # ---------------------------------------------------------------------------
165
# Request kwargs are only copied (no Ollama-specific extra_body fields are added);
# response handling is shared with the Ollama adapter for its reasoning_content fallback.
NIM_THINKING_ADAPTER = ModelAdapter(
    name="nim_thinking",
    post_process=_ollama_post_process,
    build_kwargs=lambda base: dict(base),
)
170
+
171
+
172
+ # ---------------------------------------------------------------------------
173
+ # NEMOTRON_NIM_ADAPTER — nvidia/nemotron-3-super-120b-a12b
174
+ # Requires chat_template_kwargs + reasoning_budget in extra_body, otherwise
175
+ # the server hangs without returning a response.
176
+ # ---------------------------------------------------------------------------
177
def _nemotron_build_kwargs(base: dict) -> dict:
    """Add the extra_body defaults the Nemotron NIM endpoint requires.

    Fills in ``chat_template_kwargs`` ({"enable_thinking": True}) and
    ``reasoning_budget`` (16384) inside ``extra_body`` when the caller has not
    supplied them; caller-supplied values are kept.

    The input is never modified: ``extra_body`` is copied before defaults are
    written, because the top-level ``dict(base)`` copy is shallow and would
    otherwise share that nested dict with the caller. A missing or ``None``
    ``extra_body`` is treated as empty.

    Args:
        base: Base litellm.completion() kwargs.

    Returns:
        A new kwargs dict with the Nemotron defaults applied.
    """
    kwargs = dict(base)
    extra_body = dict(kwargs.get("extra_body") or {})
    extra_body.setdefault("chat_template_kwargs", {"enable_thinking": True})
    extra_body.setdefault("reasoning_budget", 16384)
    kwargs["extra_body"] = extra_body
    return kwargs
183
+
184
+
185
# Nemotron-specific request kwargs; response handling shared with the Ollama adapter.
NEMOTRON_NIM_ADAPTER = ModelAdapter(
    name="nemotron_nim",
    post_process=_ollama_post_process,
    build_kwargs=_nemotron_build_kwargs,
)
190
+
191
+
192
+ # ---------------------------------------------------------------------------
193
+ # OPENAI_REASONING_ADAPTER — o1, o3, o4, gpt-5 series
194
+ # ---------------------------------------------------------------------------
195
def _openai_reasoning_build_kwargs(base: dict) -> dict:
    """Return a copy of ``base`` without the 'temperature' key.

    The input dict is left untouched; all other keys keep their order.
    """
    return {key: value for key, value in base.items() if key != "temperature"}
199
+
200
+
201
# Strips 'temperature' from the request; responses use the default de-fencing path.
OPENAI_REASONING_ADAPTER = ModelAdapter(
    name="openai_reasoning",
    post_process=_default_post_process,
    build_kwargs=_openai_reasoning_build_kwargs,
)
206
+
207
+
208
+ # ---------------------------------------------------------------------------
209
+ # ANTHROPIC_ADAPTER — anthropic/ prefix and claude- prefix models
210
+ # ---------------------------------------------------------------------------
211
def _anthropic_post_process(response: Any, model: str = "") -> str:
    """Return the raw message content of the first choice, de-fenced.

    Only ``message.content`` is read; ``message.parsed`` is deliberately
    never used, forcing the raw content path to avoid litellm #20533.
    Missing or empty content is treated as an empty string. The ``model``
    argument is not used.
    """
    raw = response.choices[0].message.content
    if not raw:
        raw = ""
    return _strip_fences(raw)


# Adapter for anthropic/ and claude- prefixed models.
ANTHROPIC_ADAPTER = ModelAdapter(
    name="anthropic",
    post_process=_anthropic_post_process,
    build_kwargs=lambda base: dict(base),
)
222
+
223
+
224
+ # ---------------------------------------------------------------------------
225
+ # GOOGLE_ADAPTER — gemini/ and vertex_ai/gemini prefixes
226
+ # ---------------------------------------------------------------------------
227
def _google_post_process(response: Any, model: str = "") -> str:
    """Return the de-fenced message content, or "" when the content is empty.

    Empty content is logged as a warning and reported as "" rather than
    raised, so that the Tier 3 retry loop retries (BUG-02). The ``model``
    argument is not used.
    """
    text = response.choices[0].message.content
    if text:
        return _strip_fences(text)
    # BUG-02: return "" instead of raising so Tier 3 retry loop retries.
    logger.warning(
        "Gemini returned empty content (possible safety filter) — "
        "returning empty string for retry"
    )
    return ""


# Adapter for gemini/ and vertex_ai/gemini prefixes.
GOOGLE_ADAPTER = ModelAdapter(
    name="google",
    post_process=_google_post_process,
    build_kwargs=lambda base: dict(base),
)
244
+
245
+
246
+ # ---------------------------------------------------------------------------
247
+ # JSON_OBJECT_ADAPTER — models that support only json_object (not json_schema)
248
+ # e.g. nvidia_nim/microsoft/phi-4-mini-instruct
249
+ # ---------------------------------------------------------------------------
250
def _json_object_build_kwargs(base: dict) -> dict:
    """Return a copy of ``base`` with response_format forced to json_object.

    Any response_format already present is replaced; the input dict is left
    untouched.
    """
    return {**base, "response_format": {"type": "json_object"}}
254
+
255
+
256
# Forces response_format=json_object on the request; default de-fencing on the response.
JSON_OBJECT_ADAPTER = ModelAdapter(
    name="json_object",
    post_process=_default_post_process,
    build_kwargs=_json_object_build_kwargs,
)
261
+
262
+
263
+ # ---------------------------------------------------------------------------
264
+ # Registry & Lookup — ordered most-specific to least-specific prefix
265
+ # ---------------------------------------------------------------------------
266
+ _REGISTRY: list[tuple[str, ModelAdapter]] = [
267
+ ("ollama_chat/", OLLAMA_ADAPTER),
268
+ ("ollama/", OLLAMA_ADAPTER),
269
+ # NIM thinking models — emit reasoning_content
270
+ ("nvidia_nim/deepseek", NIM_THINKING_ADAPTER), # DeepSeek-R1/V3 on NIM
271
+ ("nvidia_nim/nvidia/nemotron-3-super", NEMOTRON_NIM_ADAPTER), # requires chat_template_kwargs
272
+ ("nvidia_nim/nvidia/nemotron-3-nano", NEMOTRON_NIM_ADAPTER), # requires chat_template_kwargs
273
+ ("nvidia_nim/moonshotai/kimi-k2", NIM_THINKING_ADAPTER), # Kimi K2 thinking
274
+ ("nvidia_nim/openai/gpt-oss", NIM_THINKING_ADAPTER), # GPT-OSS thinking
275
+ # NIM json_object-only models
276
+ ("nvidia_nim/microsoft/phi-4-mini", JSON_OBJECT_ADAPTER),
277
+ ("vertex_ai/gemini", GOOGLE_ADAPTER),
278
+ ("gemini/", GOOGLE_ADAPTER),
279
+ ("anthropic/", ANTHROPIC_ADAPTER),
280
+ ("claude-", ANTHROPIC_ADAPTER),
281
+ ("o1", OPENAI_REASONING_ADAPTER),
282
+ ("o3", OPENAI_REASONING_ADAPTER),
283
+ ("o4", OPENAI_REASONING_ADAPTER),
284
+ ("gpt-5", OPENAI_REASONING_ADAPTER),
285
+ ]
286
+
287
+
288
def get_adapter(model: str) -> ModelAdapter:
    """
    Return the adapter whose registered prefix is the longest match for ``model``.

    Every entry of _REGISTRY is examined and the longest prefix that ``model``
    starts with wins, so the result does not depend on the order in which
    entries are listed (a more specific prefix always beats a shorter one it
    extends). Unrecognized models get DEFAULT_ADAPTER.

    Args:
        model: litellm model identifier, e.g. 'ollama_chat/deepseek-r1'.

    Returns:
        The matching ModelAdapter, or DEFAULT_ADAPTER when no prefix matches.
    """
    matched = DEFAULT_ADAPTER
    matched_len = -1
    for prefix, adapter in _REGISTRY:
        # Strictly-longer check: on a tie the earlier registry entry is kept.
        if len(prefix) > matched_len and model.startswith(prefix):
            matched, matched_len = adapter, len(prefix)
    return matched
@@ -0,0 +1,32 @@
1
+ """Assertion-style circuit breakers for grounding verification."""
2
+ from groundguard.models.result import GroundingResult, Source
3
+ from groundguard.core.verifier import verify_answer, verify_analysis
4
+
5
+
6
class GroundingError(Exception):
    """Raised by the circuit breakers when output fails grounding verification."""
8
+
9
+
10
def assert_faithful(output: str, sources: list[Source], **kwargs) -> None:
    """Raise GroundingError unless ``verify_answer`` reports ``output`` as grounded.

    Extra keyword arguments are forwarded to ``verify_answer`` unchanged.
    The error message carries the result's score and status.
    """
    verdict = verify_answer(output, sources, **kwargs)
    if verdict.is_grounded:
        return
    message = f"Output not grounded: score={verdict.score:.2f}, status={verdict.status}"
    raise GroundingError(message)
16
+
17
+
18
def assert_grounded(analysis: str, sources: list[Source], **kwargs) -> None:
    """Raise GroundingError unless ``verify_analysis`` reports ``analysis`` as grounded.

    Extra keyword arguments are forwarded to ``verify_analysis`` unchanged.
    The error message carries the result's score.
    """
    verdict = verify_analysis(analysis, sources, **kwargs)
    if verdict.is_grounded:
        return
    message = f"Analysis not grounded: score={verdict.score:.2f}"
    raise GroundingError(message)
24
+
25
+
26
def verify_or_retry(generator, sources: list[Source], max_retries: int = 3, **kwargs) -> str:
    """Call ``generator`` until its output verifies as grounded.

    ``generator`` is invoked with no arguments, at most ``max_retries`` times
    in total. Each output is checked with ``verify_answer`` (extra keyword
    arguments are forwarded); the first grounded output is returned.

    Raises:
        GroundingError: when no attempt produced a grounded output.
    """
    attempts_left = max_retries
    while attempts_left > 0:
        attempts_left -= 1
        candidate = generator()
        if verify_answer(candidate, sources, **kwargs).is_grounded:
            return candidate
    raise GroundingError(f"Output not grounded after {max_retries} attempts")
File without changes
@@ -0,0 +1,85 @@
1
+ """Claim extraction from free-form text using LLM."""
2
+ from __future__ import annotations
3
+ import secrets
4
+ from typing import TYPE_CHECKING
5
+
6
+ import pydantic
7
+
8
+ from groundguard.exceptions import ParseError
9
+ from groundguard.tiers.tier3_evaluation import _completion_with_backoff, _acompletion_with_backoff
10
+
11
+ if TYPE_CHECKING:
12
+ from groundguard.models.result import Source
13
+
14
+
15
CLAIM_EXTRACTION_PROMPT = """Extract all distinct factual claims from the text below.
Return JSON with key "claims" containing a list of strings.
Each string is one atomic, self-contained factual claim.

Text (boundary: {boundary}):
{text}

Sources provided:
{sources_block}

Return only JSON. Example: {{"claims": ["claim 1", "claim 2"]}}"""

# Total number of LLM calls made before giving up on an unparseable response.
_MAX_PARSE_ATTEMPTS = 2


class _ClaimList(pydantic.BaseModel):
    # Expected response shape: {"claims": ["...", "..."]}
    claims: list[str]


def _build_claim_prompt(text: str, sources: list) -> str:
    """Render CLAIM_EXTRACTION_PROMPT for ``text`` and ``sources``.

    Each source contributes one line made of its ``source_id`` and the first
    200 characters of its ``content``. A fresh random boundary token is
    generated per prompt.
    """
    boundary = secrets.token_hex(6)
    sources_block = "\n".join(f"- {s.source_id}: {s.content[:200]}" for s in sources)
    return CLAIM_EXTRACTION_PROMPT.format(
        boundary=boundary, text=text, sources_block=sources_block
    )


def _claim_request_kwargs(model: str, prompt: str, api_base: str | None) -> dict:
    """Build the completion kwargs; ``api_base`` is included only when truthy."""
    request = {"model": model, "messages": [{"role": "user", "content": prompt}]}
    if api_base:
        request["api_base"] = api_base
    return request


def _parse_claims(response) -> list[str]:
    """Validate the first choice's content as a ``_ClaimList`` and return its claims.

    Missing content is validated as an empty string so that it fails as a
    regular validation error and is retried like any other malformed reply.
    """
    content = response.choices[0].message.content
    return _ClaimList.model_validate_json(content or "").claims


def extract_claims(
    text: str,
    sources: list,
    model: str,
    max_spend: float = float("inf"),
    api_base: str | None = None,
) -> list[str]:
    """Extract atomic factual claims from ``text`` with an LLM call.

    Args:
        text: Free-form text to split into claims.
        sources: Objects exposing ``source_id`` and ``content``; listed in the
            prompt for context.
        model: litellm model identifier.
        max_spend: Accepted for interface compatibility; not consulted by
            this function.
        api_base: Optional endpoint override forwarded to the completion call.

    Returns:
        The list of claim strings parsed from the model's JSON reply.

    Raises:
        ParseError: when the reply could not be parsed after 2 attempts. The
            last parsing error is attached as ``__cause__``.
    """
    prompt = _build_claim_prompt(text, sources)
    last_error: Exception | None = None
    for _ in range(_MAX_PARSE_ATTEMPTS):
        try:
            response = _completion_with_backoff(
                **_claim_request_kwargs(model, prompt, api_base)
            )
            return _parse_claims(response)
        except (pydantic.ValidationError, ValueError) as exc:
            last_error = exc
    raise ParseError(
        f"claim extraction failed after {_MAX_PARSE_ATTEMPTS} attempts"
    ) from last_error


async def extract_claims_async(
    text: str,
    sources: list,
    model: str,
    max_spend: float = float("inf"),
    api_base: str | None = None,
) -> list[str]:
    """Async counterpart of ``extract_claims``.

    Builds the same prompt and applies the same parsing and retry rules, but
    awaits the async completion helper.

    Args:
        text: Free-form text to split into claims.
        sources: Objects exposing ``source_id`` and ``content``; listed in the
            prompt for context.
        model: litellm model identifier.
        max_spend: Accepted for interface compatibility; not consulted by
            this function.
        api_base: Optional endpoint override forwarded to the completion call.

    Returns:
        The list of claim strings parsed from the model's JSON reply.

    Raises:
        ParseError: when the reply could not be parsed after 2 attempts. The
            last parsing error is attached as ``__cause__``.
    """
    prompt = _build_claim_prompt(text, sources)
    last_error: Exception | None = None
    for _ in range(_MAX_PARSE_ATTEMPTS):
        try:
            response = await _acompletion_with_backoff(
                **_claim_request_kwargs(model, prompt, api_base)
            )
            return _parse_claims(response)
        except (pydantic.ValidationError, ValueError) as exc:
            last_error = exc
    raise ParseError(
        f"claim extraction async failed after {_MAX_PARSE_ATTEMPTS} attempts"
    ) from last_error
@@ -0,0 +1,50 @@
1
+ """Tier 0 classifier — rules-based Extractive/Inferential atom classification."""
2
+ from __future__ import annotations
3
+ import re
4
+ from groundguard.models.internal import ClassifiedAtom
5
+
6
INFERENTIAL_SIGNALS = {
    "trend", "trajectory", "suggests", "indicates", "on track", "at risk",
    "appears to", "likely", "projected", "based on", "derived from",
    "analysis shows", "pattern", "forecast", "outlook", "implies",
    "consistent with", "points to", "expected to",
}

# Sentence splitter: terminal punctuation followed by whitespace, or a run of newlines.
# Decimals are preserved ($4.2M, v2.1, 3.14) because their dot is never followed by
# whitespace. No digit lookbehind is used: it would also block the split after every
# sentence that ends in a number ("... in 2023. This suggests ...").
_SENTENCE_SPLIT_RE = re.compile(r'[.!?]\s+|\n+')

# A short bare number in front of ". " is treated as a list marker ("1. Item").
_LIST_MARKER_RE = re.compile(r'\d{1,3}')


def _split_sentences(text: str) -> list[str]:
    """Split ``text`` into trimmed sentence fragments that contain real content."""
    pieces: list[str] = []
    start = 0
    for boundary in _SENTENCE_SPLIT_RE.finditer(text):
        fragment = text[start:boundary.start()]
        # Keep "1. Item" together: do not cut right after a bare list number.
        if boundary.group().startswith('.') and _LIST_MARKER_RE.fullmatch(fragment.strip()):
            continue
        pieces.append(fragment)
        start = boundary.end()
    pieces.append(text[start:])

    trimmed = (piece.strip() for piece in pieces)
    # Drop empty and punctuation-only fragments ("...", "!") — they carry no claim.
    return [piece for piece in trimmed if any(ch.isalnum() for ch in piece)]


def parse_and_classify(claim: str) -> list[ClassifiedAtom]:
    """
    Zero-cost, zero-LLM heuristic classifier.

    1. Split claim into atomic sentences on sentence-ending punctuation
       followed by whitespace, or on newlines. Decimals are never split, and
       a sentence that ends in a number is split like any other.
    2. For each sentence: classify as Inferential if any INFERENTIAL_SIGNALS token
       appears as a case-insensitive whole-word match; otherwise Extractive.

    Returns:
        List of ClassifiedAtom objects, one per sentence, in input order.
        Empty, whitespace-only, or punctuation-only input returns an empty list.
    """
    if not claim or not claim.strip():
        return []

    atoms: list[ClassifiedAtom] = []
    for sentence in _split_sentences(claim):
        lower = sentence.lower()
        is_inferential = any(
            re.search(rf'\b{re.escape(signal)}\b', lower)
            for signal in INFERENTIAL_SIGNALS
        )
        atoms.append(ClassifiedAtom(
            claim_text=sentence,
            claim_type="Inferential" if is_inferential else "Extractive",
        ))

    return atoms
@@ -0,0 +1,60 @@
1
+ """Phase 23 ResultBuilder — citation extraction and invariant enforcement."""
2
+ from __future__ import annotations
3
+ from typing import TYPE_CHECKING
4
+
5
+ from groundguard.models.result import AtomicClaimResult, Citation
6
+ from groundguard.exceptions import InvariantError
7
+
8
+ if TYPE_CHECKING:
9
+ from groundguard.models.result import Source
10
+ from groundguard.tiers.tier25_preprocessing import Tier25Result
11
+
12
+
13
class ResultBuilder:
    """Static factories that assemble AtomicClaimResult objects.

    Every builder constructs the result first and then runs the citation
    invariant check for the status it produced.
    """

    @staticmethod
    def build_numerical_fast_exit(claim: str, tier25: Tier25Result, source: Source) -> AtomicClaimResult:
        """Build a CONTRADICTED result citing ``tier25.conflict_citation``."""
        conflict = tier25.conflict_citation
        outcome = AtomicClaimResult(
            claim_text=claim,
            claim_type="Extractive",
            status="CONTRADICTED",
            source_id=source.source_id,
            verification_method="tier25_numerical",
            citation=conflict,
        )
        ResultBuilder._assert_citation_invariant("CONTRADICTED", conflict)
        return outcome

    @staticmethod
    def build_lexical_pass(claim: str, top_chunks: list, score: float, source: Source) -> AtomicClaimResult:
        """Build a VERIFIED result cited from the best lexical chunk.

        The first entry of ``top_chunks`` supplies the excerpt and its
        character span. With no chunks, the first 100 characters of the
        source content (or "" when there is none) are cited instead.
        ``score`` is accepted but not used here.
        """
        if not top_chunks:
            excerpt = (source.content or "")[:100]
            span_start, span_end = 0, len(excerpt)
        else:
            best = top_chunks[0]
            excerpt = best.text_content
            span_start = best.char_start
            span_end = best.char_end

        cited = Citation(
            source_id=source.source_id,
            excerpt=excerpt,
            excerpt_char_start=span_start,
            excerpt_char_end=span_end,
            citation_confidence=1.0,
        )
        outcome = AtomicClaimResult(
            claim_text=claim,
            claim_type="Extractive",
            status="VERIFIED",
            source_id=source.source_id,
            verification_method="tier2_lexical",
            citation=cited,
        )
        ResultBuilder._assert_citation_invariant("VERIFIED", cited)
        return outcome

    @staticmethod
    def build_llm_result(claim: str, verdict: str, citation: Citation | None = None) -> AtomicClaimResult:
        """Build a Tier 3 result with ``verdict`` as its status.

        The citation is discarded when the verdict is UNVERIFIABLE. The
        invariant is checked against the citation stored on the built result.
        """
        kept = citation if verdict != "UNVERIFIABLE" else None
        outcome = AtomicClaimResult(
            claim_text=claim,
            claim_type="Extractive",
            status=verdict,
            verification_method="tier3_llm",
            citation=kept,
        )
        ResultBuilder._assert_citation_invariant(verdict, outcome.citation)
        return outcome

    @staticmethod
    def _assert_citation_invariant(verdict: str, citation: Citation | None) -> None:
        """Raise InvariantError when a VERIFIED verdict has no citation."""
        if citation is None and verdict == "VERIFIED":
            raise InvariantError("citation must be non-null for VERIFIED results")