groundguard 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- groundguard/__init__.py +37 -0
- groundguard/_constants.py +14 -0
- groundguard/_log.py +4 -0
- groundguard/adapters/__init__.py +4 -0
- groundguard/adapters/registry.py +298 -0
- groundguard/circuit_breaker.py +32 -0
- groundguard/core/__init__.py +0 -0
- groundguard/core/claim_extractor.py +85 -0
- groundguard/core/classifier.py +50 -0
- groundguard/core/result_builder.py +60 -0
- groundguard/core/verifier.py +758 -0
- groundguard/cost_estimate.py +137 -0
- groundguard/exceptions.py +55 -0
- groundguard/integrations/__init__.py +0 -0
- groundguard/integrations/langchain.py +64 -0
- groundguard/loaders/__init__.py +0 -0
- groundguard/loaders/accumulator.py +108 -0
- groundguard/loaders/chunker.py +121 -0
- groundguard/loaders/helpers.py +54 -0
- groundguard/loaders/legal.py +124 -0
- groundguard/loaders/structured.py +104 -0
- groundguard/models/__init__.py +0 -0
- groundguard/models/builder.py +162 -0
- groundguard/models/internal.py +154 -0
- groundguard/models/result.py +178 -0
- groundguard/models/tier3.py +96 -0
- groundguard/profiles.py +39 -0
- groundguard/tiers/__init__.py +0 -0
- groundguard/tiers/tier1_authenticity.py +63 -0
- groundguard/tiers/tier25_preprocessing.py +244 -0
- groundguard/tiers/tier2_semantic.py +83 -0
- groundguard/tiers/tier3_evaluation.py +401 -0
- groundguard-0.1.0.dist-info/METADATA +404 -0
- groundguard-0.1.0.dist-info/RECORD +36 -0
- groundguard-0.1.0.dist-info/WHEEL +4 -0
- groundguard-0.1.0.dist-info/licenses/LICENSE +21 -0
groundguard/__init__.py
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
from groundguard.core.verifier import (
|
|
2
|
+
verify,
|
|
3
|
+
averify,
|
|
4
|
+
verify_batch,
|
|
5
|
+
averify_batch,
|
|
6
|
+
verify_batch_async,
|
|
7
|
+
verify_analysis,
|
|
8
|
+
averify_analysis,
|
|
9
|
+
verify_answer,
|
|
10
|
+
averify_answer,
|
|
11
|
+
verify_clause,
|
|
12
|
+
averify_clause,
|
|
13
|
+
verify_structured,
|
|
14
|
+
)
|
|
15
|
+
from groundguard.models.result import (
|
|
16
|
+
GroundingResult,
|
|
17
|
+
ContextualizedClaimUnit,
|
|
18
|
+
VerificationAuditRecord,
|
|
19
|
+
)
|
|
20
|
+
from groundguard.profiles import (
|
|
21
|
+
VerificationProfile,
|
|
22
|
+
STRICT_PROFILE,
|
|
23
|
+
GENERAL_PROFILE,
|
|
24
|
+
RESEARCH_PROFILE,
|
|
25
|
+
)
|
|
26
|
+
from groundguard.circuit_breaker import (
|
|
27
|
+
assert_faithful,
|
|
28
|
+
assert_grounded,
|
|
29
|
+
verify_or_retry,
|
|
30
|
+
GroundingError,
|
|
31
|
+
)
|
|
32
|
+
from groundguard.loaders.accumulator import GroundingAccumulator, SourceAccumulator
|
|
33
|
+
from groundguard.cost_estimate import (
|
|
34
|
+
CostEstimate,
|
|
35
|
+
estimate_verify_analysis_cost,
|
|
36
|
+
estimate_verify_faithfulness_cost,
|
|
37
|
+
)
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
"""Shared constants for the groundguard library."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import litellm
|
|
5
|
+
|
|
6
|
+
# FIX-02: Unified transient error tuple used by both tier3_evaluation.py and verifier.py.
|
|
7
|
+
# Previously tier3_evaluation.py was missing litellm.exceptions.Timeout, meaning timeouts
|
|
8
|
+
# were not retried by the backoff loop — only caught and re-raised by the orchestrator.
|
|
9
|
+
TRANSIENT_LITELLM_ERRORS = (
|
|
10
|
+
litellm.exceptions.ServiceUnavailableError,
|
|
11
|
+
litellm.exceptions.RateLimitError,
|
|
12
|
+
litellm.exceptions.APIConnectionError,
|
|
13
|
+
litellm.exceptions.Timeout,
|
|
14
|
+
)
|
groundguard/_log.py
ADDED
|
@@ -0,0 +1,298 @@
|
|
|
1
|
+
"""Model adapter registry — provider-specific pre/post-processing for litellm calls."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
import re
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from typing import Any, Callable
|
|
6
|
+
|
|
7
|
+
from groundguard._log import logger
|
|
8
|
+
from groundguard.exceptions import VerificationFailedError
|
|
9
|
+
|
|
10
|
+
_THINK_TAG_RE = re.compile(r'<think>.*?</think>', re.DOTALL | re.IGNORECASE)
|
|
11
|
+
# BUG-01: unanchored search so conversational pre/post-text is ignored.
|
|
12
|
+
# Only extract if the fenced content looks like JSON ({...} or [...]).
|
|
13
|
+
_MD_FENCE_RE = re.compile(r'```(?:json)?\s*\n?(.*?)\n?\s*```', re.DOTALL)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def _strip_fences(content: str) -> str:
|
|
17
|
+
"""Strip markdown code fences and surrounding whitespace.
|
|
18
|
+
|
|
19
|
+
Uses unanchored search so conversational text before/after the fence
|
|
20
|
+
does not prevent extraction. Only returns the fenced content when it
|
|
21
|
+
looks like JSON (starts with '{' or '['); otherwise falls through to
|
|
22
|
+
return the original content stripped.
|
|
23
|
+
"""
|
|
24
|
+
m = _MD_FENCE_RE.search(content)
|
|
25
|
+
if m:
|
|
26
|
+
extracted = m.group(1).strip()
|
|
27
|
+
if extracted.startswith('{') or extracted.startswith('['):
|
|
28
|
+
return extracted
|
|
29
|
+
return content.strip()
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _strip_think_tags(content: str) -> str:
|
|
33
|
+
"""
|
|
34
|
+
Strip chain-of-thought <think> blocks from Ollama thinking-capable models.
|
|
35
|
+
|
|
36
|
+
Uses rfind('</think>') split rather than regex-only, because quantized/local
|
|
37
|
+
LLMs frequently hallucinate malformed closing tags (</thinking>, <\\think>, or
|
|
38
|
+
omit the closing tag entirely). rfind on the last occurrence is resilient to
|
|
39
|
+
all of these — it discards everything up to and including the last </think>
|
|
40
|
+
variant if present, then falls through to regex for well-formed tags.
|
|
41
|
+
|
|
42
|
+
Edge case — max_tokens exhaustion mid-thought: if the model emits <think> but
|
|
43
|
+
hits the token limit before writing </think>, there is no closing tag. In this
|
|
44
|
+
case rfind returns -1 AND the regex matches nothing, so stripped == content.
|
|
45
|
+
Detecting a leading <think> opener here returns "" to signal "no usable JSON".
|
|
46
|
+
"""
|
|
47
|
+
lower = content.lower()
|
|
48
|
+
# Find the last occurrence of any </think...> closing tag variant
|
|
49
|
+
think_end = lower.rfind('</think')
|
|
50
|
+
if think_end != -1:
|
|
51
|
+
# Advance past the tag's closing >
|
|
52
|
+
close_bracket = content.find('>', think_end)
|
|
53
|
+
if close_bracket != -1:
|
|
54
|
+
return content[close_bracket + 1:].strip()
|
|
55
|
+
# Fallback: regex for well-formed <think>...</think> blocks
|
|
56
|
+
stripped = _THINK_TAG_RE.sub('', content).strip()
|
|
57
|
+
# If regex changed nothing and content opens with <think>, the model hit
|
|
58
|
+
# max_tokens mid-thought — entire content is reasoning, no JSON present.
|
|
59
|
+
if stripped == content.strip() and lower.lstrip().startswith('<think'):
|
|
60
|
+
return ""
|
|
61
|
+
return stripped
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
@dataclass
class ModelAdapter:
    """Bundle of provider-specific hooks applied around a litellm call.

    Fields:
        name: Short identifier for the adapter.
        build_kwargs: Callable taking the base ``litellm.completion()`` kwargs
            dict and returning the final kwargs dict. It may add, remove or
            modify keys (e.g. OPENAI_REASONING_ADAPTER drops 'temperature').
        post_process: Callable taking ``(response, model)`` and returning the
            normalized content string extracted from the raw LiteLLM response.
            The returned string is what gets handed to
            ``Tier3ResponseModel.model_validate_json()``.
    """

    name: str
    build_kwargs: Callable[[dict], dict]
    post_process: Callable[[Any, str], str]
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
# ---------------------------------------------------------------------------
|
|
86
|
+
# DEFAULT_ADAPTER — used for all unrecognized models
|
|
87
|
+
# ---------------------------------------------------------------------------
|
|
88
|
+
def _default_post_process(response: Any, model: str = "") -> str:
    """Return the first choice's message content with markdown fences removed.

    A missing or empty content value is treated as the empty string before
    fence stripping. ``model`` is accepted for signature parity with other
    adapters and is not used here.
    """
    raw = response.choices[0].message.content
    text = raw if raw else ""
    return _strip_fences(text)
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
DEFAULT_ADAPTER = ModelAdapter(
|
|
94
|
+
name="default",
|
|
95
|
+
build_kwargs=lambda base: dict(base),
|
|
96
|
+
post_process=_default_post_process,
|
|
97
|
+
)
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
# ---------------------------------------------------------------------------
|
|
101
|
+
# OLLAMA_ADAPTER — ollama/ and ollama_chat/ prefixes
|
|
102
|
+
# ---------------------------------------------------------------------------
|
|
103
|
+
def _ollama_build_kwargs(base: dict) -> dict:
|
|
104
|
+
"""Force ollama/ → ollama_chat/ and ensure sufficient context for structured output.
|
|
105
|
+
|
|
106
|
+
Two issues this fixes:
|
|
107
|
+
|
|
108
|
+
1. litellm routes 'ollama/' to /api/generate, which mishandles structured-output
|
|
109
|
+
responses from thinking-capable models (qwen3, DeepSeek-R1, etc.): the JSON
|
|
110
|
+
schema output lands in the 'thinking' field while 'response' is empty.
|
|
111
|
+
/api/chat correctly splits 'content' (JSON) from 'thinking' (reasoning).
|
|
112
|
+
|
|
113
|
+
2. Models with a small default num_ctx (e.g. 4K) exhaust their token budget
|
|
114
|
+
during the thinking phase, leaving nothing for the JSON output. We override
|
|
115
|
+
num_ctx to 8192 so thinking-capable models have room to reason AND output
|
|
116
|
+
the full structured response. This override can be raised further if needed.
|
|
117
|
+
"""
|
|
118
|
+
base = dict(base)
|
|
119
|
+
model = base.get("model", "")
|
|
120
|
+
if model.startswith("ollama/"):
|
|
121
|
+
base["model"] = "ollama_chat/" + model[len("ollama/"):]
|
|
122
|
+
# Ensure enough context for thinking + structured JSON output (16K covers full Tier3 prompts)
|
|
123
|
+
options = base.get("extra_body", {}).get("options", {})
|
|
124
|
+
options.setdefault("num_ctx", 16384)
|
|
125
|
+
base.setdefault("extra_body", {})["options"] = options
|
|
126
|
+
# keep_alive=300 holds the model in memory for 5 min so sequential calls don't reload
|
|
127
|
+
base.setdefault("extra_body", {}).setdefault("keep_alive", 300)
|
|
128
|
+
return base
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def _ollama_post_process(response: Any, model: str = "") -> str:
    """Extract JSON text from a thinking-capable model response.

    The message content is used after <think> blocks are removed. If that
    leaves nothing, ``reasoning_content`` is tried as a fallback and accepted
    only when it starts with '{'. When neither yields text, a warning is
    logged and "" is returned (BUG-02: returning "" rather than raising lets
    the Tier 3 retry loop handle it). ``model`` is not used here.
    """
    message = response.choices[0].message
    text = message.content
    if text:
        text = _strip_think_tags(text)
    if text:
        return _strip_fences(text)

    # litellm may drop content if reasoning_content is present — try fallback
    reasoning = (getattr(message, 'reasoning_content', None) or "").strip()
    if not reasoning.startswith('{'):
        logger.warning(
            "Ollama returned empty content and reasoning_content has no JSON — "
            "returning empty string for retry"
        )
        return ""
    return _strip_fences(reasoning)
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
OLLAMA_ADAPTER = ModelAdapter(
|
|
154
|
+
name="ollama",
|
|
155
|
+
build_kwargs=_ollama_build_kwargs,
|
|
156
|
+
post_process=_ollama_post_process,
|
|
157
|
+
)
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
# ---------------------------------------------------------------------------
|
|
161
|
+
# NIM_THINKING_ADAPTER — NIM-hosted thinking models (kimi-k2, gpt-oss, etc.)
|
|
162
|
+
# Uses reasoning_content fallback like OLLAMA_ADAPTER but sends no
|
|
163
|
+
# Ollama-specific extra_body fields (options/keep_alive) to the NIM endpoint.
|
|
164
|
+
# ---------------------------------------------------------------------------
|
|
165
|
+
NIM_THINKING_ADAPTER = ModelAdapter(
|
|
166
|
+
name="nim_thinking",
|
|
167
|
+
build_kwargs=lambda base: dict(base),
|
|
168
|
+
post_process=_ollama_post_process,
|
|
169
|
+
)
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
# ---------------------------------------------------------------------------
|
|
173
|
+
# NEMOTRON_NIM_ADAPTER — nvidia/nemotron-3-super-120b-a12b
|
|
174
|
+
# Requires chat_template_kwargs + reasoning_budget in extra_body, otherwise
|
|
175
|
+
# the server hangs without returning a response.
|
|
176
|
+
# ---------------------------------------------------------------------------
|
|
177
|
+
def _nemotron_build_kwargs(base: dict) -> dict:
|
|
178
|
+
base = dict(base)
|
|
179
|
+
extra = base.setdefault("extra_body", {})
|
|
180
|
+
extra.setdefault("chat_template_kwargs", {"enable_thinking": True})
|
|
181
|
+
extra.setdefault("reasoning_budget", 16384)
|
|
182
|
+
return base
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
NEMOTRON_NIM_ADAPTER = ModelAdapter(
|
|
186
|
+
name="nemotron_nim",
|
|
187
|
+
build_kwargs=_nemotron_build_kwargs,
|
|
188
|
+
post_process=_ollama_post_process,
|
|
189
|
+
)
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
# ---------------------------------------------------------------------------
|
|
193
|
+
# OPENAI_REASONING_ADAPTER — o1, o3, o4, gpt-5 series
|
|
194
|
+
# ---------------------------------------------------------------------------
|
|
195
|
+
def _openai_reasoning_build_kwargs(base: dict) -> dict:
|
|
196
|
+
base = dict(base)
|
|
197
|
+
base.pop("temperature", None)
|
|
198
|
+
return base
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
OPENAI_REASONING_ADAPTER = ModelAdapter(
|
|
202
|
+
name="openai_reasoning",
|
|
203
|
+
build_kwargs=_openai_reasoning_build_kwargs,
|
|
204
|
+
post_process=_default_post_process,
|
|
205
|
+
)
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
# ---------------------------------------------------------------------------
|
|
209
|
+
# ANTHROPIC_ADAPTER — anthropic/ prefix and claude- prefix models
|
|
210
|
+
# ---------------------------------------------------------------------------
|
|
211
|
+
def _anthropic_post_process(response: Any, model: str = "") -> str:
    """Return the first choice's raw message content with fences stripped.

    Empty or missing content is treated as "". ``model`` is not used here.
    """
    # Never use message.parsed — force raw content path to avoid litellm #20533
    raw = response.choices[0].message.content
    return _strip_fences(raw if raw else "")
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
ANTHROPIC_ADAPTER = ModelAdapter(
|
|
218
|
+
name="anthropic",
|
|
219
|
+
build_kwargs=lambda base: dict(base),
|
|
220
|
+
post_process=_anthropic_post_process,
|
|
221
|
+
)
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
# ---------------------------------------------------------------------------
|
|
225
|
+
# GOOGLE_ADAPTER — gemini/ and vertex_ai/gemini prefixes
|
|
226
|
+
# ---------------------------------------------------------------------------
|
|
227
|
+
def _google_post_process(response: Any, model: str = "") -> str:
    """Return fence-stripped content, or "" (with a warning) when it is empty.

    BUG-02: an empty response returns "" instead of raising so the Tier 3
    retry loop can retry. ``model`` is not used here.
    """
    text = response.choices[0].message.content
    if text:
        return _strip_fences(text)
    logger.warning(
        "Gemini returned empty content (possible safety filter) — "
        "returning empty string for retry"
    )
    return ""
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
GOOGLE_ADAPTER = ModelAdapter(
|
|
240
|
+
name="google",
|
|
241
|
+
build_kwargs=lambda base: dict(base),
|
|
242
|
+
post_process=_google_post_process,
|
|
243
|
+
)
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
# ---------------------------------------------------------------------------
|
|
247
|
+
# JSON_OBJECT_ADAPTER — models that support only json_object (not json_schema)
|
|
248
|
+
# e.g. nvidia_nim/microsoft/phi-4-mini-instruct
|
|
249
|
+
# ---------------------------------------------------------------------------
|
|
250
|
+
def _json_object_build_kwargs(base: dict) -> dict:
|
|
251
|
+
base = dict(base)
|
|
252
|
+
base["response_format"] = {"type": "json_object"}
|
|
253
|
+
return base
|
|
254
|
+
|
|
255
|
+
|
|
256
|
+
JSON_OBJECT_ADAPTER = ModelAdapter(
|
|
257
|
+
name="json_object",
|
|
258
|
+
build_kwargs=_json_object_build_kwargs,
|
|
259
|
+
post_process=_default_post_process,
|
|
260
|
+
)
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
# ---------------------------------------------------------------------------
|
|
264
|
+
# Registry & Lookup — ordered most-specific to least-specific prefix
|
|
265
|
+
# ---------------------------------------------------------------------------
|
|
266
|
+
_REGISTRY: list[tuple[str, ModelAdapter]] = [
|
|
267
|
+
("ollama_chat/", OLLAMA_ADAPTER),
|
|
268
|
+
("ollama/", OLLAMA_ADAPTER),
|
|
269
|
+
# NIM thinking models — emit reasoning_content
|
|
270
|
+
("nvidia_nim/deepseek", NIM_THINKING_ADAPTER), # DeepSeek-R1/V3 on NIM
|
|
271
|
+
("nvidia_nim/nvidia/nemotron-3-super", NEMOTRON_NIM_ADAPTER), # requires chat_template_kwargs
|
|
272
|
+
("nvidia_nim/nvidia/nemotron-3-nano", NEMOTRON_NIM_ADAPTER), # requires chat_template_kwargs
|
|
273
|
+
("nvidia_nim/moonshotai/kimi-k2", NIM_THINKING_ADAPTER), # Kimi K2 thinking
|
|
274
|
+
("nvidia_nim/openai/gpt-oss", NIM_THINKING_ADAPTER), # GPT-OSS thinking
|
|
275
|
+
# NIM json_object-only models
|
|
276
|
+
("nvidia_nim/microsoft/phi-4-mini", JSON_OBJECT_ADAPTER),
|
|
277
|
+
("vertex_ai/gemini", GOOGLE_ADAPTER),
|
|
278
|
+
("gemini/", GOOGLE_ADAPTER),
|
|
279
|
+
("anthropic/", ANTHROPIC_ADAPTER),
|
|
280
|
+
("claude-", ANTHROPIC_ADAPTER),
|
|
281
|
+
("o1", OPENAI_REASONING_ADAPTER),
|
|
282
|
+
("o3", OPENAI_REASONING_ADAPTER),
|
|
283
|
+
("o4", OPENAI_REASONING_ADAPTER),
|
|
284
|
+
("gpt-5", OPENAI_REASONING_ADAPTER),
|
|
285
|
+
]
|
|
286
|
+
|
|
287
|
+
|
|
288
|
+
def get_adapter(model: str) -> ModelAdapter:
    """Return the adapter whose registry prefix is the longest match for ``model``.

    Every ``(prefix, adapter)`` entry in ``_REGISTRY`` is checked and the
    entry with the longest prefix that ``model`` starts with wins, regardless
    of where it sits in the list. When two matching prefixes have the same
    length, the one listed first is kept. Unrecognized models get
    ``DEFAULT_ADAPTER``.

    Args:
        model: The litellm model string (e.g. 'ollama_chat/deepseek-r1').

    Returns:
        The selected ``ModelAdapter``.
    """
    best_len = -1
    best_adapter = DEFAULT_ADAPTER
    for prefix, adapter in _REGISTRY:
        # Strictly longer only, so earlier entries win ties.
        if len(prefix) > best_len and model.startswith(prefix):
            best_len = len(prefix)
            best_adapter = adapter
    return best_adapter
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
"""Assertion-style circuit breakers for grounding verification."""
|
|
2
|
+
from groundguard.models.result import GroundingResult, Source
|
|
3
|
+
from groundguard.core.verifier import verify_answer, verify_analysis
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class GroundingError(Exception):
    """Raised by the circuit-breaker helpers when output is not grounded."""
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def assert_faithful(output: str, sources: list[Source], **kwargs) -> None:
    """Verify ``output`` against ``sources`` and raise if it is not grounded.

    Extra keyword arguments are forwarded unchanged to ``verify_answer``.

    Raises:
        GroundingError: When the verification result's ``is_grounded`` is
            falsy; the message carries the result's score and status.
    """
    outcome = verify_answer(output, sources, **kwargs)
    if outcome.is_grounded:
        return
    raise GroundingError(
        f"Output not grounded: score={outcome.score:.2f}, status={outcome.status}"
    )
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def assert_grounded(analysis: str, sources: list[Source], **kwargs) -> None:
    """Verify ``analysis`` against ``sources`` and raise if it is not grounded.

    Extra keyword arguments are forwarded unchanged to ``verify_analysis``.

    Raises:
        GroundingError: When the verification result's ``is_grounded`` is
            falsy; the message carries the result's score.
    """
    outcome = verify_analysis(analysis, sources, **kwargs)
    if outcome.is_grounded:
        return
    raise GroundingError(
        f"Analysis not grounded: score={outcome.score:.2f}"
    )
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def verify_or_retry(generator, sources: list[Source], max_retries: int = 3, **kwargs) -> str:
    """Call ``generator`` until its output verifies as grounded.

    ``generator`` is invoked with no arguments up to ``max_retries`` times;
    each output is checked with ``verify_answer`` (extra keyword arguments
    are forwarded) and the first grounded output is returned.

    Raises:
        GroundingError: When no attempt produced a grounded output.
    """
    for _ in range(max_retries):
        candidate = generator()
        verdict = verify_answer(candidate, sources, **kwargs)
        if not verdict.is_grounded:
            continue
        return candidate
    raise GroundingError(f"Output not grounded after {max_retries} attempts")
|
|
File without changes
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
"""Claim extraction from free-form text using LLM."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
import secrets
|
|
4
|
+
from typing import TYPE_CHECKING
|
|
5
|
+
|
|
6
|
+
import pydantic
|
|
7
|
+
|
|
8
|
+
from groundguard.exceptions import ParseError
|
|
9
|
+
from groundguard.tiers.tier3_evaluation import _completion_with_backoff, _acompletion_with_backoff
|
|
10
|
+
|
|
11
|
+
if TYPE_CHECKING:
|
|
12
|
+
from groundguard.models.result import Source
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
CLAIM_EXTRACTION_PROMPT = """Extract all distinct factual claims from the text below.
|
|
16
|
+
Return JSON with key "claims" containing a list of strings.
|
|
17
|
+
Each string is one atomic, self-contained factual claim.
|
|
18
|
+
|
|
19
|
+
Text (boundary: {boundary}):
|
|
20
|
+
{text}
|
|
21
|
+
|
|
22
|
+
Sources provided:
|
|
23
|
+
{sources_block}
|
|
24
|
+
|
|
25
|
+
Return only JSON. Example: {{"claims": ["claim 1", "claim 2"]}}"""
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class _ClaimList(pydantic.BaseModel):
    """Schema for the claim-extraction reply: a JSON object with key "claims"."""

    # One string per extracted claim.
    claims: list[str]
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def extract_claims(
    text: str,
    sources: list,
    model: str,
    max_spend: float = float("inf"),
    api_base: str | None = None,
) -> list[str]:
    """Ask the LLM to split ``text`` into a list of factual claims.

    The prompt includes a random boundary token, the text, and the first 200
    characters of each source's content keyed by its ``source_id``. The reply
    is parsed as a ``_ClaimList``; a validation/value error triggers one
    retry. ``max_spend`` is accepted but not referenced in this function.

    Args:
        text: Free-form text to extract claims from.
        sources: Objects exposing ``source_id`` and ``content``.
        model: Model name passed to the completion helper.
        max_spend: Unused here.
        api_base: Forwarded to the completion helper only when truthy.

    Returns:
        The list of claim strings from the parsed reply.

    Raises:
        ParseError: When both attempts fail to produce a valid claim list.
    """
    sources_block = "\n".join(
        f"- {s.source_id}: {s.content[:200]}" for s in sources
    )
    boundary = secrets.token_hex(6)
    prompt = CLAIM_EXTRACTION_PROMPT.format(
        boundary=boundary, text=text, sources_block=sources_block
    )
    call_kwargs = {"api_base": api_base} if api_base else {}

    for attempt_no in (1, 2):
        try:
            response = _completion_with_backoff(
                model=model,
                messages=[{"role": "user", "content": prompt}],
                **call_kwargs,
            )
            reply = response.choices[0].message.content
            return _ClaimList.model_validate_json(reply).claims
        except (pydantic.ValidationError, ValueError):
            if attempt_no == 2:
                raise ParseError("claim extraction failed after 2 attempts")
    # Not reached: the second failed attempt always raises above.
    return []
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
async def extract_claims_async(
    text: str,
    sources: list,
    model: str,
    max_spend: float = float("inf"),
    api_base: str | None = None,
) -> list[str]:
    """Async variant: ask the LLM to split ``text`` into factual claims.

    Builds the same prompt as the synchronous path (random boundary token,
    the text, and the first 200 characters of each source keyed by
    ``source_id``) and awaits the async completion helper. The reply is parsed
    as a ``_ClaimList``; a validation/value error triggers one retry.
    ``max_spend`` is accepted but not referenced in this function.

    Args:
        text: Free-form text to extract claims from.
        sources: Objects exposing ``source_id`` and ``content``.
        model: Model name passed to the completion helper.
        max_spend: Unused here.
        api_base: Forwarded to the completion helper only when truthy.

    Returns:
        The list of claim strings from the parsed reply.

    Raises:
        ParseError: When both attempts fail to produce a valid claim list.
    """
    sources_block = "\n".join(
        f"- {s.source_id}: {s.content[:200]}" for s in sources
    )
    boundary = secrets.token_hex(6)
    prompt = CLAIM_EXTRACTION_PROMPT.format(
        boundary=boundary, text=text, sources_block=sources_block
    )
    call_kwargs = {"api_base": api_base} if api_base else {}

    for attempt_no in (1, 2):
        try:
            response = await _acompletion_with_backoff(
                model=model,
                messages=[{"role": "user", "content": prompt}],
                **call_kwargs,
            )
            reply = response.choices[0].message.content
            return _ClaimList.model_validate_json(reply).claims
        except (pydantic.ValidationError, ValueError):
            if attempt_no == 2:
                raise ParseError("claim extraction async failed after 2 attempts")
    # Not reached: the second failed attempt always raises above.
    return []
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
"""Tier 0 classifier — rules-based Extractive/Inferential atom classification."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
import re
|
|
4
|
+
from groundguard.models.internal import ClassifiedAtom
|
|
5
|
+
|
|
6
|
+
INFERENTIAL_SIGNALS = {
|
|
7
|
+
"trend", "trajectory", "suggests", "indicates", "on track", "at risk",
|
|
8
|
+
"appears to", "likely", "projected", "based on", "derived from",
|
|
9
|
+
"analysis shows", "pattern", "forecast", "outlook", "implies",
|
|
10
|
+
"consistent with", "points to", "expected to",
|
|
11
|
+
}
|
|
12
|
+
|
|
13
|
+
# Decimal-safe sentence splitter: splits on [.!?] followed by whitespace when the punctuation has no digit directly before or after it, or on runs of newlines.
|
|
14
|
+
# Preserves: $4.2M, v2.1, 3.14
|
|
15
|
+
_SENTENCE_SPLIT_RE = re.compile(r'(?<!\d)[.!?](?!\d)\s+|\n+')
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def parse_and_classify(claim: str) -> list[ClassifiedAtom]:
    """Split a claim into sentences and label each one without calling an LLM.

    The claim is split with ``_SENTENCE_SPLIT_RE``; blank pieces are dropped.
    A sentence is labelled "Inferential" when any entry of
    ``INFERENTIAL_SIGNALS`` occurs in its lowercased text between word
    boundaries, and "Extractive" otherwise.

    Returns:
        One ``ClassifiedAtom`` per non-blank sentence, in order. Empty or
        whitespace-only input, or input that splits into nothing, gives [].
    """
    if not (claim and claim.strip()):
        return []

    def _label(sentence: str) -> str:
        lowered = sentence.lower()
        for signal in INFERENTIAL_SIGNALS:
            if re.search(rf'\b{re.escape(signal)}\b', lowered):
                return "Inferential"
        return "Extractive"

    pieces = (part.strip() for part in _SENTENCE_SPLIT_RE.split(claim))
    return [
        ClassifiedAtom(claim_text=piece, claim_type=_label(piece))
        for piece in pieces
        if piece
    ]
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
"""Phase 23 ResultBuilder — citation extraction and invariant enforcement."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
from typing import TYPE_CHECKING
|
|
4
|
+
|
|
5
|
+
from groundguard.models.result import AtomicClaimResult, Citation
|
|
6
|
+
from groundguard.exceptions import InvariantError
|
|
7
|
+
|
|
8
|
+
if TYPE_CHECKING:
|
|
9
|
+
from groundguard.models.result import Source
|
|
10
|
+
from groundguard.tiers.tier25_preprocessing import Tier25Result
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class ResultBuilder:
    """Static factories that assemble ``AtomicClaimResult`` objects per tier.

    Each factory builds the result first and then runs the citation invariant
    check, which rejects a VERIFIED verdict that carries no citation.
    """

    @staticmethod
    def build_numerical_fast_exit(claim: str, tier25: Tier25Result, source: Source) -> AtomicClaimResult:
        """Build a CONTRADICTED result from the tier-2.5 conflict citation."""
        conflict = tier25.conflict_citation
        outcome = AtomicClaimResult(
            claim_text=claim,
            claim_type="Extractive",
            status="CONTRADICTED",
            source_id=source.source_id,
            verification_method="tier25_numerical",
            citation=conflict,
        )
        ResultBuilder._assert_citation_invariant("CONTRADICTED", conflict)
        return outcome

    @staticmethod
    def build_lexical_pass(claim: str, top_chunks: list, score: float, source: Source) -> AtomicClaimResult:
        """Build a VERIFIED result citing the first chunk, or the source's head.

        With no chunks, the excerpt falls back to the first 100 characters of
        the source content (or "" when the content is falsy). ``score`` is
        accepted but not used; citation confidence is fixed at 1.0.
        """
        if not top_chunks:
            excerpt_text = source.content[:100] if source.content else ""
            span_start = 0
            span_end = len(excerpt_text)
        else:
            lead = top_chunks[0]
            excerpt_text = lead.text_content
            span_start = lead.char_start
            span_end = lead.char_end

        citation = Citation(
            source_id=source.source_id,
            excerpt=excerpt_text,
            excerpt_char_start=span_start,
            excerpt_char_end=span_end,
            citation_confidence=1.0,
        )
        outcome = AtomicClaimResult(
            claim_text=claim,
            claim_type="Extractive",
            status="VERIFIED",
            source_id=source.source_id,
            verification_method="tier2_lexical",
            citation=citation,
        )
        ResultBuilder._assert_citation_invariant("VERIFIED", citation)
        return outcome

    @staticmethod
    def build_llm_result(claim: str, verdict: str, citation: Citation | None = None) -> AtomicClaimResult:
        """Build a tier-3 result; UNVERIFIABLE verdicts never carry a citation."""
        kept_citation = citation if verdict != "UNVERIFIABLE" else None
        outcome = AtomicClaimResult(
            claim_text=claim,
            claim_type="Extractive",
            status=verdict,
            verification_method="tier3_llm",
            citation=kept_citation,
        )
        ResultBuilder._assert_citation_invariant(verdict, outcome.citation)
        return outcome

    @staticmethod
    def _assert_citation_invariant(verdict: str, citation: Citation | None) -> None:
        """Raise ``InvariantError`` when a VERIFIED verdict has no citation."""
        if citation is not None or verdict != "VERIFIED":
            return
        raise InvariantError("citation must be non-null for VERIFIED results")
|