cgs-rag 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cgs_rag-0.1.0/PKG-INFO +33 -0
- cgs_rag-0.1.0/cgs_rag/__init__.py +41 -0
- cgs_rag-0.1.0/cgs_rag/calibration.py +172 -0
- cgs_rag-0.1.0/cgs_rag/detector.py +301 -0
- cgs_rag-0.1.0/cgs_rag/result.py +100 -0
- cgs_rag-0.1.0/cgs_rag/signals/__init__.py +5 -0
- cgs_rag-0.1.0/cgs_rag/signals/base.py +45 -0
- cgs_rag-0.1.0/cgs_rag/signals/signal1_token.py +122 -0
- cgs_rag-0.1.0/cgs_rag/signals/signal2_nli.py +115 -0
- cgs_rag-0.1.0/cgs_rag/signals/signal3_cosine.py +85 -0
- cgs_rag-0.1.0/cgs_rag.egg-info/PKG-INFO +33 -0
- cgs_rag-0.1.0/cgs_rag.egg-info/SOURCES.txt +15 -0
- cgs_rag-0.1.0/cgs_rag.egg-info/dependency_links.txt +1 -0
- cgs_rag-0.1.0/cgs_rag.egg-info/requires.txt +13 -0
- cgs_rag-0.1.0/cgs_rag.egg-info/top_level.txt +1 -0
- cgs_rag-0.1.0/pyproject.toml +57 -0
- cgs_rag-0.1.0/setup.cfg +4 -0
cgs_rag-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: cgs-rag
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Composite Grounding Score: multi-signal hallucination detection for production RAG systems
|
|
5
|
+
Author-email: Nishant Kumar <nishant.k@marmeto.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/nishant-k-marmeto/cgs-rag
|
|
8
|
+
Project-URL: Repository, https://github.com/nishant-k-marmeto/cgs-rag
|
|
9
|
+
Keywords: rag,retrieval-augmented-generation,hallucination,detection,nlp,llm,grounding,faithfulness
|
|
10
|
+
Classifier: Development Status :: 3 - Alpha
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: Intended Audience :: Science/Research
|
|
13
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
20
|
+
Classifier: Operating System :: OS Independent
|
|
21
|
+
Requires-Python: >=3.9
|
|
22
|
+
Requires-Dist: torch>=2.0.0
|
|
23
|
+
Requires-Dist: transformers>=4.36.0
|
|
24
|
+
Requires-Dist: sentence-transformers>=2.2.0
|
|
25
|
+
Requires-Dist: scikit-learn>=1.3.0
|
|
26
|
+
Requires-Dist: numpy>=1.24.0
|
|
27
|
+
Requires-Dist: pandas>=2.0.0
|
|
28
|
+
Provides-Extra: dev
|
|
29
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
30
|
+
Requires-Dist: pytest-cov; extra == "dev"
|
|
31
|
+
Requires-Dist: jupyter; extra == "dev"
|
|
32
|
+
Requires-Dist: matplotlib; extra == "dev"
|
|
33
|
+
Requires-Dist: seaborn; extra == "dev"
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
"""
|
|
2
|
+
cgs-rag
|
|
3
|
+
=======
|
|
4
|
+
Composite Grounding Score — multi-signal hallucination detection for
|
|
5
|
+
production RAG systems.
|
|
6
|
+
|
|
7
|
+
Based on the thesis:
|
|
8
|
+
"Composite Grounding Score (CGS): A Multi-Signal Framework for
|
|
9
|
+
Hallucination Detection in Production RAG Systems"
|
|
10
|
+
Nishant Kumar, IIT Patna Executive M.Tech AI & DSE, 2025
|
|
11
|
+
|
|
12
|
+
Quick start
|
|
13
|
+
-----------
|
|
14
|
+
from cgs_rag import CGSDetector
|
|
15
|
+
|
|
16
|
+
detector = CGSDetector()
|
|
17
|
+
result = detector.score(
|
|
18
|
+
question = "What is the capital of France?",
|
|
19
|
+
answer = "Berlin",
|
|
20
|
+
context = "France is a country in Western Europe. Its capital is Paris."
|
|
21
|
+
)
|
|
22
|
+
print(result.risk_score) # e.g. 0.782
|
|
23
|
+
print(result.is_hallucination) # True
|
|
24
|
+
print(result.explain())
|
|
25
|
+
|
|
26
|
+
Domain calibration
|
|
27
|
+
------------------
|
|
28
|
+
import pandas as pd
|
|
29
|
+
val_df = pd.read_csv("my_labelled_rag_data.csv")
|
|
30
|
+
# columns needed: question, answer, context, label (1=hallucinated, 0=grounded)
|
|
31
|
+
|
|
32
|
+
detector.calibrate(val_df)
|
|
33
|
+
# Automatically detects signal direction, optimises weights & threshold.
|
|
34
|
+
"""
|
|
35
|
+
|
|
36
|
+
from .detector import CGSDetector
|
|
37
|
+
from .result import CGSResult
|
|
38
|
+
|
|
39
|
+
__version__ = "0.1.0"
|
|
40
|
+
__author__ = "Nishant Kumar"
|
|
41
|
+
__all__ = ["CGSDetector", "CGSResult"]
|
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
"""
|
|
2
|
+
CGS Calibration Utilities
|
|
3
|
+
=========================
|
|
4
|
+
|
|
5
|
+
Three independent steps:
|
|
6
|
+
1. detect_signal_direction — determine whether high cosine = risk or grounding
|
|
7
|
+
2. find_optimal_threshold — grid search over τ to maximise F1
|
|
8
|
+
3. optimise_weights — grid search over (α, β, γ) to maximise AUC
|
|
9
|
+
|
|
10
|
+
All functions operate on numpy arrays and are independent of the detector class,
|
|
11
|
+
making them easy to use standalone or in custom pipelines.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from typing import Optional, Tuple
|
|
15
|
+
import numpy as np
|
|
16
|
+
from sklearn.metrics import roc_auc_score, f1_score
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
# ─────────────────────────────────────────────────────────────────────────────
|
|
20
|
+
def detect_signal_direction(
    s3_scores: np.ndarray,
    labels: np.ndarray,
) -> str:
    """
    Decide whether cosine similarity (Signal 3) indicates risk or grounding.

    The mean cosine similarity of the hallucinated samples is compared with
    the mean of the grounded samples on a labelled set.

    Parameters
    ----------
    s3_scores : array-like of cosine similarities (one per sample)
    labels    : array-like of binary labels (1 = hallucinated, 0 = grounded);
                values are cast to int before being compared

    Returns
    -------
    "cosine_as_risk"
        E[s3 | hallucinated] > E[s3 | grounded]
        → high cosine means the answer is *suspiciously* close to context
        → e.g. HaluEval adversarial benchmark construction

    "cosine_as_grounding"
        E[s3 | grounded] >= E[s3 | hallucinated]
        → high cosine means the answer is well-supported by context
        → e.g. real RAG pipelines, TruthfulQA, PubMedQA
        Also returned when either class has no samples: with one class
        missing there is nothing to compare, so no direction flip is made.
    """
    labels = np.asarray(labels).astype(int)
    s3_scores = np.asarray(s3_scores, dtype=float)

    hallucinated = labels == 1
    grounded = labels == 0

    # A direction can only be inferred when both classes are represented.
    # Substituting a constant for the missing mean would let a single-class
    # sample flip the direction without any supporting evidence.
    if not hallucinated.any() or not grounded.any():
        return "cosine_as_grounding"

    if s3_scores[hallucinated].mean() > s3_scores[grounded].mean():
        return "cosine_as_risk"
    return "cosine_as_grounding"
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
# ─────────────────────────────────────────────────────────────────────────────
|
|
56
|
+
def find_optimal_threshold(
    cgs_risk: np.ndarray,
    labels: np.ndarray,
    n_thresholds: int = 80,
) -> Tuple[float, float]:
    """
    Pick the decision threshold τ with the highest F1 on labelled data.

    Candidates are ``n_thresholds`` evenly spaced values between 0.10 and
    0.90 (inclusive). A sample is predicted hallucinated when its risk score
    is greater than or equal to the candidate.

    Parameters
    ----------
    cgs_risk     : array-like of CGS risk scores
    labels       : array-like of binary labels (1 = hallucinated, 0 = grounded)
    n_thresholds : number of candidate thresholds to evaluate

    Returns
    -------
    (best_tau, best_f1)
        The first candidate reaching the highest F1 wins. If no candidate
        achieves an F1 above zero, (0.40, 0.0) is returned.
    """
    y_true = np.asarray(labels).astype(int)
    scores = np.asarray(cgs_risk, dtype=float)

    # Fallback pair, kept unless some candidate scores a strictly higher F1.
    winner = (0.40, 0.0)
    for candidate in np.linspace(0.10, 0.90, n_thresholds):
        y_pred = (scores >= candidate).astype(int)
        candidate_f1 = f1_score(y_true, y_pred, zero_division=0)
        if not (candidate_f1 > winner[1]):
            continue
        winner = (float(candidate), candidate_f1)

    return winner
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
# ─────────────────────────────────────────────────────────────────────────────
|
|
90
|
+
def optimise_weights(
    s2_scores: np.ndarray,
    s3_scores: np.ndarray,
    labels: np.ndarray,
    direction: str,
    s1_scores: Optional[np.ndarray] = None,
    step: float = 0.05,
) -> Tuple[dict, float, float]:
    """
    Grid search over (α, β, γ) weights — constrained to α + β + γ = 1 —
    to maximise AUC-ROC.

    The risk score evaluated for each candidate is
    ``α·(1 − s1) + β·(1 − s2) + γ·s3_risk``, where ``s3_risk`` is ``s3`` when
    ``direction == "cosine_as_risk"`` and ``1 − s3`` for any other value.
    In Lite mode (``s1_scores is None``) α is fixed at 0 and the S1 term is
    dropped.

    Parameters
    ----------
    s2_scores : NLI entailment scores (Signal 2)
    s3_scores : cosine similarity scores (Signal 3)
    labels    : binary array (1 = hallucinated, 0 = grounded)
    direction : "cosine_as_risk" or "cosine_as_grounding"
    s1_scores : token log-prob confidence (Signal 1); None = Lite mode
    step      : grid step size (smaller = finer search, slower); must be > 0

    Returns
    -------
    (weights_dict, optimal_threshold, best_auc)
        where weights_dict = {"alpha": ..., "beta": ..., "gamma": ...}.
        If no candidate yields an AUC above 0 (for example when the labels
        contain a single class), the defaults
        ({"alpha": 0.0, "beta": 0.15, "gamma": 0.85}, 0.40, 0.0) are returned.

    Raises
    ------
    ValueError
        If ``step`` is not positive.
    """
    if step <= 0:
        raise ValueError("step must be a positive number")

    labels = np.asarray(labels).astype(int)
    s2_risk = 1.0 - np.asarray(s2_scores, dtype=float)
    s3_scores = np.asarray(s3_scores, dtype=float)

    # Apply signal direction to s3
    s3_risk = s3_scores if direction == "cosine_as_risk" else (1.0 - s3_scores)

    s1_risk = None
    if s1_scores is not None:
        s1_risk = 1.0 - np.asarray(s1_scores, dtype=float)

    best_auc = 0.0
    best_weights = {"alpha": 0.0, "beta": 0.15, "gamma": 0.85}
    best_tau = 0.40

    grid = np.arange(0.0, 1.0 + step / 2, step)
    # Lite mode searches only β (γ = 1 − β); Full mode searches α and β.
    alphas = grid if s1_risk is not None else (0.0,)

    for alpha in alphas:
        for beta in grid:
            gamma = 1.0 - alpha - beta
            # Same tolerance in both modes: absorb float error at the simplex
            # edge, then clamp so a tiny negative γ is treated as exactly 0.
            if gamma < -0.001 or gamma > 1.001:
                continue
            gamma = max(0.0, min(1.0, gamma))

            if s1_risk is None:
                risk = beta * s2_risk + gamma * s3_risk
            else:
                risk = alpha * s1_risk + beta * s2_risk + gamma * s3_risk

            try:
                auc = roc_auc_score(labels, risk)
            except ValueError:
                # AUC could not be computed for this candidate; skip it.
                continue

            if auc > best_auc:
                best_auc = auc
                # The threshold is only recomputed for improving candidates.
                best_tau, _ = find_optimal_threshold(risk, labels)
                best_weights = {
                    "alpha": round(float(alpha), 3),
                    "beta":  round(float(beta), 3),
                    "gamma": round(float(gamma), 3),
                }

    return best_weights, best_tau, round(best_auc, 4)
|
|
@@ -0,0 +1,301 @@
|
|
|
1
|
+
"""
|
|
2
|
+
CGSDetector — the main public class.
|
|
3
|
+
|
|
4
|
+
Quick start
|
|
5
|
+
-----------
|
|
6
|
+
from cgs_rag import CGSDetector
|
|
7
|
+
|
|
8
|
+
detector = CGSDetector() # auto-detect mode
|
|
9
|
+
result = detector.score(q, answer, context) # single response
|
|
10
|
+
results = detector.score_batch(qs, ans, ctxs) # batch
|
|
11
|
+
detector.calibrate(val_df) # adapt to your domain
|
|
12
|
+
|
|
13
|
+
Modes
|
|
14
|
+
-----
|
|
15
|
+
"auto" Uses S1+S2+S3 (Full) if Ollama is reachable, else S2+S3 (Lite).
|
|
16
|
+
"lite" S2 (NLI) + S3 (cosine). No LLM required. Recommended default.
|
|
17
|
+
"full" S1 (token log-prob) + S2 + S3. Requires local Ollama instance.
|
|
18
|
+
|
|
19
|
+
Direction
|
|
20
|
+
---------
|
|
21
|
+
"cosine_as_risk" : high cosine → hallucinated (HaluEval adversarial regime)
|
|
22
|
+
"cosine_as_grounding": high cosine → grounded (real RAG / TruthfulQA)
|
|
23
|
+
|
|
24
|
+
If you don't call calibrate(), the default direction is "cosine_as_grounding"
|
|
25
|
+
(the natural RAG regime), which is correct for most real-world deployments.
|
|
26
|
+
"""
|
|
27
|
+
|
|
28
|
+
from __future__ import annotations
|
|
29
|
+
|
|
30
|
+
import numpy as np
|
|
31
|
+
import pandas as pd
|
|
32
|
+
from typing import List, Optional, Union
|
|
33
|
+
|
|
34
|
+
from .result import CGSResult
|
|
35
|
+
from .signals.signal1_token import TokenLogProbSignal
|
|
36
|
+
from .signals.signal2_nli import NLISignal
|
|
37
|
+
from .signals.signal3_cosine import CosineSignal
|
|
38
|
+
from .calibration import (
|
|
39
|
+
detect_signal_direction,
|
|
40
|
+
find_optimal_threshold,
|
|
41
|
+
optimise_weights,
|
|
42
|
+
)
|
|
43
|
+
|
|
44
|
+
# ── Defaults: weights/threshold from the thesis (HaluEval test set); direction set for real RAG ──
|
|
45
|
+
_DEFAULT_WEIGHTS = {"alpha": 0.0, "beta": 0.15, "gamma": 0.85}
|
|
46
|
+
_DEFAULT_THRESHOLD = 0.40
|
|
47
|
+
_DEFAULT_DIRECTION = "cosine_as_grounding" # safe default for real RAG
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
class CGSDetector:
    """
    Composite Grounding Score hallucination detector.

    Combines up to three signals — S1 (token log-prob confidence), S2 (NLI)
    and S3 (cosine similarity) — into a single risk score clipped to [0, 1].

    Parameters
    ----------
    mode         : "auto" | "lite" | "full"
    nli_model    : HuggingFace model ID for Signal 2
    cosine_model : SentenceTransformer model ID for Signal 3
    ollama_model : Ollama model name for Signal 1
    ollama_url   : URL of the local Ollama service
    threshold    : decision threshold τ (overridden by calibrate())
    weights      : {"alpha", "beta", "gamma"} weight dict (a copy is stored)
    direction    : "cosine_as_risk" or "cosine_as_grounding"
    """

    def __init__(
        self,
        mode: str = "auto",
        nli_model: str = "cross-encoder/nli-deberta-v3-small",
        cosine_model: str = "all-MiniLM-L6-v2",
        ollama_model: str = "llama3.2",
        ollama_url: str = "http://localhost:11434",
        threshold: float = _DEFAULT_THRESHOLD,
        weights: Optional[dict] = None,
        direction: str = _DEFAULT_DIRECTION,
    ):
        self.mode = mode
        self.threshold = threshold
        self.weights = dict(weights) if weights else dict(_DEFAULT_WEIGHTS)
        self.direction = direction
        self._calibrated = False

        # Signal objects are created here; per the original note they are
        # lazy-loaded on first use (that happens inside the signal classes).
        self._s1 = TokenLogProbSignal(model_name=ollama_model, ollama_url=ollama_url)
        self._s2 = NLISignal(model_name=nli_model)
        self._s3 = CosineSignal(model_name=cosine_model)

    # ── Mode resolution ──────────────────────────────────────────────────────
    @property
    def active_mode(self) -> str:
        """
        Resolves "auto" to "full" or "lite" based on Ollama availability.

        In "auto" mode every access queries ``self._s1.is_available``, so the
        public methods read this property once per call and reuse the value.
        """
        if self.mode == "auto":
            return "full" if self._s1.is_available else "lite"
        return self.mode

    # ── Internal risk computation ─────────────────────────────────────────────
    def _compute_risk(self, s1: float, s2: float, s3: float) -> float:
        """Weighted sum of per-signal risks, clipped to [0, 1]."""
        s3_risk = s3 if self.direction == "cosine_as_risk" else (1.0 - s3)
        a = self.weights.get("alpha", 0.0)
        b = self.weights.get("beta",  0.15)
        g = self.weights.get("gamma", 0.85)
        return float(np.clip(a * (1.0 - s1) + b * (1.0 - s2) + g * s3_risk, 0.0, 1.0))

    def _build_result(self, mode: str, s1: float, s2: float, s3: float) -> "CGSResult":
        """Turn raw signal values into a CGSResult for an already-resolved mode."""
        risk = self._compute_risk(s1, s2, s3)
        signals = {
            "s2_nli":    round(float(s2), 4),
            "s3_cosine": round(float(s3), 4),
        }
        # S1 is only reported when it was actually computed (Full mode).
        if mode == "full":
            signals["s1_logprob"] = round(float(s1), 4)

        return CGSResult(
            risk_score       = round(risk, 4),
            is_hallucination = risk >= self.threshold,
            threshold        = self.threshold,
            mode             = mode,
            signals          = signals,
            direction        = self.direction,
        )

    # ── Public API ────────────────────────────────────────────────────────────
    def score(self, question: str, answer: str, context: str) -> "CGSResult":
        """
        Score a single RAG response.

        Parameters
        ----------
        question : the user's question
        answer   : the RAG system's generated answer
        context  : the retrieved context passage(s)

        Returns
        -------
        CGSResult with risk_score, is_hallucination, signals, and explain()
        """
        # Resolve once: the reported mode then always matches how S1 was
        # obtained, and "auto" mode probes availability a single time.
        mode = self.active_mode

        s2 = self._s2.score(question, answer, context)
        s3 = self._s3.score(question, answer, context)
        # Outside Full mode S1 is not computed; 0.5 is used as a neutral value.
        s1 = self._s1.score(question, answer, context) if mode == "full" else 0.5

        return self._build_result(mode, s1, s2, s3)

    # ── Batch scoring ─────────────────────────────────────────────────────────
    def score_batch(
        self,
        questions: List[str],
        answers:   List[str],
        contexts:  List[str],
    ) -> List["CGSResult"]:
        """
        Score a batch of RAG responses.

        S2 and S3 are computed through the signals' ``score_batch`` methods.

        Returns
        -------
        List of CGSResult objects (same order as input).
        """
        n = len(questions)
        # NOTE(review): assert is stripped under ``python -O``; kept so the
        # exception type seen by existing callers does not change.
        assert len(answers) == n and len(contexts) == n, \
            "questions, answers, contexts must have the same length"

        # Resolve once per batch instead of once (or twice) per sample.
        mode = self.active_mode

        # Batch signal computation
        s2_arr = np.array(self._s2.score_batch(questions, answers, contexts))
        s3_arr = np.array(self._s3.score_batch(questions, answers, contexts))

        if mode == "full":
            s1_arr = np.array(self._s1.score_batch(questions, answers, contexts))
        else:
            s1_arr = np.full(n, 0.5)

        return [
            self._build_result(mode, s1_arr[i], s2_arr[i], s3_arr[i])
            for i in range(n)
        ]

    # ── Domain calibration ────────────────────────────────────────────────────
    def calibrate(
        self,
        val_df: pd.DataFrame,
        question_col: str = "question",
        answer_col:   str = "answer",
        context_col:  str = "context",
        label_col:    str = "label",
        verbose:      bool = True,
    ) -> dict:
        """
        Calibrate CGS weights, threshold, and direction to your domain.

        Parameters
        ----------
        val_df       : labelled DataFrame. Needs columns:
                       question, answer, context, label (1=hallucinated, 0=grounded)
        question_col : column name for questions
        answer_col   : column name for answers
        context_col  : column name for contexts
        label_col    : column name for binary labels (1=hallucinated, 0=grounded)
        verbose      : print progress and results

        Returns
        -------
        dict with {"weights", "threshold", "direction", "auc", "n_samples"}

        Side effects
        ------------
        Updates self.weights, self.threshold, self.direction, self._calibrated
        """
        if verbose:
            print(f"[CGS] Calibrating on {len(val_df):,} samples …")

        qs  = val_df[question_col].tolist()
        ans = val_df[answer_col].tolist()
        ctx = val_df[context_col].tolist()
        lbl = val_df[label_col].values.astype(int)

        if verbose:
            print("[CGS] Computing S3 (cosine) …")
        s3 = np.array(self._s3.score_batch(qs, ans, ctx))

        if verbose:
            print("[CGS] Computing S2 (NLI) …")
        s2 = np.array(self._s2.score_batch(qs, ans, ctx))

        # S1 is only included in the weight search when Full mode is active.
        s1 = None
        if self.active_mode == "full":
            if verbose:
                print("[CGS] Computing S1 (token log-prob) …")
            s1 = np.array(self._s1.score_batch(qs, ans, ctx))

        # Direction auto-detection
        self.direction = detect_signal_direction(s3, lbl)
        if verbose:
            print(f"[CGS] Direction detected: {self.direction}")
            mean_hal = s3[lbl == 1].mean()
            mean_gnd = s3[lbl == 0].mean()
            print(f" E[s3|hal]={mean_hal:.3f} E[s3|gnd]={mean_gnd:.3f}")

        # Weight + threshold optimisation
        self.weights, self.threshold, best_auc = optimise_weights(
            s2, s3, lbl, self.direction, s1
        )
        self._calibrated = True

        result = {
            # Copy so edits to the returned dict cannot alter the detector.
            "weights":   dict(self.weights),
            "threshold": round(self.threshold, 3),
            "direction": self.direction,
            "auc":       best_auc,
            "n_samples": len(val_df),
        }

        if verbose:
            print(f"[CGS] Calibration complete:")
            print(f" Weights : α={self.weights['alpha']:.2f} "
                  f"β={self.weights['beta']:.2f} γ={self.weights['gamma']:.2f}")
            print(f" Threshold: τ = {self.threshold:.2f}")
            print(f" AUC : {best_auc:.4f}")

        return result

    # ── Serialisation ─────────────────────────────────────────────────────────
    def get_config(self) -> dict:
        """
        Export calibrated configuration for storage / reproducibility.

        "mode" holds the resolved mode (``active_mode``), so a detector created
        with mode="auto" is exported as "full" or "lite".
        """
        return {
            "mode":       self.active_mode,
            # Copy so edits to the exported dict cannot alter the detector.
            "weights":    dict(self.weights),
            "threshold":  self.threshold,
            "direction":  self.direction,
            "calibrated": self._calibrated,
        }

    @classmethod
    def from_config(cls, config: dict, **kwargs) -> "CGSDetector":
        """
        Reconstruct a calibrated detector from a saved config dict.

        Missing keys fall back to the constructor defaults; extra keyword
        arguments (e.g. model names) are forwarded to the constructor.
        """
        detector = cls(
            mode      = config.get("mode", "auto"),
            weights   = config.get("weights"),
            threshold = config.get("threshold", _DEFAULT_THRESHOLD),
            direction = config.get("direction", _DEFAULT_DIRECTION),
            **kwargs,
        )
        detector._calibrated = config.get("calibrated", False)
        return detector

    # ── Repr ──────────────────────────────────────────────────────────────────
    def __repr__(self) -> str:
        return (
            f"CGSDetector("
            f"mode={self.active_mode!r}, "
            f"τ={self.threshold:.2f}, "
            f"direction={self.direction!r}, "
            f"calibrated={self._calibrated})"
        )
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
"""
|
|
2
|
+
CGSResult — the return type for every detector.score() call.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass, field
|
|
6
|
+
from typing import Dict
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dataclass
class CGSResult:
    """
    Outcome of one CGS hallucination check.

    Attributes
    ----------
    risk_score       : risk value; higher = more likely hallucinated.
    is_hallucination : the detector's verdict for this response.
    threshold        : decision threshold used for this call.
    mode             : "lite" (S2+S3) or "full" (S1+S2+S3).
    signals          : per-signal scores {"s2_nli": ..., "s3_cosine": ...},
                       plus "s1_logprob" when Signal 1 was used.
    direction        : "cosine_as_risk" (HaluEval/adversarial) or
                       "cosine_as_grounding" (natural RAG / TruthfulQA).
    """

    risk_score: float
    is_hallucination: bool
    threshold: float
    mode: str
    signals: Dict[str, float]
    direction: str

    # ------------------------------------------------------------------ #
    def explain(self) -> str:
        """Build a multi-line, human-readable report of the score."""
        cosine_is_risk = self.direction == "cosine_as_risk"
        nli = self.signals.get("s2_nli", None)
        cosine = self.signals.get("s3_cosine", None)

        verdict = "GROUNDED ✅"
        if self.is_hallucination:
            verdict = "HALLUCINATION ⚠️"

        report = [
            f"CGS Risk Score : {self.risk_score:.3f} → {verdict}",
            f"Threshold : {self.threshold:.2f} | Mode: {self.mode} | Direction: {self.direction}",
            "",
            "Signal breakdown:",
        ]

        # Signal 1 is only present when the detector ran in Full mode.
        if "s1_logprob" in self.signals:
            token_conf = self.signals["s1_logprob"]
            if token_conf > 0.7:
                conf_label = "high confidence"
            else:
                conf_label = "low confidence"
            report.append(f" S1 Token confidence : {token_conf:.3f} ({conf_label})")

        if nli is not None:
            if nli > 0.6:
                nli_label = "entailed"
            elif nli > 0.35:
                nli_label = "neutral"
            else:
                nli_label = "contradicted"
            report.append(f" S2 NLI faithfulness : {nli:.3f} ({nli_label})")

        if cosine is not None:
            # The same cosine value reads differently depending on direction.
            if cosine_is_risk:
                high_label = "suspicious — answer borrows context vocabulary"
                low_label = "normal"
            else:
                high_label = "well-grounded"
                low_label = "semantically distant from context"
            cosine_label = high_label if cosine > 0.6 else low_label
            report.append(f" S3 Cosine similarity : {cosine:.3f} ({cosine_label})")

        report.extend(["", "Interpretation:"])

        if not self.is_hallucination:
            report.append(" → No strong hallucination signal detected.")
            return "\n".join(report)

        causes = []
        if cosine is not None:
            if cosine_is_risk and cosine > 0.65:
                causes.append("answer vocabulary closely mirrors context (adversarial distractor pattern)")
            elif self.direction == "cosine_as_grounding" and cosine < 0.35:
                causes.append("answer is semantically distant from the retrieved context")
        if nli is not None and nli < 0.35:
            causes.append("NLI model does not support the answer given the context")

        if causes:
            report.extend(f" → {cause}" for cause in causes)
        else:
            # No single signal crossed its own cut-off; only the sum did.
            report.append(" → Combined signal weight exceeds threshold.")

        return "\n".join(report)

    # ------------------------------------------------------------------ #
    def to_dict(self) -> dict:
        """Flatten the result (including per-signal scores) into one dict."""
        payload = {
            "risk_score": self.risk_score,
            "is_hallucination": self.is_hallucination,
            "threshold": self.threshold,
            "mode": self.mode,
            "direction": self.direction,
        }
        # Signal entries are merged last, at the same level as the fields above.
        payload.update(self.signals)
        return payload

    def __repr__(self) -> str:
        parts = (
            f"risk={self.risk_score:.3f}",
            f"hal={self.is_hallucination}",
            f"mode={self.mode!r}",
        )
        return "CGSResult(" + ", ".join(parts) + ")"
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
"""Abstract base class for all CGS signals."""
|
|
2
|
+
|
|
3
|
+
from abc import ABC, abstractmethod
|
|
4
|
+
from typing import List
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class BaseSignal(ABC):
    """
    Every signal must implement:
    - is_available (property) : True if the signal's dependencies are installed/reachable
    - score()                 : single sample → float in [0, 1]

    score_batch() has a default implementation that loops over score();
    subclasses may override it.
    """

    @property
    @abstractmethod
    def is_available(self) -> bool:
        """Return True if this signal can be computed (deps installed, service up)."""

    @abstractmethod
    def score(self, question: str, answer: str, context: str) -> float:
        """
        Compute a raw signal value for one QA sample.

        Returns
        -------
        float in [0, 1].
        The *direction* of the signal (whether high = hallucinated or high = grounded)
        is handled by CGSDetector, not here.
        """

    def score_batch(
        self,
        questions: List[str],
        answers: List[str],
        contexts: List[str],
    ) -> List[float]:
        """
        Default batch implementation: loop over samples.
        Subclasses may override for true batching (e.g. SentenceTransformer).

        Returns
        -------
        One score per sample, in input order.

        Raises
        ------
        ValueError
            If the three inputs do not have the same number of items.
        """
        # Materialise first so any iterable is accepted and can be measured.
        questions, answers, contexts = list(questions), list(answers), list(contexts)

        # zip() alone would stop at the shortest input and return fewer scores
        # than samples, silently misaligning results with their inputs.
        if not (len(questions) == len(answers) == len(contexts)):
            raise ValueError("questions, answers, contexts must have the same length")

        return [
            self.score(q, a, c)
            for q, a, c in zip(questions, answers, contexts)
        ]
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Signal 1 — Token Log-Probability Confidence.
|
|
3
|
+
|
|
4
|
+
Queries a locally-running Ollama instance to obtain token-level log-probabilities
|
|
5
|
+
for the generated answer given the question and context. Converts the geometric
|
|
6
|
+
mean log-prob to a confidence score in [0, 1].
|
|
7
|
+
|
|
8
|
+
High score = model is highly confident = less likely to be a hallucination.
|
|
9
|
+
Low score = model is uncertain = hallucination risk signal.
|
|
10
|
+
|
|
11
|
+
Requirements
|
|
12
|
+
------------
|
|
13
|
+
- Ollama installed and running (https://ollama.com)
|
|
14
|
+
- The model pulled: ``ollama pull llama3.2``
|
|
15
|
+
|
|
16
|
+
Graceful fallback
|
|
17
|
+
-----------------
|
|
18
|
+
If Ollama is not reachable, ``is_available`` returns False and the detector
|
|
19
|
+
automatically falls back to Lite mode (S2+S3 only). ``score()`` returns 0.5
|
|
20
|
+
(neutral) so existing weights stay valid if called directly.
|
|
21
|
+
"""
|
|
22
|
+
|
|
23
|
+
import json
|
|
24
|
+
import math
|
|
25
|
+
import urllib.request
|
|
26
|
+
import urllib.error
|
|
27
|
+
from typing import List
|
|
28
|
+
|
|
29
|
+
from .base import BaseSignal
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class TokenLogProbSignal(BaseSignal):
    """Token log-probability confidence obtained from an Ollama server.

    The signal posts a ``Context / Question / Answer`` prompt to
    ``<ollama_url>/api/generate`` and converts the mean of the returned token
    log-probabilities into a confidence value in [0, 1].

    Whenever the server cannot be reached, replies with something unparsable,
    or returns no usable log-probabilities, ``score()`` returns the neutral
    value 0.5 instead of raising.
    """

    def __init__(
        self,
        model_name: str = "llama3.2",
        ollama_url: str = "http://localhost:11434",
        timeout_sec: int = 30,
    ):
        self.model_name = model_name
        # Drop trailing slashes so endpoint paths can be appended with one "/".
        self.ollama_url = ollama_url.rstrip("/")
        self.timeout_sec = timeout_sec

    # ------------------------------------------------------------------ #
    @property
    def is_available(self) -> bool:
        """Return True if a GET to ``<ollama_url>/api/tags`` succeeds within 2 s."""
        try:
            req = urllib.request.Request(
                f"{self.ollama_url}/api/tags",
                method="GET",
            )
            with urllib.request.urlopen(req, timeout=2):
                return True
        except Exception:
            # Deliberately broad: this is a best-effort probe and any failure
            # (refused connection, HTTP error, malformed URL, ...) simply
            # means "not available".
            return False

    # ------------------------------------------------------------------ #
    @staticmethod
    def _token_logprobs(data) -> List[float]:
        """Pull the numeric log-probabilities out of a decoded JSON response.

        Accepts both a flat list of numbers and a list of per-token objects
        carrying a ``"logprob"`` key. Entries of any other shape, and NaN
        values, are skipped. Returns an empty list when nothing usable is
        present.
        """
        if not isinstance(data, dict):
            return []
        entries = data.get("logprobs")
        if not isinstance(entries, (list, tuple)):
            return []

        values: List[float] = []
        for entry in entries:
            if isinstance(entry, dict):
                entry = entry.get("logprob")
            # bool is a subclass of int; it is never a meaningful log-prob.
            if isinstance(entry, bool) or not isinstance(entry, (int, float)):
                continue
            if math.isnan(entry):
                continue
            values.append(float(entry))
        return values

    # ------------------------------------------------------------------ #
    def score(self, question: str, answer: str, context: str) -> float:
        """
        Return a confidence score in [0, 1], or 0.5 when none can be computed.

        Prompt structure:
            Context: <context>
            Question: <question>
            Answer: <answer>

        The prompt is sent to Ollama in non-streaming generation mode with
        temperature 0 and ``num_predict`` 1. The score is the geometric mean
        probability, ``exp(mean(log-probs))``, of whatever token log-probs the
        response carries in its ``logprobs`` field.

        NOTE(review): with ``num_predict`` set to 1 the server generates a
        single continuation token, so the returned log-probs presumably
        describe that token rather than the tokens of ``answer`` -- confirm
        against the Ollama version in use.
        """
        # Local import: only needed to name the exception type below.
        import http.client

        prompt = (
            f"Context: {context}\n\n"
            f"Question: {question}\n\n"
            f"Answer: {answer}"
        )
        payload = json.dumps({
            "model": self.model_name,
            "prompt": prompt,
            "stream": False,
            # NOTE(review): current Ollama API docs appear to list `logprobs`
            # as a top-level request field; it is sent here in addition to the
            # original `options` entry -- confirm against the target server.
            "logprobs": True,
            "options": {
                "logprobs": True,
                "temperature": 0,       # deterministic
                "num_predict": 1,       # we only need the continuation start
            },
        }).encode("utf-8")

        try:
            req = urllib.request.Request(
                f"{self.ollama_url}/api/generate",
                data=payload,
                headers={"Content-Type": "application/json"},
                method="POST",
            )
            with urllib.request.urlopen(req, timeout=self.timeout_sec) as resp:
                data = json.loads(resp.read().decode("utf-8"))
        except (OSError, ValueError, http.client.HTTPException):
            # OSError covers URLError/HTTPError and socket timeouts;
            # ValueError covers JSONDecodeError, UnicodeDecodeError and
            # malformed URLs. Graceful fallback -- neutral confidence.
            return 0.5

        logprobs = self._token_logprobs(data)
        if not logprobs:
            return 0.5              # logprobs not returned by this model

        # Geometric mean probability = exp(mean log-prob)
        avg_lp = sum(logprobs) / len(logprobs)
        if math.isnan(avg_lp):
            return 0.5              # e.g. +inf and -inf mixed in the response
        if avg_lp >= 0.0:
            # Clamp before exponentiating so a bogus positive value cannot
            # overflow math.exp.
            return 1.0
        return math.exp(avg_lp)     # avg_lp < 0, so the result is in [0, 1)

    # ------------------------------------------------------------------ #
    def score_batch(
        self,
        questions: List[str],
        answers: List[str],
        contexts: List[str],
    ) -> List[float]:
        """Score each (question, answer, context) triple with one ``score()`` call.

        Requests are issued sequentially. Triples are formed with ``zip``, so
        the result has as many entries as the shortest input list.
        """
        return [
            self.score(q, a, c)
            for q, a, c in zip(questions, answers, contexts)
        ]
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Signal 2 — NLI Faithfulness Score.
|
|
3
|
+
|
|
4
|
+
Uses cross-encoder/nli-deberta-v3-small to compute the probability that the
|
|
5
|
+
retrieved context *entails* the generated answer.
|
|
6
|
+
|
|
7
|
+
Hypothesis template (from thesis):
|
|
8
|
+
"The answer to '{question}' is: {answer}."
|
|
9
|
+
|
|
10
|
+
Returns the ENTAILMENT probability in [0, 1].
|
|
11
|
+
High score = answer is entailed by context = grounded.
|
|
12
|
+
Low score = answer contradicts or is unrelated to context = suspicious.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from typing import List
|
|
16
|
+
|
|
17
|
+
from .base import BaseSignal
|
|
18
|
+
|
|
19
|
+
# NLI label names differ by model variant; these sets list the lower-cased
# aliases this module recognises for each class.
_ENTAIL_LABELS = {"entailment", "entail", "label_2", "yes"}
_CONTRA_LABELS = {"contradiction", "contradict", "label_0", "no"}


class NLISignal(BaseSignal):
    """NLI entailment probability between context (premise) and answer (hypothesis)."""

    def __init__(self, model_name: str = "cross-encoder/nli-deberta-v3-small"):
        self.model_name = model_name
        self._pipe = None  # lazy load

    # ------------------------------------------------------------------ #
    @property
    def is_available(self) -> bool:
        """Return True if the ``transformers`` package can be imported."""
        try:
            import transformers  # noqa: F401
            return True
        except ImportError:
            return False

    # ------------------------------------------------------------------ #
    def _load(self) -> None:
        """Create the text-classification pipeline on first use."""
        if self._pipe is None:
            from transformers import pipeline
            self._pipe = pipeline(
                "text-classification",
                model=self.model_name,
                top_k=None,          # return all label scores
                truncation=True,
                max_length=512,
            )

    # ------------------------------------------------------------------ #
    def score(self, question: str, answer: str, context: str) -> float:
        """
        Return the entailment score for one (question, answer, context) triple.

        The context is used as the premise and the hypothesis is built as
        "The answer to '<question>' is: <answer>.".
        Higher = answer is more faithfully supported by context.
        """
        self._load()
        premise = str(context)
        hypothesis = f"The answer to '{question}' is: {answer}."

        raw = self._pipe(
            {"text": premise, "text_pair": hypothesis}
        )
        # raw is expected to be a list of dicts:
        # [{"label": "ENTAILMENT", "score": 0.92}, ...]
        return self._extract_entailment(raw)

    # ------------------------------------------------------------------ #
    def _extract_entailment(self, raw) -> float:
        """
        Extract an entailment score from pipeline output.

        ``raw`` may be a list of ``{"label", "score"}`` dicts, such a list
        wrapped in one extra list, or a single such dict. Labels are compared
        case-insensitively. Resolution order:

        1. the score of a label listed in ``_ENTAIL_LABELS``;
        2. if a contradiction label is present, the sum of all
           non-contradiction scores, clamped to [0, 1];
        3. the highest score of any label.

        Returns 0.5 when ``raw`` is empty.
        """
        if not raw:
            return 0.5

        # A bare dict (single label) would make raw[0] a key lookup; wrap it.
        if isinstance(raw, dict):
            raw = [raw]

        # Normalise to a flat list if wrapped in an extra list
        items = raw[0] if isinstance(raw[0], list) else raw

        label_score = {
            item["label"].lower(): float(item["score"]) for item in items
        }

        # Walk the model's own labels (a deterministic order) rather than the
        # alias set, so the result does not depend on set iteration order.
        for label, value in label_score.items():
            if label in _ENTAIL_LABELS:
                return value

        # Fallback: total score of every label that is not a contradiction.
        # Equals 1 - contradiction when the scores sum to 1.
        if any(label in _CONTRA_LABELS for label in label_score):
            non_contra = sum(
                v for k, v in label_score.items()
                if k not in _CONTRA_LABELS
            )
            return max(0.0, min(1.0, non_contra))

        # Last resort: assume highest-scoring label is entailment
        return max(label_score.values()) if label_score else 0.5

    # ------------------------------------------------------------------ #
    def score_batch(
        self,
        questions: List[str],
        answers: List[str],
        contexts: List[str],
    ) -> List[float]:
        """Score many triples with a single pipeline call.

        Triples are formed with ``zip``, so the result has as many entries as
        the shortest input list. An empty batch returns ``[]`` without loading
        the model.
        """
        inputs = [
            {"text": str(c), "text_pair": f"The answer to '{q}' is: {a}."}
            for q, a, c in zip(questions, answers, contexts)
        ]
        if not inputs:
            return []
        self._load()
        results = self._pipe(inputs)
        return [self._extract_entailment(r) for r in results]
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Signal 3 — Semantic Attribution via Cosine Similarity.
|
|
3
|
+
|
|
4
|
+
Uses all-MiniLM-L6-v2 (384-dim) to embed the answer and the retrieved context,
|
|
5
|
+
then computes their cosine similarity.
|
|
6
|
+
|
|
7
|
+
Direction note
|
|
8
|
+
--------------
|
|
9
|
+
This signal returns cosine similarity clamped to [0, 1] (negative values become 0).
|
|
10
|
+
The CGSDetector decides whether high cosine = risk or high cosine = grounding
|
|
11
|
+
based on the calibrated ``direction`` attribute.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from typing import List
|
|
15
|
+
import numpy as np
|
|
16
|
+
|
|
17
|
+
from .base import BaseSignal
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class CosineSignal(BaseSignal):
    """Cosine similarity between answer embedding and context embedding."""

    def __init__(self, model_name: str = "all-MiniLM-L6-v2"):
        self.model_name = model_name
        self._model = None  # lazy load

    # ------------------------------------------------------------------ #
    @property
    def is_available(self) -> bool:
        """Return True if the ``sentence_transformers`` package can be imported."""
        try:
            import sentence_transformers  # noqa: F401
            return True
        except ImportError:
            return False

    # ------------------------------------------------------------------ #
    def _load(self) -> None:
        """Instantiate the SentenceTransformer model on first use."""
        if self._model is None:
            from sentence_transformers import SentenceTransformer
            self._model = SentenceTransformer(self.model_name)

    # ------------------------------------------------------------------ #
    def score(self, question: str, answer: str, context: str) -> float:
        """
        Return the cosine similarity between answer and context, clamped to [0, 1].

        Negative similarities are reported as 0.0, and 0.0 is also returned
        when either embedding has (near-)zero norm.
        question is accepted but not used (kept for API consistency).
        """
        self._load()
        embs = self._model.encode(
            [str(answer), str(context)],
            convert_to_numpy=True,
            show_progress_bar=False,
        )
        a, b = embs[0], embs[1]
        norm = np.linalg.norm(a) * np.linalg.norm(b)
        if norm < 1e-10:
            # Avoid dividing by ~0 for degenerate embeddings.
            return 0.0
        cosine = float(np.dot(a, b) / norm)
        return max(0.0, min(1.0, cosine))

    # ------------------------------------------------------------------ #
    def score_batch(
        self,
        questions: List[str],
        answers: List[str],
        contexts: List[str],
    ) -> List[float]:
        """
        Batch variant of ``score()``: two ``encode`` calls (answers, contexts)
        followed by a row-wise cosine, each value clamped to [0, 1].

        Answers and contexts are paired with ``zip``, so the result has as
        many entries as the shorter of the two lists; mismatched lengths are
        never broadcast against each other. An empty batch returns ``[]``
        without loading the model. questions is accepted but not used.
        """
        pairs = list(zip(answers, contexts))
        if not pairs:
            return []

        self._load()
        all_answers = [str(a) for a, _ in pairs]
        all_contexts = [str(c) for _, c in pairs]

        emb_ans = self._model.encode(all_answers, convert_to_numpy=True, show_progress_bar=False)
        emb_ctx = self._model.encode(all_contexts, convert_to_numpy=True, show_progress_bar=False)

        # Row-wise cosine; norms are floored at 1e-10 so zero vectors yield 0
        # instead of a division by zero.
        norms_ans = np.linalg.norm(emb_ans, axis=1, keepdims=True).clip(min=1e-10)
        norms_ctx = np.linalg.norm(emb_ctx, axis=1, keepdims=True).clip(min=1e-10)
        emb_ans_n = emb_ans / norms_ans
        emb_ctx_n = emb_ctx / norms_ctx
        cosines = (emb_ans_n * emb_ctx_n).sum(axis=1)
        return [max(0.0, min(1.0, float(c))) for c in cosines]
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: cgs-rag
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Composite Grounding Score: multi-signal hallucination detection for production RAG systems
|
|
5
|
+
Author-email: Nishant Kumar <nishant.k@marmeto.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/nishant-k-marmeto/cgs-rag
|
|
8
|
+
Project-URL: Repository, https://github.com/nishant-k-marmeto/cgs-rag
|
|
9
|
+
Keywords: rag,retrieval-augmented-generation,hallucination,detection,nlp,llm,grounding,faithfulness
|
|
10
|
+
Classifier: Development Status :: 3 - Alpha
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: Intended Audience :: Science/Research
|
|
13
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
20
|
+
Classifier: Operating System :: OS Independent
|
|
21
|
+
Requires-Python: >=3.9
|
|
22
|
+
Requires-Dist: torch>=2.0.0
|
|
23
|
+
Requires-Dist: transformers>=4.36.0
|
|
24
|
+
Requires-Dist: sentence-transformers>=2.2.0
|
|
25
|
+
Requires-Dist: scikit-learn>=1.3.0
|
|
26
|
+
Requires-Dist: numpy>=1.24.0
|
|
27
|
+
Requires-Dist: pandas>=2.0.0
|
|
28
|
+
Provides-Extra: dev
|
|
29
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
30
|
+
Requires-Dist: pytest-cov; extra == "dev"
|
|
31
|
+
Requires-Dist: jupyter; extra == "dev"
|
|
32
|
+
Requires-Dist: matplotlib; extra == "dev"
|
|
33
|
+
Requires-Dist: seaborn; extra == "dev"
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
pyproject.toml
|
|
2
|
+
cgs_rag/__init__.py
|
|
3
|
+
cgs_rag/calibration.py
|
|
4
|
+
cgs_rag/detector.py
|
|
5
|
+
cgs_rag/result.py
|
|
6
|
+
cgs_rag.egg-info/PKG-INFO
|
|
7
|
+
cgs_rag.egg-info/SOURCES.txt
|
|
8
|
+
cgs_rag.egg-info/dependency_links.txt
|
|
9
|
+
cgs_rag.egg-info/requires.txt
|
|
10
|
+
cgs_rag.egg-info/top_level.txt
|
|
11
|
+
cgs_rag/signals/__init__.py
|
|
12
|
+
cgs_rag/signals/base.py
|
|
13
|
+
cgs_rag/signals/signal1_token.py
|
|
14
|
+
cgs_rag/signals/signal2_nli.py
|
|
15
|
+
cgs_rag/signals/signal3_cosine.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
cgs_rag
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=61.0"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "cgs-rag"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Composite Grounding Score: multi-signal hallucination detection for production RAG systems"
|
|
9
|
+
requires-python = ">=3.9"
|
|
10
|
+
license = {text = "MIT"}
|
|
11
|
+
authors = [{name = "Nishant Kumar", email = "nishant.k@marmeto.com"}]
|
|
12
|
+
|
|
13
|
+
keywords = [
|
|
14
|
+
"rag", "retrieval-augmented-generation",
|
|
15
|
+
"hallucination", "detection",
|
|
16
|
+
"nlp", "llm", "grounding", "faithfulness",
|
|
17
|
+
]
|
|
18
|
+
|
|
19
|
+
classifiers = [
|
|
20
|
+
"Development Status :: 3 - Alpha",
|
|
21
|
+
"Intended Audience :: Developers",
|
|
22
|
+
"Intended Audience :: Science/Research",
|
|
23
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
24
|
+
"Programming Language :: Python :: 3",
|
|
25
|
+
"Programming Language :: Python :: 3.9",
|
|
26
|
+
"Programming Language :: Python :: 3.10",
|
|
27
|
+
"Programming Language :: Python :: 3.11",
|
|
28
|
+
"Programming Language :: Python :: 3.12",
|
|
29
|
+
"License :: OSI Approved :: MIT License",
|
|
30
|
+
"Operating System :: OS Independent",
|
|
31
|
+
]
|
|
32
|
+
|
|
33
|
+
dependencies = [
|
|
34
|
+
"torch>=2.0.0",
|
|
35
|
+
"transformers>=4.36.0",
|
|
36
|
+
"sentence-transformers>=2.2.0",
|
|
37
|
+
"scikit-learn>=1.3.0",
|
|
38
|
+
"numpy>=1.24.0",
|
|
39
|
+
"pandas>=2.0.0",
|
|
40
|
+
]
|
|
41
|
+
|
|
42
|
+
[project.optional-dependencies]
|
|
43
|
+
dev = [
|
|
44
|
+
"pytest>=7.0",
|
|
45
|
+
"pytest-cov",
|
|
46
|
+
"jupyter",
|
|
47
|
+
"matplotlib",
|
|
48
|
+
"seaborn",
|
|
49
|
+
]
|
|
50
|
+
|
|
51
|
+
[project.urls]
|
|
52
|
+
Homepage = "https://github.com/nishant-k-marmeto/cgs-rag"
|
|
53
|
+
Repository = "https://github.com/nishant-k-marmeto/cgs-rag"
|
|
54
|
+
|
|
55
|
+
[tool.setuptools.packages.find]
|
|
56
|
+
where = ["."]
|
|
57
|
+
include = ["cgs_rag*"]
|
cgs_rag-0.1.0/setup.cfg
ADDED