cgs-rag 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cgs_rag-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,33 @@
1
+ Metadata-Version: 2.4
2
+ Name: cgs-rag
3
+ Version: 0.1.0
4
+ Summary: Composite Grounding Score: multi-signal hallucination detection for production RAG systems
5
+ Author-email: Nishant Kumar <nishant.k@marmeto.com>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/nishant-k-marmeto/cgs-rag
8
+ Project-URL: Repository, https://github.com/nishant-k-marmeto/cgs-rag
9
+ Keywords: rag,retrieval-augmented-generation,hallucination,detection,nlp,llm,grounding,faithfulness
10
+ Classifier: Development Status :: 3 - Alpha
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: Intended Audience :: Science/Research
13
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.9
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: License :: OSI Approved :: MIT License
20
+ Classifier: Operating System :: OS Independent
21
+ Requires-Python: >=3.9
22
+ Requires-Dist: torch>=2.0.0
23
+ Requires-Dist: transformers>=4.36.0
24
+ Requires-Dist: sentence-transformers>=2.2.0
25
+ Requires-Dist: scikit-learn>=1.3.0
26
+ Requires-Dist: numpy>=1.24.0
27
+ Requires-Dist: pandas>=2.0.0
28
+ Provides-Extra: dev
29
+ Requires-Dist: pytest>=7.0; extra == "dev"
30
+ Requires-Dist: pytest-cov; extra == "dev"
31
+ Requires-Dist: jupyter; extra == "dev"
32
+ Requires-Dist: matplotlib; extra == "dev"
33
+ Requires-Dist: seaborn; extra == "dev"
@@ -0,0 +1,41 @@
1
+ """
2
+ cgs-rag
3
+ =======
4
+ Composite Grounding Score — multi-signal hallucination detection for
5
+ production RAG systems.
6
+
7
+ Based on the thesis:
8
+ "Composite Grounding Score (CGS): A Multi-Signal Framework for
9
+ Hallucination Detection in Production RAG Systems"
10
+ Nishant Kumar, IIT Patna Executive M.Tech AI & DSE, 2025
11
+
12
+ Quick start
13
+ -----------
14
+ from cgs_rag import CGSDetector
15
+
16
+ detector = CGSDetector()
17
+ result = detector.score(
18
+ question = "What is the capital of France?",
19
+ answer = "Berlin",
20
+ context = "France is a country in Western Europe. Its capital is Paris."
21
+ )
22
+ print(result.risk_score) # e.g. 0.782
23
+ print(result.is_hallucination) # True
24
+ print(result.explain())
25
+
26
+ Domain calibration
27
+ ------------------
28
+ import pandas as pd
29
+ val_df = pd.read_csv("my_labelled_rag_data.csv")
30
+ # columns needed: question, answer, context, label (1=hallucinated, 0=grounded)
31
+
32
+ detector.calibrate(val_df)
33
+ # Automatically detects signal direction, optimises weights & threshold.
34
+ """
35
+
36
+ from .detector import CGSDetector
37
+ from .result import CGSResult
38
+
39
+ __version__ = "0.1.0"
40
+ __author__ = "Nishant Kumar"
41
+ __all__ = ["CGSDetector", "CGSResult"]
@@ -0,0 +1,172 @@
1
+ """
2
+ CGS Calibration Utilities
3
+ =========================
4
+
5
+ Three independent steps:
6
+ 1. detect_signal_direction — determine whether high cosine = risk or grounding
7
+ 2. find_optimal_threshold — grid search over τ to maximise F1
8
+ 3. optimise_weights — grid search over (α, β, γ) to maximise AUC
9
+
10
+ All functions operate on numpy arrays and are independent of the detector class,
11
+ making them easy to use standalone or in custom pipelines.
12
+ """
13
+
14
+ from typing import Optional, Tuple
15
+ import numpy as np
16
+ from sklearn.metrics import roc_auc_score, f1_score
17
+
18
+
19
+ # ─────────────────────────────────────────────────────────────────────────────
20
def detect_signal_direction(
    s3_scores: np.ndarray,
    labels: np.ndarray,
) -> str:
    """
    Decide, from a labelled sample, which way the cosine signal points.

    Parameters
    ----------
    s3_scores : array of cosine similarities (one per sample)
    labels    : binary array (1 = hallucinated, 0 = grounded)

    Returns
    -------
    "cosine_as_risk"
        when the mean cosine of hallucinated samples is strictly greater
        than the mean cosine of grounded samples.

    "cosine_as_grounding"
        otherwise (including ties).

    A class with no samples contributes a neutral mean of 0.5.
    """
    y = np.asarray(labels).astype(int)
    cosines = np.asarray(s3_scores, dtype=float)

    def _class_mean(target: int) -> float:
        # Mean cosine of one label class; 0.5 when that class is absent.
        mask = y == target
        if not mask.any():
            return 0.5
        return cosines[mask].mean()

    hallucinated_mean = _class_mean(1)
    grounded_mean = _class_mean(0)

    return (
        "cosine_as_risk"
        if hallucinated_mean > grounded_mean
        else "cosine_as_grounding"
    )
53
+
54
+
55
+ # ─────────────────────────────────────────────────────────────────────────────
56
def find_optimal_threshold(
    cgs_risk: np.ndarray,
    labels: np.ndarray,
    n_thresholds: int = 80,
) -> Tuple[float, float]:
    """
    Scan evenly spaced thresholds in [0.10, 0.90] and keep the one with the
    highest F1 on the labelled data.

    Parameters
    ----------
    cgs_risk     : array of CGS risk scores
    labels       : binary array (1 = hallucinated, 0 = grounded)
    n_thresholds : number of candidate thresholds to evaluate

    Returns
    -------
    (best_tau, best_f1)
        Falls back to (0.40, 0.0) when no candidate achieves F1 > 0.
        On ties the lowest threshold wins (first strict improvement is kept).
    """
    y_true = np.asarray(labels).astype(int)
    scores = np.asarray(cgs_risk, dtype=float)

    top_f1 = 0.0
    top_tau = 0.40
    candidates = np.linspace(0.10, 0.90, n_thresholds)

    for candidate in candidates:
        # A sample is predicted "hallucinated" when its risk reaches the cut-off.
        predicted = (scores >= candidate).astype(int)
        candidate_f1 = f1_score(y_true, predicted, zero_division=0)
        if not candidate_f1 > top_f1:
            continue
        top_f1 = candidate_f1
        top_tau = float(candidate)

    return top_tau, top_f1
87
+
88
+
89
+ # ─────────────────────────────────────────────────────────────────────────────
90
def optimise_weights(
    s2_scores: np.ndarray,
    s3_scores: np.ndarray,
    labels: np.ndarray,
    direction: str,
    s1_scores: Optional[np.ndarray] = None,
    step: float = 0.05,
) -> Tuple[dict, float, float]:
    """
    Grid search over (α, β, γ) weights — constrained to α + β + γ = 1 —
    to maximise AUC-ROC, then pick the F1-optimal threshold for the winner.

    Parameters
    ----------
    s2_scores : NLI entailment scores (Signal 2)
    s3_scores : cosine similarity scores (Signal 3)
    labels    : binary array (1 = hallucinated, 0 = grounded)
    direction : "cosine_as_risk" uses s3 as-is; any other value is treated
                as "cosine_as_grounding" and uses 1 - s3
    s1_scores : token log-prob confidence (Signal 1); None = Lite mode
                (α fixed at 0, search over β + γ = 1)
    step      : grid step size (smaller = finer search, slower); must be > 0

    Returns
    -------
    (weights_dict, optimal_threshold, best_auc)
        where weights_dict = {"alpha": ..., "beta": ..., "gamma": ...}.
        If no weight combination yields a computable AUC > 0 (for example
        when ``labels`` contains a single class), the defaults
        ({"alpha": 0.0, "beta": 0.15, "gamma": 0.85}, 0.40, 0.0) are returned.

    Raises
    ------
    ValueError
        If ``step`` is not strictly positive.
    """
    if step <= 0:
        raise ValueError("step must be > 0")

    labels = np.asarray(labels).astype(int)
    s2_risk = 1.0 - np.asarray(s2_scores, dtype=float)
    s3_scores = np.asarray(s3_scores, dtype=float)

    # Apply signal direction to s3
    s3_risk = s3_scores if direction == "cosine_as_risk" else (1.0 - s3_scores)

    full_mode = s1_scores is not None
    s1_risk = (1.0 - np.asarray(s1_scores, dtype=float)) if full_mode else None

    grid = np.arange(0.0, 1.0 + step / 2, step)
    # Lite mode is the same search with α pinned to 0.
    alpha_grid = grid if full_mode else (0.0,)

    best_auc = 0.0
    best_weights = {"alpha": 0.0, "beta": 0.15, "gamma": 0.85}
    best_risk = None

    for alpha in alpha_grid:
        for beta in grid:
            gamma = 1.0 - alpha - beta
            # Same float tolerance in both modes: grid points can land a hair
            # outside [0, 1] because of accumulated rounding in np.arange.
            if gamma < -0.001 or gamma > 1.001:
                continue
            gamma = max(0.0, min(1.0, gamma))

            if full_mode:
                risk = alpha * s1_risk + beta * s2_risk + gamma * s3_risk
            else:
                risk = beta * s2_risk + gamma * s3_risk

            try:
                auc = roc_auc_score(labels, risk)
            except ValueError:
                # AUC could not be computed for this input; skip the candidate.
                continue

            if auc > best_auc:
                best_auc = auc
                best_risk = risk
                best_weights = {
                    "alpha": round(float(alpha), 3),
                    "beta": round(float(beta), 3),
                    "gamma": round(float(gamma), 3),
                }

    # The threshold only matters for the winning weights, so search it once
    # here instead of on every intermediate AUC improvement.
    best_tau = 0.40
    if best_risk is not None:
        best_tau, _ = find_optimal_threshold(best_risk, labels)

    return best_weights, best_tau, round(best_auc, 4)
@@ -0,0 +1,301 @@
1
+ """
2
+ CGSDetector — the main public class.
3
+
4
+ Quick start
5
+ -----------
6
+ from cgs_rag import CGSDetector
7
+
8
+ detector = CGSDetector() # auto-detect mode
9
+ result = detector.score(q, answer, context) # single response
10
+ results = detector.score_batch(qs, ans, ctxs) # batch
11
+ detector.calibrate(val_df) # adapt to your domain
12
+
13
+ Modes
14
+ -----
15
+ "auto" Uses S1+S2+S3 (Full) if Ollama is reachable, else S2+S3 (Lite).
16
+ "lite" S2 (NLI) + S3 (cosine). No LLM required. Recommended default.
17
+ "full" S1 (token log-prob) + S2 + S3. Requires local Ollama instance.
18
+
19
+ Direction
20
+ ---------
21
+ "cosine_as_risk" : high cosine → hallucinated (HaluEval adversarial regime)
22
+ "cosine_as_grounding": high cosine → grounded (real RAG / TruthfulQA)
23
+
24
+ If you don't call calibrate(), the default direction is "cosine_as_grounding"
25
+ (the natural RAG regime), which is correct for most real-world deployments.
26
+ """
27
+
28
+ from __future__ import annotations
29
+
30
+ import numpy as np
31
+ import pandas as pd
32
+ from typing import List, Optional, Union
33
+
34
+ from .result import CGSResult
35
+ from .signals.signal1_token import TokenLogProbSignal
36
+ from .signals.signal2_nli import NLISignal
37
+ from .signals.signal3_cosine import CosineSignal
38
+ from .calibration import (
39
+ detect_signal_direction,
40
+ find_optimal_threshold,
41
+ optimise_weights,
42
+ )
43
+
44
# ── Thesis-validated defaults (HaluEval test set) ────────────────────────────
_DEFAULT_WEIGHTS = {"alpha": 0.0, "beta": 0.15, "gamma": 0.85}
_DEFAULT_THRESHOLD = 0.40
_DEFAULT_DIRECTION = "cosine_as_grounding"  # safe default for real RAG


class CGSDetector:
    """
    Composite Grounding Score hallucination detector.

    Parameters
    ----------
    mode         : "auto" | "lite" | "full"
    nli_model    : HuggingFace model ID for Signal 2
    cosine_model : SentenceTransformer model ID for Signal 3
    ollama_model : Ollama model name for Signal 1
    ollama_url   : URL of the local Ollama service
    threshold    : decision threshold τ (overridden by calibrate())
    weights      : {"alpha", "beta", "gamma"} weight dict
    direction    : "cosine_as_risk" or "cosine_as_grounding"
    """

    def __init__(
        self,
        mode: str = "auto",
        nli_model: str = "cross-encoder/nli-deberta-v3-small",
        cosine_model: str = "all-MiniLM-L6-v2",
        ollama_model: str = "llama3.2",
        ollama_url: str = "http://localhost:11434",
        threshold: float = _DEFAULT_THRESHOLD,
        weights: Optional[dict] = None,
        direction: str = _DEFAULT_DIRECTION,
    ):
        self.mode = mode
        self.threshold = threshold
        # Copy so later mutation never leaks into the caller's dict or the
        # module-level defaults.
        self.weights = dict(weights) if weights else dict(_DEFAULT_WEIGHTS)
        self.direction = direction
        self._calibrated = False

        # Signal objects are created here; the NLI and embedding models
        # themselves are loaded lazily on first use.
        self._s1 = TokenLogProbSignal(model_name=ollama_model, ollama_url=ollama_url)
        self._s2 = NLISignal(model_name=nli_model)
        self._s3 = CosineSignal(model_name=cosine_model)

    # ── Mode resolution ──────────────────────────────────────────────────────
    @property
    def active_mode(self) -> str:
        """
        Resolves "auto" to "full" or "lite" based on Ollama availability.

        In "auto" mode every access re-checks ``self._s1.is_available``, so
        callers that need a stable answer should read this once and reuse it.
        """
        if self.mode == "auto":
            return "full" if self._s1.is_available else "lite"
        return self.mode

    # ── Internal risk computation ─────────────────────────────────────────────
    def _compute_risk(self, s1: float, s2: float, s3: float) -> float:
        """Weighted combination of the three signals, clipped to [0, 1]."""
        s3_risk = s3 if self.direction == "cosine_as_risk" else (1.0 - s3)
        a = self.weights.get("alpha", 0.0)
        b = self.weights.get("beta", 0.15)
        g = self.weights.get("gamma", 0.85)
        return float(np.clip(a * (1.0 - s1) + b * (1.0 - s2) + g * s3_risk, 0.0, 1.0))

    def _build_result(self, mode: str, s1: float, s2: float, s3: float) -> "CGSResult":
        """Turn one sample's raw signal values into a CGSResult for ``mode``."""
        risk = self._compute_risk(s1, s2, s3)
        signals = {
            "s2_nli": round(float(s2), 4),
            "s3_cosine": round(float(s3), 4),
        }
        # S1 is only reported when it was actually computed.
        if mode == "full":
            signals["s1_logprob"] = round(float(s1), 4)

        return CGSResult(
            risk_score=round(risk, 4),
            is_hallucination=bool(risk >= self.threshold),
            threshold=self.threshold,
            mode=mode,
            signals=signals,
            direction=self.direction,
        )

    # ── Public API ────────────────────────────────────────────────────────────
    def score(self, question: str, answer: str, context: str) -> "CGSResult":
        """
        Score a single RAG response.

        Parameters
        ----------
        question : the user's question
        answer   : the RAG system's generated answer
        context  : the retrieved context passage(s)

        Returns
        -------
        CGSResult with risk_score, is_hallucination, signals, and explain()
        """
        # Resolve once: in "auto" mode each access performs an availability
        # check, and the answer must not change halfway through one call.
        mode = self.active_mode

        s2 = self._s2.score(question, answer, context)
        s3 = self._s3.score(question, answer, context)
        # 0.5 is the neutral placeholder used when S1 is not computed.
        s1 = self._s1.score(question, answer, context) if mode == "full" else 0.5

        return self._build_result(mode, s1, s2, s3)

    # ── Batch scoring ─────────────────────────────────────────────────────────
    def score_batch(
        self,
        questions: List[str],
        answers: List[str],
        contexts: List[str],
    ) -> List["CGSResult"]:
        """
        Score a batch of RAG responses using each signal's ``score_batch``.

        Returns
        -------
        List of CGSResult objects (same order as input). An empty batch
        returns an empty list without touching any model.

        Raises
        ------
        ValueError
            If the three input lists differ in length.
        """
        n = len(questions)
        # Explicit check instead of ``assert`` so it survives ``python -O``.
        if len(answers) != n or len(contexts) != n:
            raise ValueError("questions, answers, contexts must have the same length")
        if n == 0:
            return []

        # Resolve once for the whole batch (see score()).
        mode = self.active_mode

        # Batch signal computation
        s2_arr = np.array(self._s2.score_batch(questions, answers, contexts))
        s3_arr = np.array(self._s3.score_batch(questions, answers, contexts))

        if mode == "full":
            s1_arr = np.array(self._s1.score_batch(questions, answers, contexts))
        else:
            s1_arr = np.full(n, 0.5)

        return [
            self._build_result(mode, s1_arr[i], s2_arr[i], s3_arr[i])
            for i in range(n)
        ]

    # ── Domain calibration ────────────────────────────────────────────────────
    def calibrate(
        self,
        val_df: pd.DataFrame,
        question_col: str = "question",
        answer_col: str = "answer",
        context_col: str = "context",
        label_col: str = "label",
        verbose: bool = True,
    ) -> dict:
        """
        Calibrate CGS weights, threshold, and direction to your domain.

        Parameters
        ----------
        val_df       : labelled DataFrame. Needs columns:
                       question, answer, context, label (1=hallucinated, 0=grounded)
        question_col : column name for questions
        answer_col   : column name for answers
        context_col  : column name for contexts
        label_col    : column name for binary labels (1=hallucinated, 0=grounded)
        verbose      : print progress and results

        Returns
        -------
        dict with {"weights", "threshold", "direction", "auc", "n_samples"}

        Side effects
        ------------
        Updates self.weights, self.threshold, self.direction, self._calibrated
        """
        if verbose:
            print(f"[CGS] Calibrating on {len(val_df):,} samples …")

        qs = val_df[question_col].tolist()
        ans = val_df[answer_col].tolist()
        ctx = val_df[context_col].tolist()
        lbl = val_df[label_col].values.astype(int)

        if verbose:
            print("[CGS] Computing S3 (cosine) …")
        s3 = np.array(self._s3.score_batch(qs, ans, ctx))

        if verbose:
            print("[CGS] Computing S2 (NLI) …")
        s2 = np.array(self._s2.score_batch(qs, ans, ctx))

        s1 = None
        if self.active_mode == "full":
            if verbose:
                print("[CGS] Computing S1 (token log-prob) …")
            s1 = np.array(self._s1.score_batch(qs, ans, ctx))

        # Direction auto-detection
        self.direction = detect_signal_direction(s3, lbl)
        if verbose:
            print(f"[CGS] Direction detected: {self.direction}")
            # Guard empty classes so the diagnostic prints "nan" without
            # triggering numpy's mean-of-empty-slice warning.
            hal = s3[lbl == 1]
            gnd = s3[lbl == 0]
            mean_hal = hal.mean() if hal.size else float("nan")
            mean_gnd = gnd.mean() if gnd.size else float("nan")
            print(f" E[s3|hal]={mean_hal:.3f} E[s3|gnd]={mean_gnd:.3f}")

        # Weight + threshold optimisation
        self.weights, self.threshold, best_auc = optimise_weights(
            s2, s3, lbl, self.direction, s1
        )
        self._calibrated = True

        result = {
            "weights": dict(self.weights),  # copy: callers must not alias live state
            "threshold": round(self.threshold, 3),
            "direction": self.direction,
            "auc": best_auc,
            "n_samples": len(val_df),
        }

        if verbose:
            print("[CGS] Calibration complete:")
            print(f" Weights : α={self.weights['alpha']:.2f} "
                  f"β={self.weights['beta']:.2f} γ={self.weights['gamma']:.2f}")
            print(f" Threshold: τ = {self.threshold:.2f}")
            print(f" AUC : {best_auc:.4f}")

        return result

    # ── Serialisation ─────────────────────────────────────────────────────────
    def get_config(self) -> dict:
        """
        Export calibrated configuration for storage / reproducibility.

        "mode" holds the resolved ``active_mode``; "weights" is a copy, so
        editing the returned dict does not change the detector.
        """
        return {
            "mode": self.active_mode,
            "weights": dict(self.weights),
            "threshold": self.threshold,
            "direction": self.direction,
            "calibrated": self._calibrated,
        }

    @classmethod
    def from_config(cls, config: dict, **kwargs) -> "CGSDetector":
        """
        Reconstruct a calibrated detector from a saved config dict.

        Missing keys fall back to the constructor defaults; extra ``kwargs``
        are forwarded to the constructor.
        """
        detector = cls(
            mode=config.get("mode", "auto"),
            weights=config.get("weights"),
            threshold=config.get("threshold", _DEFAULT_THRESHOLD),
            direction=config.get("direction", _DEFAULT_DIRECTION),
            **kwargs,
        )
        detector._calibrated = config.get("calibrated", False)
        return detector

    # ── Repr ──────────────────────────────────────────────────────────────────
    def __repr__(self) -> str:
        return (
            f"CGSDetector("
            f"mode={self.active_mode!r}, "
            f"τ={self.threshold:.2f}, "
            f"direction={self.direction!r}, "
            f"calibrated={self._calibrated})"
        )
@@ -0,0 +1,100 @@
1
+ """
2
+ CGSResult — the return type for every detector.score() call.
3
+ """
4
+
5
+ from dataclasses import dataclass, field
6
+ from typing import Dict
7
+
8
+
9
@dataclass
class CGSResult:
    """
    Outcome of one CGS hallucination check.

    Attributes
    ----------
    risk_score       : float in [0, 1]. Higher = more likely hallucinated.
    is_hallucination : True if risk_score >= threshold.
    threshold        : decision threshold used for this call.
    mode             : "lite" (S2+S3) or "full" (S1+S2+S3).
    signals          : per-signal scores {"s2_nli": ..., "s3_cosine": ...}.
    direction        : "cosine_as_risk" (HaluEval/adversarial) or
                       "cosine_as_grounding" (natural RAG / TruthfulQA).
    """

    risk_score: float
    is_hallucination: bool
    threshold: float
    mode: str
    signals: Dict[str, float]
    direction: str

    # ------------------------------------------------------------------ #
    def explain(self) -> str:
        """Build a multi-line, human-readable report for this result."""
        flagged = self.is_hallucination
        risk_regime = self.direction == "cosine_as_risk"
        nli = self.signals.get("s2_nli", None)
        cosine = self.signals.get("s3_cosine", None)

        if flagged:
            verdict = "HALLUCINATION ⚠️"
        else:
            verdict = "GROUNDED ✅"

        report = [
            f"CGS Risk Score : {self.risk_score:.3f} → {verdict}",
            f"Threshold : {self.threshold:.2f} | Mode: {self.mode} | Direction: {self.direction}",
            "",
            "Signal breakdown:",
        ]

        # S1 is only present when the detector ran in "full" mode.
        if "s1_logprob" in self.signals:
            token_conf = self.signals["s1_logprob"]
            conf_tag = "high confidence" if token_conf > 0.7 else "low confidence"
            report.append(f" S1 Token confidence : {token_conf:.3f} ({conf_tag})")

        if nli is not None:
            if nli > 0.6:
                nli_tag = "entailed"
            elif nli > 0.35:
                nli_tag = "neutral"
            else:
                nli_tag = "contradicted"
            report.append(f" S2 NLI faithfulness : {nli:.3f} ({nli_tag})")

        if cosine is not None:
            high_cosine = cosine > 0.6
            if risk_regime:
                cos_tag = (
                    "suspicious — answer borrows context vocabulary"
                    if high_cosine
                    else "normal"
                )
            else:
                cos_tag = (
                    "well-grounded"
                    if high_cosine
                    else "semantically distant from context"
                )
            report.append(f" S3 Cosine similarity : {cosine:.3f} ({cos_tag})")

        report.extend(["", "Interpretation:"])

        if not flagged:
            report.append(" → No strong hallucination signal detected.")
            return "\n".join(report)

        # Collect the individual signals that point towards hallucination.
        causes = []
        if cosine is not None:
            if risk_regime and cosine > 0.65:
                causes.append(
                    "answer vocabulary closely mirrors context (adversarial distractor pattern)"
                )
            elif self.direction == "cosine_as_grounding" and cosine < 0.35:
                causes.append("answer is semantically distant from the retrieved context")
        if nli is not None and nli < 0.35:
            causes.append("NLI model does not support the answer given the context")

        if causes:
            report.extend(f" → {cause}" for cause in causes)
        else:
            # Flagged, but no single signal crossed its own explanatory cut-off.
            report.append(" → Combined signal weight exceeds threshold.")

        return "\n".join(report)

    # ------------------------------------------------------------------ #
    def to_dict(self) -> dict:
        """Flatten the result (signals included) into one dict for logging / storage."""
        flat = {
            "risk_score": self.risk_score,
            "is_hallucination": self.is_hallucination,
            "threshold": self.threshold,
            "mode": self.mode,
            "direction": self.direction,
        }
        # Signal entries are merged last, at the same level as the fixed keys.
        flat.update(self.signals)
        return flat

    def __repr__(self) -> str:
        summary = ", ".join(
            (
                f"risk={self.risk_score:.3f}",
                f"hal={self.is_hallucination}",
                f"mode={self.mode!r}",
            )
        )
        return f"CGSResult({summary})"
@@ -0,0 +1,5 @@
1
+ from .signal2_nli import NLISignal
2
+ from .signal3_cosine import CosineSignal
3
+ from .signal1_token import TokenLogProbSignal
4
+
5
+ __all__ = ["NLISignal", "CosineSignal", "TokenLogProbSignal"]
@@ -0,0 +1,45 @@
1
+ """Abstract base class for all CGS signals."""
2
+
3
+ from abc import ABC, abstractmethod
4
+ from typing import List
5
+
6
+
7
class BaseSignal(ABC):
    """
    Contract shared by all CGS signals.

    Subclasses provide:
    - is_available (property) : True if the signal's dependencies are installed/reachable
    - score()                 : single sample → float in [0, 1]

    and inherit (or override):
    - score_batch()           : list of samples → list of floats
    """

    @property
    @abstractmethod
    def is_available(self) -> bool:
        """Return True if this signal can be computed (deps installed, service up)."""

    @abstractmethod
    def score(self, question: str, answer: str, context: str) -> float:
        """
        Compute a raw signal value for one QA sample.

        Returns
        -------
        float in [0, 1].
        The *direction* of the signal (whether high = hallucinated or high = grounded)
        is handled by CGSDetector, not here.
        """

    def score_batch(
        self,
        questions: List[str],
        answers: List[str],
        contexts: List[str],
    ) -> List[float]:
        """
        Fallback batch scorer: call ``score`` once per sample, in order.

        Iteration stops at the shortest of the three input lists.
        Subclasses may override for true batching (e.g. SentenceTransformer).
        """
        return list(map(self.score, questions, answers, contexts))
@@ -0,0 +1,122 @@
1
+ """
2
+ Signal 1 — Token Log-Probability Confidence.
3
+
4
+ Queries a locally-running Ollama instance to obtain token-level log-probabilities
5
+ for the generated answer given the question and context. Converts the mean
6
+ log-prob to a geometric-mean probability, used as a confidence score in [0, 1].
7
+
8
+ High score = model is highly confident = less likely to be a hallucination.
9
+ Low score = model is uncertain = hallucination risk signal.
10
+
11
+ Requirements
12
+ ------------
13
+ - Ollama installed and running (https://ollama.com)
14
+ - The model pulled: ``ollama pull llama3.2``
15
+
16
+ Graceful fallback
17
+ -----------------
18
+ If Ollama is not reachable, ``is_available`` returns False and the detector
19
+ automatically falls back to Lite mode (S2+S3 only). ``score()`` returns 0.5
20
+ (neutral) so existing weights stay valid if called directly.
21
+ """
22
+
23
+ import json
24
+ import math
25
+ import urllib.request
26
+ import urllib.error
27
+ from typing import List
28
+
29
+ from .base import BaseSignal
30
+
31
+
32
class TokenLogProbSignal(BaseSignal):
    """Token log-probability confidence via Ollama."""

    def __init__(
        self,
        model_name: str = "llama3.2",
        ollama_url: str = "http://localhost:11434",
        timeout_sec: int = 30,
    ):
        self.model_name = model_name
        self.ollama_url = ollama_url.rstrip("/")
        self.timeout_sec = timeout_sec

    # ------------------------------------------------------------------ #
    @property
    def is_available(self) -> bool:
        """
        Ping the Ollama /api/tags endpoint to check if the service is up.

        Performs a network request (2 s timeout) on every access; any
        failure is reported as "not available".
        """
        try:
            req = urllib.request.Request(
                f"{self.ollama_url}/api/tags",
                method="GET",
            )
            with urllib.request.urlopen(req, timeout=2):
                return True
        except Exception:
            # Deliberately broad: this is a best-effort availability probe.
            return False

    # ------------------------------------------------------------------ #
    @staticmethod
    def _confidence_from_logprobs(logprobs) -> float:
        """
        Convert a list of token log-probs into exp(mean log-prob), i.e. the
        geometric-mean token probability, clamped to [0, 1].

        Entries may be plain numbers or dicts carrying a numeric "logprob"
        field (NOTE(review): the exact response shape depends on the Ollama
        version — confirm against the deployed server). Anything else is
        ignored. Returns the neutral 0.5 when nothing usable is present.
        """
        if not isinstance(logprobs, (list, tuple)):
            return 0.5

        values = []
        for entry in logprobs:
            if isinstance(entry, dict):
                entry = entry.get("logprob")
            if isinstance(entry, bool) or not isinstance(entry, (int, float)):
                continue
            if math.isnan(entry):
                continue
            # Log-probabilities are <= 0 by definition; capping here also
            # keeps math.exp below from overflowing on malformed values.
            values.append(min(float(entry), 0.0))

        if not values:
            return 0.5

        # Geometric mean probability = exp(mean log-prob)
        avg_lp = sum(values) / len(values)
        return max(0.0, min(1.0, math.exp(avg_lp)))

    # ------------------------------------------------------------------ #
    def score(self, question: str, answer: str, context: str) -> float:
        """
        Returns a confidence score in [0, 1], or the neutral 0.5 on any failure.

        Prompt structure:
            Context: <context>
            Question: <question>
            Answer: <answer>

        The prompt is sent to Ollama's /api/generate with log-probs requested
        and ``num_predict=1``; the "logprobs" field of the response is reduced
        to a geometric-mean probability.
        NOTE(review): with num_predict=1 the returned log-probs presumably
        cover the generated continuation rather than the answer tokens in
        the prompt — confirm this matches the intended signal.

        Falls back to 0.5 when the service is unreachable or times out, the
        response is not valid UTF-8 JSON, or it contains no usable log-probs.
        """
        # Local import: only this method needs it.
        from http.client import HTTPException

        prompt = (
            f"Context: {context}\n\n"
            f"Question: {question}\n\n"
            f"Answer: {answer}"
        )
        payload = json.dumps({
            "model": self.model_name,
            "prompt": prompt,
            "stream": False,
            "options": {
                "logprobs": True,
                "temperature": 0,  # deterministic
                "num_predict": 1,  # we only need the continuation start
            },
        }).encode("utf-8")

        try:
            req = urllib.request.Request(
                f"{self.ollama_url}/api/generate",
                data=payload,
                headers={"Content-Type": "application/json"},
                method="POST",
            )
            with urllib.request.urlopen(req, timeout=self.timeout_sec) as resp:
                data = json.loads(resp.read().decode("utf-8"))
        except (OSError, ValueError, HTTPException):
            # OSError covers URLError/HTTPError, timeouts and connection
            # resets; ValueError covers JSONDecodeError, UnicodeDecodeError
            # and malformed URLs; HTTPException covers protocol-level errors.
            return 0.5  # graceful fallback — neutral confidence

        if not isinstance(data, dict):
            return 0.5

        # Missing / null / empty "logprobs" → neutral (model did not return them).
        return self._confidence_from_logprobs(data.get("logprobs") or [])

    # ------------------------------------------------------------------ #
    def score_batch(
        self,
        questions: List[str],
        answers: List[str],
        contexts: List[str],
    ) -> List[float]:
        """Score samples one request at a time, in input order."""
        return [
            self.score(q, a, c)
            for q, a, c in zip(questions, answers, contexts)
        ]
@@ -0,0 +1,115 @@
1
+ """
2
+ Signal 2 — NLI Faithfulness Score.
3
+
4
+ Uses cross-encoder/nli-deberta-v3-small to compute the probability that the
5
+ retrieved context *entails* the generated answer.
6
+
7
+ Hypothesis template (from thesis):
8
+ "The answer to '{question}' is: {answer}."
9
+
10
+ Returns the ENTAILMENT probability in [0, 1].
11
+ High score = answer is entailed by context = grounded.
12
+ Low score = answer contradicts or is unrelated to context = suspicious.
13
+ """
14
+
15
+ from typing import List
16
+
17
+ from .base import BaseSignal
18
+
19
# DeBERTa NLI label sets differ by model variant — handle all possibilities.
_ENTAIL_LABELS = {"entailment", "entail", "label_2", "yes"}
_CONTRA_LABELS = {"contradiction", "contradict", "label_0", "no"}


class NLISignal(BaseSignal):
    """NLI entailment probability between context (premise) and answer (hypothesis)."""

    def __init__(self, model_name: str = "cross-encoder/nli-deberta-v3-small"):
        self.model_name = model_name
        self._pipe = None  # lazy load

    # ------------------------------------------------------------------ #
    @property
    def is_available(self) -> bool:
        """True when the ``transformers`` package can be imported."""
        try:
            import transformers  # noqa: F401
            return True
        except ImportError:
            return False

    # ------------------------------------------------------------------ #
    def _load(self) -> None:
        """Create the text-classification pipeline on first use; no-op afterwards."""
        if self._pipe is None:
            from transformers import pipeline
            self._pipe = pipeline(
                "text-classification",
                model=self.model_name,
                top_k=None,  # return all label scores
                truncation=True,
                max_length=512,
            )

    # ------------------------------------------------------------------ #
    @staticmethod
    def _hypothesis(question: str, answer: str) -> str:
        """Render the hypothesis sentence; single source of truth for the template."""
        return f"The answer to '{question}' is: {answer}."

    # ------------------------------------------------------------------ #
    def score(self, question: str, answer: str, context: str) -> float:
        """
        Returns entailment probability in [0, 1].
        Higher = answer is more faithfully supported by context.
        """
        self._load()
        raw = self._pipe(
            {"text": str(context), "text_pair": self._hypothesis(question, answer)}
        )
        # raw is a list of dicts: [{"label": "ENTAILMENT", "score": 0.92}, ...]
        return self._extract_entailment(raw)

    # ------------------------------------------------------------------ #
    def _extract_entailment(self, raw) -> float:
        """
        Extract an entailment probability from one pipeline result,
        regardless of how the model labels its classes.

        Resolution order:
        1. the score of a recognised entailment label;
        2. otherwise, if a contradiction label is recognised, the summed
           score of every non-contradiction label (clamped to [0, 1]);
        3. otherwise the highest score present.
        Returns the neutral 0.5 for empty input.
        """
        if not raw:
            return 0.5

        # Normalise to a list if wrapped in an extra list
        items = raw[0] if isinstance(raw[0], list) else raw

        label_score = {
            item["label"].lower(): float(item["score"])
            for item in items
        }

        # Try to find entailment label directly
        for lbl in _ENTAIL_LABELS:
            if lbl in label_score:
                return label_score[lbl]

        # Fallback: everything not assigned to contradiction. This equals
        # 1 - P(contradiction) when the label scores sum to 1.
        if any(lbl in label_score for lbl in _CONTRA_LABELS):
            non_contradiction = sum(
                v for k, v in label_score.items()
                if k not in _CONTRA_LABELS
            )
            return max(0.0, min(1.0, non_contradiction))

        # Last resort: assume highest-scoring label is entailment
        return max(label_score.values()) if label_score else 0.5

    # ------------------------------------------------------------------ #
    def score_batch(
        self,
        questions: List[str],
        answers: List[str],
        contexts: List[str],
    ) -> List[float]:
        """
        Score many samples with a single pipeline call.

        Iteration stops at the shortest of the three input lists. An empty
        batch returns [] without loading the model.
        """
        inputs = [
            {"text": str(c), "text_pair": self._hypothesis(q, a)}
            for q, a, c in zip(questions, answers, contexts)
        ]
        if not inputs:
            return []

        self._load()
        results = self._pipe(inputs)
        return [self._extract_entailment(r) for r in results]
@@ -0,0 +1,85 @@
1
+ """
2
+ Signal 3 — Semantic Attribution via Cosine Similarity.
3
+
4
+ Uses all-MiniLM-L6-v2 (384-dim) to embed the answer and the retrieved context,
5
+ then computes their cosine similarity.
6
+
7
+ Direction note
8
+ --------------
9
+ This signal returns cosine similarity clamped to [0, 1] (negative values become 0).
10
+ The CGSDetector decides whether high cosine = risk or high cosine = grounding
11
+ based on the calibrated ``direction`` attribute.
12
+ """
13
+
14
+ from typing import List
15
+ import numpy as np
16
+
17
+ from .base import BaseSignal
18
+
19
+
20
class CosineSignal(BaseSignal):
    """
    Cosine similarity between the answer embedding and the context embedding.

    The sentence-transformers model named by ``model_name`` is loaded lazily
    on the first call to ``score`` or ``score_batch``. Scores are clamped to
    [0, 1], so negative similarities are reported as 0.0.
    """

    def __init__(self, model_name: str = "all-MiniLM-L6-v2"):
        self.model_name = model_name
        self._model = None  # populated by _load() on first use

    # ------------------------------------------------------------------ #
    @property
    def is_available(self) -> bool:
        """Return True when the ``sentence_transformers`` package imports."""
        try:
            import sentence_transformers  # noqa: F401
        except ImportError:
            return False
        return True

    # ------------------------------------------------------------------ #
    def _load(self) -> None:
        """Instantiate the embedding model once; later calls are no-ops."""
        if self._model is None:
            from sentence_transformers import SentenceTransformer

            self._model = SentenceTransformer(self.model_name)

    # ------------------------------------------------------------------ #
    def score(self, question: str, answer: str, context: str) -> float:
        """
        Return cosine similarity between answer and context, clamped to [0, 1].

        ``question`` is accepted but not used (kept for API consistency).
        Returns 0.0 when either embedding has (near-)zero norm.
        """
        self._load()
        embs = self._model.encode(
            [str(answer), str(context)],
            convert_to_numpy=True,
            show_progress_bar=False,
        )
        a, b = embs[0], embs[1]
        norm = np.linalg.norm(a) * np.linalg.norm(b)
        if norm < 1e-10:
            # Degenerate embedding: avoid dividing by (almost) zero.
            return 0.0
        cosine = float(np.dot(a, b) / norm)
        return max(0.0, min(1.0, cosine))

    # ------------------------------------------------------------------ #
    def score_batch(
        self,
        questions: List[str],
        answers: List[str],
        contexts: List[str],
    ) -> List[float]:
        """
        Score many (answer, context) pairs with two encoder calls.

        All answers are encoded in one call and all contexts in another,
        then the row-wise cosine is computed. ``questions`` is accepted
        but not used (kept for API consistency).

        Returns one clamped similarity per pair, or ``[]`` for an empty
        batch.

        Raises:
            ValueError: if ``answers`` and ``contexts`` differ in length.
        """
        all_answers = [str(a) for a in answers]
        all_contexts = [str(c) for c in contexts]

        if len(all_answers) != len(all_contexts):
            # Without this check a length-1 side would silently broadcast
            # against the other, pairing one text with every row.
            raise ValueError(
                "answers and contexts must have the same length "
                f"(got {len(all_answers)} and {len(all_contexts)})"
            )
        if not all_answers:
            # Nothing to score; skip model loading and the axis-1 norm,
            # which is not defined for an empty 1-D embedding array.
            return []

        self._load()
        emb_ans = self._model.encode(
            all_answers, convert_to_numpy=True, show_progress_bar=False
        )
        emb_ctx = self._model.encode(
            all_contexts, convert_to_numpy=True, show_progress_bar=False
        )

        # Row-wise cosine: normalise each row, then take per-row dot products.
        # Norms are floored at 1e-10 so zero vectors yield 0 instead of NaN.
        norms_ans = np.linalg.norm(emb_ans, axis=1, keepdims=True).clip(min=1e-10)
        norms_ctx = np.linalg.norm(emb_ctx, axis=1, keepdims=True).clip(min=1e-10)
        emb_ans_n = emb_ans / norms_ans
        emb_ctx_n = emb_ctx / norms_ctx
        cosines = (emb_ans_n * emb_ctx_n).sum(axis=1)
        return [max(0.0, min(1.0, float(c))) for c in cosines]
@@ -0,0 +1,33 @@
1
+ Metadata-Version: 2.4
2
+ Name: cgs-rag
3
+ Version: 0.1.0
4
+ Summary: Composite Grounding Score: multi-signal hallucination detection for production RAG systems
5
+ Author-email: Nishant Kumar <nishant.k@marmeto.com>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/nishant-k-marmeto/cgs-rag
8
+ Project-URL: Repository, https://github.com/nishant-k-marmeto/cgs-rag
9
+ Keywords: rag,retrieval-augmented-generation,hallucination,detection,nlp,llm,grounding,faithfulness
10
+ Classifier: Development Status :: 3 - Alpha
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: Intended Audience :: Science/Research
13
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.9
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: License :: OSI Approved :: MIT License
20
+ Classifier: Operating System :: OS Independent
21
+ Requires-Python: >=3.9
22
+ Requires-Dist: torch>=2.0.0
23
+ Requires-Dist: transformers>=4.36.0
24
+ Requires-Dist: sentence-transformers>=2.2.0
25
+ Requires-Dist: scikit-learn>=1.3.0
26
+ Requires-Dist: numpy>=1.24.0
27
+ Requires-Dist: pandas>=2.0.0
28
+ Provides-Extra: dev
29
+ Requires-Dist: pytest>=7.0; extra == "dev"
30
+ Requires-Dist: pytest-cov; extra == "dev"
31
+ Requires-Dist: jupyter; extra == "dev"
32
+ Requires-Dist: matplotlib; extra == "dev"
33
+ Requires-Dist: seaborn; extra == "dev"
@@ -0,0 +1,15 @@
1
+ pyproject.toml
2
+ cgs_rag/__init__.py
3
+ cgs_rag/calibration.py
4
+ cgs_rag/detector.py
5
+ cgs_rag/result.py
6
+ cgs_rag.egg-info/PKG-INFO
7
+ cgs_rag.egg-info/SOURCES.txt
8
+ cgs_rag.egg-info/dependency_links.txt
9
+ cgs_rag.egg-info/requires.txt
10
+ cgs_rag.egg-info/top_level.txt
11
+ cgs_rag/signals/__init__.py
12
+ cgs_rag/signals/base.py
13
+ cgs_rag/signals/signal1_token.py
14
+ cgs_rag/signals/signal2_nli.py
15
+ cgs_rag/signals/signal3_cosine.py
@@ -0,0 +1,13 @@
1
+ torch>=2.0.0
2
+ transformers>=4.36.0
3
+ sentence-transformers>=2.2.0
4
+ scikit-learn>=1.3.0
5
+ numpy>=1.24.0
6
+ pandas>=2.0.0
7
+
8
+ [dev]
9
+ pytest>=7.0
10
+ pytest-cov
11
+ jupyter
12
+ matplotlib
13
+ seaborn
@@ -0,0 +1 @@
1
+ cgs_rag
@@ -0,0 +1,57 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61.0"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "cgs-rag"
7
+ version = "0.1.0"
8
+ description = "Composite Grounding Score: multi-signal hallucination detection for production RAG systems"
9
+ requires-python = ">=3.9"
10
+ license = {text = "MIT"}
11
+ authors = [{name = "Nishant Kumar", email = "nishant.k@marmeto.com"}]
12
+
13
+ keywords = [
14
+ "rag", "retrieval-augmented-generation",
15
+ "hallucination", "detection",
16
+ "nlp", "llm", "grounding", "faithfulness",
17
+ ]
18
+
19
+ classifiers = [
20
+ "Development Status :: 3 - Alpha",
21
+ "Intended Audience :: Developers",
22
+ "Intended Audience :: Science/Research",
23
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
24
+ "Programming Language :: Python :: 3",
25
+ "Programming Language :: Python :: 3.9",
26
+ "Programming Language :: Python :: 3.10",
27
+ "Programming Language :: Python :: 3.11",
28
+ "Programming Language :: Python :: 3.12",
29
+ "License :: OSI Approved :: MIT License",
30
+ "Operating System :: OS Independent",
31
+ ]
32
+
33
+ dependencies = [
34
+ "torch>=2.0.0",
35
+ "transformers>=4.36.0",
36
+ "sentence-transformers>=2.2.0",
37
+ "scikit-learn>=1.3.0",
38
+ "numpy>=1.24.0",
39
+ "pandas>=2.0.0",
40
+ ]
41
+
42
+ [project.optional-dependencies]
43
+ dev = [
44
+ "pytest>=7.0",
45
+ "pytest-cov",
46
+ "jupyter",
47
+ "matplotlib",
48
+ "seaborn",
49
+ ]
50
+
51
+ [project.urls]
52
+ Homepage = "https://github.com/nishant-k-marmeto/cgs-rag"
53
+ Repository = "https://github.com/nishant-k-marmeto/cgs-rag"
54
+
55
+ [tool.setuptools.packages.find]
56
+ where = ["."]
57
+ include = ["cgs_rag*"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+