TruthTorchLM 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- truthtorchlm-0.1.0/LICENSE +21 -0
- truthtorchlm-0.1.0/LICENSE copy +21 -0
- truthtorchlm-0.1.0/PKG-INFO +209 -0
- truthtorchlm-0.1.0/README.md +177 -0
- truthtorchlm-0.1.0/setup.cfg +4 -0
- truthtorchlm-0.1.0/setup.py +38 -0
- truthtorchlm-0.1.0/src/TruthTorchLM/__init__.py +16 -0
- truthtorchlm-0.1.0/src/TruthTorchLM/availability.py +14 -0
- truthtorchlm-0.1.0/src/TruthTorchLM/calibration.py +36 -0
- truthtorchlm-0.1.0/src/TruthTorchLM/evaluators/__init__.py +8 -0
- truthtorchlm-0.1.0/src/TruthTorchLM/evaluators/bleu.py +20 -0
- truthtorchlm-0.1.0/src/TruthTorchLM/evaluators/correctness_evaluator.py +14 -0
- truthtorchlm-0.1.0/src/TruthTorchLM/evaluators/eval_truth_method.py +59 -0
- truthtorchlm-0.1.0/src/TruthTorchLM/evaluators/model_judge.py +61 -0
- truthtorchlm-0.1.0/src/TruthTorchLM/evaluators/rouge.py +19 -0
- truthtorchlm-0.1.0/src/TruthTorchLM/generation.py +389 -0
- truthtorchlm-0.1.0/src/TruthTorchLM/long_form_generation/__init__.py +5 -0
- truthtorchlm-0.1.0/src/TruthTorchLM/long_form_generation/decomposition_methods/__init__.py +8 -0
- truthtorchlm-0.1.0/src/TruthTorchLM/long_form_generation/decomposition_methods/decomposition_method.py +27 -0
- truthtorchlm-0.1.0/src/TruthTorchLM/long_form_generation/decomposition_methods/structured_decomposition_api.py +50 -0
- truthtorchlm-0.1.0/src/TruthTorchLM/long_form_generation/decomposition_methods/structured_decomposition_local.py +43 -0
- truthtorchlm-0.1.0/src/TruthTorchLM/long_form_generation/decomposition_methods/unstructured_decomposition_api.py +50 -0
- truthtorchlm-0.1.0/src/TruthTorchLM/long_form_generation/decomposition_methods/unstructured_decomposition_local.py +65 -0
- truthtorchlm-0.1.0/src/TruthTorchLM/long_form_generation/evaluators/__init__.py +4 -0
- truthtorchlm-0.1.0/src/TruthTorchLM/long_form_generation/evaluators/eval_claim.py +223 -0
- truthtorchlm-0.1.0/src/TruthTorchLM/long_form_generation/evaluators/long_gen_eval.py +158 -0
- truthtorchlm-0.1.0/src/TruthTorchLM/long_form_generation/generation.py +167 -0
- truthtorchlm-0.1.0/src/TruthTorchLM/long_form_generation/statement_check_methods/__init__.py +7 -0
- truthtorchlm-0.1.0/src/TruthTorchLM/long_form_generation/statement_check_methods/answer_statement_entailment.py +219 -0
- truthtorchlm-0.1.0/src/TruthTorchLM/long_form_generation/statement_check_methods/question_answer_generation.py +354 -0
- truthtorchlm-0.1.0/src/TruthTorchLM/long_form_generation/statement_check_methods/question_generation.py +293 -0
- truthtorchlm-0.1.0/src/TruthTorchLM/long_form_generation/statement_check_methods/statement_check_method.py +46 -0
- truthtorchlm-0.1.0/src/TruthTorchLM/long_form_generation/utils/__init__.py +3 -0
- truthtorchlm-0.1.0/src/TruthTorchLM/long_form_generation/utils/dataset_utils.py +90 -0
- truthtorchlm-0.1.0/src/TruthTorchLM/long_form_generation/utils/eval_utils.py +188 -0
- truthtorchlm-0.1.0/src/TruthTorchLM/long_form_generation/utils/safe_utils.py +231 -0
- truthtorchlm-0.1.0/src/TruthTorchLM/normalizers/__init__.py +4 -0
- truthtorchlm-0.1.0/src/TruthTorchLM/normalizers/normalizer.py +36 -0
- truthtorchlm-0.1.0/src/TruthTorchLM/normalizers/sigmoid_normalizer.py +34 -0
- truthtorchlm-0.1.0/src/TruthTorchLM/scoring_methods/__init__.py +5 -0
- truthtorchlm-0.1.0/src/TruthTorchLM/scoring_methods/length_normalized_scoring.py +12 -0
- truthtorchlm-0.1.0/src/TruthTorchLM/scoring_methods/log_prob_scoring.py +11 -0
- truthtorchlm-0.1.0/src/TruthTorchLM/scoring_methods/scoring_method.py +19 -0
- truthtorchlm-0.1.0/src/TruthTorchLM/templates.py +169 -0
- truthtorchlm-0.1.0/src/TruthTorchLM/truth_methods/__init__.py +31 -0
- truthtorchlm-0.1.0/src/TruthTorchLM/truth_methods/attention_score.py +52 -0
- truthtorchlm-0.1.0/src/TruthTorchLM/truth_methods/confidence.py +59 -0
- truthtorchlm-0.1.0/src/TruthTorchLM/truth_methods/cross_examination.py +164 -0
- truthtorchlm-0.1.0/src/TruthTorchLM/truth_methods/eccentricity_confidence.py +74 -0
- truthtorchlm-0.1.0/src/TruthTorchLM/truth_methods/eccentricity_uncertainty.py +69 -0
- truthtorchlm-0.1.0/src/TruthTorchLM/truth_methods/entropy.py +66 -0
- truthtorchlm-0.1.0/src/TruthTorchLM/truth_methods/google_search_check.py +144 -0
- truthtorchlm-0.1.0/src/TruthTorchLM/truth_methods/inside.py +49 -0
- truthtorchlm-0.1.0/src/TruthTorchLM/truth_methods/kernel_language_entropy.py +81 -0
- truthtorchlm-0.1.0/src/TruthTorchLM/truth_methods/lars.py +479 -0
- truthtorchlm-0.1.0/src/TruthTorchLM/truth_methods/mars.py +196 -0
- truthtorchlm-0.1.0/src/TruthTorchLM/truth_methods/matrix_degree_confidence.py +78 -0
- truthtorchlm-0.1.0/src/TruthTorchLM/truth_methods/matrix_degree_uncertainty.py +74 -0
- truthtorchlm-0.1.0/src/TruthTorchLM/truth_methods/multi_llm_collab.py +535 -0
- truthtorchlm-0.1.0/src/TruthTorchLM/truth_methods/num_semantic_set_uncertainty.py +70 -0
- truthtorchlm-0.1.0/src/TruthTorchLM/truth_methods/p_true.py +71 -0
- truthtorchlm-0.1.0/src/TruthTorchLM/truth_methods/saplma.py +206 -0
- truthtorchlm-0.1.0/src/TruthTorchLM/truth_methods/self_detection.py +133 -0
- truthtorchlm-0.1.0/src/TruthTorchLM/truth_methods/semantic_entropy.py +93 -0
- truthtorchlm-0.1.0/src/TruthTorchLM/truth_methods/sentSAR.py +101 -0
- truthtorchlm-0.1.0/src/TruthTorchLM/truth_methods/sum_eigen_uncertainty.py +71 -0
- truthtorchlm-0.1.0/src/TruthTorchLM/truth_methods/tokenSAR.py +76 -0
- truthtorchlm-0.1.0/src/TruthTorchLM/truth_methods/truth_method.py +73 -0
- truthtorchlm-0.1.0/src/TruthTorchLM/truth_methods/verbalized_confidence.py +77 -0
- truthtorchlm-0.1.0/src/TruthTorchLM/utils/__init__.py +5 -0
- truthtorchlm-0.1.0/src/TruthTorchLM/utils/calibration_utils.py +64 -0
- truthtorchlm-0.1.0/src/TruthTorchLM/utils/common_utils.py +374 -0
- truthtorchlm-0.1.0/src/TruthTorchLM/utils/dataset_utils.py +127 -0
- truthtorchlm-0.1.0/src/TruthTorchLM/utils/eval_utils.py +280 -0
- truthtorchlm-0.1.0/src/TruthTorchLM/utils/google_search_utils.py +136 -0
- truthtorchlm-0.1.0/src/TruthTorchLM.egg-info/PKG-INFO +209 -0
- truthtorchlm-0.1.0/src/TruthTorchLM.egg-info/SOURCES.txt +78 -0
- truthtorchlm-0.1.0/src/TruthTorchLM.egg-info/dependency_links.txt +1 -0
- truthtorchlm-0.1.0/src/TruthTorchLM.egg-info/requires.txt +21 -0
- truthtorchlm-0.1.0/src/TruthTorchLM.egg-info/top_level.txt +1 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024 Yavuz Faruk Bakman
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024 Yavuz Faruk Bakman
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: TruthTorchLM
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: TruthTorchLM is an open-source library designed to detect and mitigate hallucinations in text generation models. The library integrates state-of-the-art methods, offers comprehensive benchmarking tools across various tasks, and enables seamless integration with popular frameworks like Huggingface and LiteLLM.
|
|
5
|
+
Author: Yavuz Faruk Bakman
|
|
6
|
+
Author-email: ybakman@usc.edu
|
|
7
|
+
Requires-Python: >=3.10
|
|
8
|
+
Description-Content-Type: text/markdown
|
|
9
|
+
License-File: LICENSE
|
|
10
|
+
License-File: LICENSE copy
|
|
11
|
+
Requires-Dist: aiohttp==3.9.3
|
|
12
|
+
Requires-Dist: evaluate==0.4.1
|
|
13
|
+
Requires-Dist: instructor
|
|
14
|
+
Requires-Dist: litellm==1.57.0
|
|
15
|
+
Requires-Dist: nest_asyncio==1.6.0
|
|
16
|
+
Requires-Dist: numpy==1.26.4
|
|
17
|
+
Requires-Dist: outlines==0.1.4
|
|
18
|
+
Requires-Dist: pandas==2.2.3
|
|
19
|
+
Requires-Dist: pydantic
|
|
20
|
+
Requires-Dist: PyYAML
|
|
21
|
+
Requires-Dist: Requests==2.32.3
|
|
22
|
+
Requires-Dist: scikit_learn==1.4.1.post1
|
|
23
|
+
Requires-Dist: scipy==1.13.0
|
|
24
|
+
Requires-Dist: sentence_transformers==3.1.1
|
|
25
|
+
Requires-Dist: termcolor==2.5.0
|
|
26
|
+
Requires-Dist: torch==2.2.2
|
|
27
|
+
Requires-Dist: tqdm==4.66.2
|
|
28
|
+
Requires-Dist: transformers==4.44.2
|
|
29
|
+
Requires-Dist: absl-py
|
|
30
|
+
Requires-Dist: nltk
|
|
31
|
+
Requires-Dist: rouge_score
|
|
32
|
+
|
|
33
|
+
# TruthTorchLM: A Comprehensive Library for Hallucination Detection in LLMs
|
|
34
|
+
|
|
35
|
+
**TruthTorchLM** is an open-source library designed to detect and mitigate hallucinations in text generation models. The library integrates state-of-the-art methods, offers comprehensive benchmarking tools across various tasks, and enables seamless integration with popular frameworks like Huggingface and LiteLLM.
|
|
36
|
+
|
|
37
|
+
---
|
|
38
|
+
|
|
39
|
+
## Features
|
|
40
|
+
|
|
41
|
+
- **State-of-the-Art Methods**: Implementations of advanced hallucination detection techniques.
|
|
42
|
+
- **Evaluation Tools**: Benchmark hallucination detection methods using various metrics like AUROC, PRR, and Accuracy.
|
|
43
|
+
- **Calibration**: Normalize and calibrate truth values for interpretable and comparable hallucination scores.
|
|
44
|
+
- **Integration**: Seamlessly works with Huggingface and LiteLLM.
|
|
45
|
+
- **Long-Form Generation**: Adapts detection methods to handle long-form text generations effectively.
|
|
46
|
+
- **Extendability**: Provides an intuitive interface for implementing new hallucination detection methods.
|
|
47
|
+
|
|
48
|
+
---
|
|
49
|
+
|
|
50
|
+
## Installation
|
|
51
|
+
|
|
52
|
+
Install TruthTorchLM using pip:
|
|
53
|
+
|
|
54
|
+
```bash
|
|
55
|
+
pip install TruthTorchLM
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
---
|
|
59
|
+
|
|
60
|
+
## Quick Start
|
|
61
|
+
|
|
62
|
+
### Setting Up a Model
|
|
63
|
+
|
|
64
|
+
You can define your model and tokenizer using Huggingface or specify an API-based model:
|
|
65
|
+
|
|
66
|
+
```python
|
|
67
|
+
from transformers import AutoModelForCausalLM, AutoTokenizer
|
|
68
|
+
import TruthTorchLM as ttlm
|
|
69
|
+
import torch
|
|
70
|
+
|
|
71
|
+
# Huggingface model
|
|
72
|
+
model = AutoModelForCausalLM.from_pretrained(
|
|
73
|
+
"meta-llama/Llama-2-7b-chat-hf",
|
|
74
|
+
torch_dtype=torch.bfloat16
|
|
75
|
+
).to('cuda:0')
|
|
76
|
+
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf", use_fast=False)
|
|
77
|
+
|
|
78
|
+
# API model
|
|
79
|
+
api_model = "gpt-4o"
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
### Generating Text with Truth Values
|
|
83
|
+
|
|
84
|
+
TruthTorchLM generates messages with a truth value, indicating whether the model output is hallucinating or not. Various methods (called **truth methods**) can be used to detect hallucinations. Each method can have different algorithms and output ranges. Lower truth values generally suggest hallucinations. This functionality is mostly useful for short-form QA:
|
|
85
|
+
```python
|
|
86
|
+
# Define truth methods
|
|
87
|
+
lars = ttlm.truth_methods.LARS()
|
|
88
|
+
confidence = ttlm.truth_methods.Confidence()
|
|
89
|
+
self_detection = ttlm.truth_methods.SelfDetection(number_of_questions=5)
|
|
90
|
+
truth_methods = [lars, confidence, self_detection]
|
|
91
|
+
```
|
|
92
|
+
```python
|
|
93
|
+
# Define a chat history
|
|
94
|
+
chat = [{"role": "system", "content": "You are a helpful assistant. Give short and precise answers."},
|
|
95
|
+
{"role": "user", "content": "What is the capital city of France?"}]
|
|
96
|
+
```
|
|
97
|
+
```python
|
|
98
|
+
# Generate text with truth values (Huggingface model)
|
|
99
|
+
output_hf_model = ttlm.generate_with_truth_value(
|
|
100
|
+
model=model,
|
|
101
|
+
tokenizer=tokenizer,
|
|
102
|
+
messages=chat,
|
|
103
|
+
truth_methods=truth_methods,
|
|
104
|
+
max_new_tokens=100,
|
|
105
|
+
temperature=0.7
|
|
106
|
+
)
|
|
107
|
+
# Generate text with truth values (API model)
|
|
108
|
+
output_api_model = ttlm.generate_with_truth_value(
|
|
109
|
+
model=api_model,
|
|
110
|
+
messages=chat,
|
|
111
|
+
truth_methods=truth_methods
|
|
112
|
+
)
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
### Calibrating Truth Methods
|
|
116
|
+
Truth values for different methods may not be directly comparable. Use the `calibrate_truth_method` function to normalize truth values to a common range for better interpretability.
|
|
117
|
+
|
|
118
|
+
```python
|
|
119
|
+
model_judge = ttlm.evaluators.ModelJudge('gpt-4o-mini')
|
|
120
|
+
calibration_results = ttlm.calibrate_truth_method(
|
|
121
|
+
dataset='trivia_qa',
|
|
122
|
+
model=model,
|
|
123
|
+
truth_methods=truth_methods,
|
|
124
|
+
tokenizer=tokenizer,
|
|
125
|
+
correctness_evaluator=model_judge,
|
|
126
|
+
size_of_data=1000,
|
|
127
|
+
max_new_tokens=64
|
|
128
|
+
)
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
### Evaluating Truth Methods
|
|
132
|
+
|
|
133
|
+
We can evaluate the truth methods with the `evaluate_truth_method` function. We can define different evaluation metrics including AUROC, AUPRC, AUARC, Accuracy, F1, Precision, Recall, PRR:
|
|
134
|
+
|
|
135
|
+
```python
|
|
136
|
+
results = ttlm.evaluate_truth_method(
|
|
137
|
+
dataset='trivia_qa',
|
|
138
|
+
model=model,
|
|
139
|
+
truth_methods=truth_methods,
|
|
140
|
+
eval_metrics=['auroc', 'prr'],
|
|
141
|
+
tokenizer=tokenizer,
|
|
142
|
+
size_of_data=1000,
|
|
143
|
+
correctness_evaluator=model_judge,
|
|
144
|
+
max_new_tokens=64
|
|
145
|
+
)
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
---
|
|
149
|
+
|
|
150
|
+
## Available Hallucination Detection Methods
|
|
151
|
+
|
|
152
|
+
- **LARS**: [Do Not Design, Learn: A Trainable Scoring Function for Uncertainty Estimation in Generative LLMs](https://arxiv.org/pdf/2406.11278).
|
|
153
|
+
- **Confidence**: [Uncertainty Estimation in Autoregressive Structured Prediction](https://openreview.net/pdf?id=jN5y-zb5Q7m).
|
|
154
|
+
- **Entropy**: [Uncertainty Estimation in Autoregressive Structured Prediction](https://openreview.net/pdf?id=jN5y-zb5Q7m).
|
|
155
|
+
- **SelfDetection**: [Knowing What LLMs DO NOT Know: A Simple Yet Effective Self-Detection Method](https://arxiv.org/pdf/2310.17918).
|
|
156
|
+
- **AttentionScore**: [LLM-Check: Investigating Detection of Hallucinations in Large Language Models](https://openreview.net/pdf?id=LYx4w3CAgy).
|
|
157
|
+
- **CrossExamination**: [LM vs LM: Detecting Factual Errors via Cross Examination](https://arxiv.org/pdf/2305.13281).
|
|
158
|
+
- **EccentricityConfidence**: [Generating with Confidence: Uncertainty Quantification for Black-box Large Language Models](https://arxiv.org/pdf/2305.19187).
|
|
159
|
+
- **EccentricityUncertainty**: [Generating with Confidence: Uncertainty Quantification for Black-box Large Language Models](https://arxiv.org/pdf/2305.19187).
|
|
160
|
+
- **GoogleSearchCheck**: [FacTool: Factuality Detection in Generative AI -- A Tool Augmented Framework for Multi-Task and Multi-Domain Scenarios](https://arxiv.org/pdf/2307.13528).
|
|
161
|
+
- **Inside**: [INSIDE: LLMs' Internal States Retain the Power of Hallucination Detection](https://openreview.net/pdf?id=Zj12nzlQbz).
|
|
162
|
+
- **KernelLanguageEntropy**: [Kernel Language Entropy: Fine-grained Uncertainty Quantification for LLMs from Semantic Similarities](https://arxiv.org/pdf/2405.20003).
|
|
163
|
+
- **MARS**: [MARS: Meaning-Aware Response Scoring for Uncertainty Estimation in Generative LLMs](https://aclanthology.org/2024.acl-long.419.pdf).
|
|
164
|
+
- **MatrixDegreeConfidence**: [Generating with Confidence: Uncertainty Quantification for Black-box Large Language Models](https://arxiv.org/pdf/2305.19187).
|
|
165
|
+
- **MatrixDegreeUncertainty**: [Generating with Confidence: Uncertainty Quantification for Black-box Large Language Models](https://arxiv.org/pdf/2305.19187).
|
|
166
|
+
- **MultiLLMCollab**: [Don’t Hallucinate, Abstain: Identifying LLM Knowledge Gaps via Multi-LLM Collaboration](https://arxiv.org/pdf/2402.00367).
|
|
167
|
+
- **NumSemanticSetUncertainty**: [Semantic Uncertainty: Linguistic Invariances for Uncertainty Estimation in Natural Language Generation](https://arxiv.org/pdf/2302.09664).
|
|
168
|
+
- **PTrue**: [Language Models (Mostly) Know What They Know](https://arxiv.org/pdf/2207.05221).
|
|
169
|
+
- **Saplma**: [The Internal State of an LLM Knows When It’s Lying](https://aclanthology.org/2023.findings-emnlp.68.pdf).
|
|
170
|
+
- **SemanticEntropy**: [Semantic Uncertainty: Linguistic Invariances for Uncertainty Estimation in Natural Language Generation](https://arxiv.org/pdf/2302.09664).
|
|
171
|
+
- **sentSAR**: [Shifting Attention to Relevance: Towards the Predictive Uncertainty Quantification of Free-Form Large Language Models](https://aclanthology.org/2024.acl-long.276.pdf).
|
|
172
|
+
- **SumEigenUncertainty**: [Generating with Confidence: Uncertainty Quantification for Black-box Large Language Models](https://arxiv.org/pdf/2305.19187).
|
|
173
|
+
- **tokenSAR**: [Shifting Attention to Relevance: Towards the Predictive Uncertainty Quantification of Free-Form Large Language Models](https://aclanthology.org/2024.acl-long.276.pdf).
|
|
174
|
+
- **VerbalizedConfidence**: [Just Ask for Calibration: Strategies for Eliciting Calibrated Confidence Scores from Language Models Fine-Tuned with Human Feedback](https://openreview.net/pdf?id=g3faCfrwm7).
|
|
175
|
+
|
|
176
|
+
---
|
|
177
|
+
|
|
178
|
+
## Contributors
|
|
179
|
+
|
|
180
|
+
- **Yavuz Faruk Bakman** (ybakman@usc.edu)
|
|
181
|
+
- **Duygu Nur Yaldiz** (yaldiz@usc.edu)
|
|
182
|
+
- **Sungmin Kang** (kangsung@usc.edu)
|
|
183
|
+
- **Hayrettin Eren Yildiz** (hayereyil@gmail.com)
|
|
184
|
+
- **Alperen Ozis** (alperenozis@gmail.com)
|
|
185
|
+
|
|
186
|
+
---
|
|
187
|
+
|
|
188
|
+
## Citation
|
|
189
|
+
|
|
190
|
+
If you use TruthTorchLM in your research, please cite:
|
|
191
|
+
|
|
192
|
+
```bibtex
|
|
193
|
+
@misc{truthtorchlm2025,
|
|
194
|
+
title={TruthTorchLM: A Comprehensive Library for Hallucination Detection in Large Language Models},
|
|
195
|
+
author={Yavuz Faruk Bakman and Duygu Nur Yaldiz and Sungmin Kang and Hayrettin Eren Yildiz and Alperen Ozis},
|
|
196
|
+
year={2025},
|
|
197
|
+
howpublished={GitHub},
|
|
198
|
+
url={https://github.com/Ybakman/TruthTorchLM}
|
|
199
|
+
}
|
|
200
|
+
```
|
|
201
|
+
|
|
202
|
+
---
|
|
203
|
+
|
|
204
|
+
## License
|
|
205
|
+
|
|
206
|
+
TruthTorchLM is released under the [MIT License](LICENSE).
|
|
207
|
+
|
|
208
|
+
For inquiries or support, feel free to contact the maintainers.
|
|
209
|
+
|
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
# TruthTorchLM: A Comprehensive Library for Hallucination Detection in LLMs
|
|
2
|
+
|
|
3
|
+
**TruthTorchLM** is an open-source library designed to detect and mitigate hallucinations in text generation models. The library integrates state-of-the-art methods, offers comprehensive benchmarking tools across various tasks, and enables seamless integration with popular frameworks like Huggingface and LiteLLM.
|
|
4
|
+
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
## Features
|
|
8
|
+
|
|
9
|
+
- **State-of-the-Art Methods**: Implementations of advanced hallucination detection techniques.
|
|
10
|
+
- **Evaluation Tools**: Benchmark hallucination detection methods using various metrics like AUROC, PRR, and Accuracy.
|
|
11
|
+
- **Calibration**: Normalize and calibrate truth values for interpretable and comparable hallucination scores.
|
|
12
|
+
- **Integration**: Seamlessly works with Huggingface and LiteLLM.
|
|
13
|
+
- **Long-Form Generation**: Adapts detection methods to handle long-form text generations effectively.
|
|
14
|
+
- **Extendability**: Provides an intuitive interface for implementing new hallucination detection methods.
|
|
15
|
+
|
|
16
|
+
---
|
|
17
|
+
|
|
18
|
+
## Installation
|
|
19
|
+
|
|
20
|
+
Install TruthTorchLM using pip:
|
|
21
|
+
|
|
22
|
+
```bash
|
|
23
|
+
pip install TruthTorchLM
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
---
|
|
27
|
+
|
|
28
|
+
## Quick Start
|
|
29
|
+
|
|
30
|
+
### Setting Up a Model
|
|
31
|
+
|
|
32
|
+
You can define your model and tokenizer using Huggingface or specify an API-based model:
|
|
33
|
+
|
|
34
|
+
```python
|
|
35
|
+
from transformers import AutoModelForCausalLM, AutoTokenizer
|
|
36
|
+
import TruthTorchLM as ttlm
|
|
37
|
+
import torch
|
|
38
|
+
|
|
39
|
+
# Huggingface model
|
|
40
|
+
model = AutoModelForCausalLM.from_pretrained(
|
|
41
|
+
"meta-llama/Llama-2-7b-chat-hf",
|
|
42
|
+
torch_dtype=torch.bfloat16
|
|
43
|
+
).to('cuda:0')
|
|
44
|
+
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf", use_fast=False)
|
|
45
|
+
|
|
46
|
+
# API model
|
|
47
|
+
api_model = "gpt-4o"
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
### Generating Text with Truth Values
|
|
51
|
+
|
|
52
|
+
TruthTorchLM generates messages with a truth value, indicating whether the model output is hallucinating or not. Various methods (called **truth methods**) can be used to detect hallucinations. Each method can have different algorithms and output ranges. Lower truth values generally suggest hallucinations. This functionality is mostly useful for short-form QA:
|
|
53
|
+
```python
|
|
54
|
+
# Define truth methods
|
|
55
|
+
lars = ttlm.truth_methods.LARS()
|
|
56
|
+
confidence = ttlm.truth_methods.Confidence()
|
|
57
|
+
self_detection = ttlm.truth_methods.SelfDetection(number_of_questions=5)
|
|
58
|
+
truth_methods = [lars, confidence, self_detection]
|
|
59
|
+
```
|
|
60
|
+
```python
|
|
61
|
+
# Define a chat history
|
|
62
|
+
chat = [{"role": "system", "content": "You are a helpful assistant. Give short and precise answers."},
|
|
63
|
+
{"role": "user", "content": "What is the capital city of France?"}]
|
|
64
|
+
```
|
|
65
|
+
```python
|
|
66
|
+
# Generate text with truth values (Huggingface model)
|
|
67
|
+
output_hf_model = ttlm.generate_with_truth_value(
|
|
68
|
+
model=model,
|
|
69
|
+
tokenizer=tokenizer,
|
|
70
|
+
messages=chat,
|
|
71
|
+
truth_methods=truth_methods,
|
|
72
|
+
max_new_tokens=100,
|
|
73
|
+
temperature=0.7
|
|
74
|
+
)
|
|
75
|
+
# Generate text with truth values (API model)
|
|
76
|
+
output_api_model = ttlm.generate_with_truth_value(
|
|
77
|
+
model=api_model,
|
|
78
|
+
messages=chat,
|
|
79
|
+
truth_methods=truth_methods
|
|
80
|
+
)
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
### Calibrating Truth Methods
|
|
84
|
+
Truth values for different methods may not be directly comparable. Use the `calibrate_truth_method` function to normalize truth values to a common range for better interpretability.
|
|
85
|
+
|
|
86
|
+
```python
|
|
87
|
+
model_judge = ttlm.evaluators.ModelJudge('gpt-4o-mini')
|
|
88
|
+
calibration_results = ttlm.calibrate_truth_method(
|
|
89
|
+
dataset='trivia_qa',
|
|
90
|
+
model=model,
|
|
91
|
+
truth_methods=truth_methods,
|
|
92
|
+
tokenizer=tokenizer,
|
|
93
|
+
correctness_evaluator=model_judge,
|
|
94
|
+
size_of_data=1000,
|
|
95
|
+
max_new_tokens=64
|
|
96
|
+
)
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
### Evaluating Truth Methods
|
|
100
|
+
|
|
101
|
+
We can evaluate the truth methods with the `evaluate_truth_method` function. We can define different evaluation metrics including AUROC, AUPRC, AUARC, Accuracy, F1, Precision, Recall, PRR:
|
|
102
|
+
|
|
103
|
+
```python
|
|
104
|
+
results = ttlm.evaluate_truth_method(
|
|
105
|
+
dataset='trivia_qa',
|
|
106
|
+
model=model,
|
|
107
|
+
truth_methods=truth_methods,
|
|
108
|
+
eval_metrics=['auroc', 'prr'],
|
|
109
|
+
tokenizer=tokenizer,
|
|
110
|
+
size_of_data=1000,
|
|
111
|
+
correctness_evaluator=model_judge,
|
|
112
|
+
max_new_tokens=64
|
|
113
|
+
)
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
---
|
|
117
|
+
|
|
118
|
+
## Available Hallucination Detection Methods
|
|
119
|
+
|
|
120
|
+
- **LARS**: [Do Not Design, Learn: A Trainable Scoring Function for Uncertainty Estimation in Generative LLMs](https://arxiv.org/pdf/2406.11278).
|
|
121
|
+
- **Confidence**: [Uncertainty Estimation in Autoregressive Structured Prediction](https://openreview.net/pdf?id=jN5y-zb5Q7m).
|
|
122
|
+
- **Entropy**: [Uncertainty Estimation in Autoregressive Structured Prediction](https://openreview.net/pdf?id=jN5y-zb5Q7m).
|
|
123
|
+
- **SelfDetection**: [Knowing What LLMs DO NOT Know: A Simple Yet Effective Self-Detection Method](https://arxiv.org/pdf/2310.17918).
|
|
124
|
+
- **AttentionScore**: [LLM-Check: Investigating Detection of Hallucinations in Large Language Models](https://openreview.net/pdf?id=LYx4w3CAgy).
|
|
125
|
+
- **CrossExamination**: [LM vs LM: Detecting Factual Errors via Cross Examination](https://arxiv.org/pdf/2305.13281).
|
|
126
|
+
- **EccentricityConfidence**: [Generating with Confidence: Uncertainty Quantification for Black-box Large Language Models](https://arxiv.org/pdf/2305.19187).
|
|
127
|
+
- **EccentricityUncertainty**: [Generating with Confidence: Uncertainty Quantification for Black-box Large Language Models](https://arxiv.org/pdf/2305.19187).
|
|
128
|
+
- **GoogleSearchCheck**: [FacTool: Factuality Detection in Generative AI -- A Tool Augmented Framework for Multi-Task and Multi-Domain Scenarios](https://arxiv.org/pdf/2307.13528).
|
|
129
|
+
- **Inside**: [INSIDE: LLMs' Internal States Retain the Power of Hallucination Detection](https://openreview.net/pdf?id=Zj12nzlQbz).
|
|
130
|
+
- **KernelLanguageEntropy**: [Kernel Language Entropy: Fine-grained Uncertainty Quantification for LLMs from Semantic Similarities](https://arxiv.org/pdf/2405.20003).
|
|
131
|
+
- **MARS**: [MARS: Meaning-Aware Response Scoring for Uncertainty Estimation in Generative LLMs](https://aclanthology.org/2024.acl-long.419.pdf).
|
|
132
|
+
- **MatrixDegreeConfidence**: [Generating with Confidence: Uncertainty Quantification for Black-box Large Language Models](https://arxiv.org/pdf/2305.19187).
|
|
133
|
+
- **MatrixDegreeUncertainty**: [Generating with Confidence: Uncertainty Quantification for Black-box Large Language Models](https://arxiv.org/pdf/2305.19187).
|
|
134
|
+
- **MultiLLMCollab**: [Don’t Hallucinate, Abstain: Identifying LLM Knowledge Gaps via Multi-LLM Collaboration](https://arxiv.org/pdf/2402.00367).
|
|
135
|
+
- **NumSemanticSetUncertainty**: [Semantic Uncertainty: Linguistic Invariances for Uncertainty Estimation in Natural Language Generation](https://arxiv.org/pdf/2302.09664).
|
|
136
|
+
- **PTrue**: [Language Models (Mostly) Know What They Know](https://arxiv.org/pdf/2207.05221).
|
|
137
|
+
- **Saplma**: [The Internal State of an LLM Knows When It’s Lying](https://aclanthology.org/2023.findings-emnlp.68.pdf).
|
|
138
|
+
- **SemanticEntropy**: [Semantic Uncertainty: Linguistic Invariances for Uncertainty Estimation in Natural Language Generation](https://arxiv.org/pdf/2302.09664).
|
|
139
|
+
- **sentSAR**: [Shifting Attention to Relevance: Towards the Predictive Uncertainty Quantification of Free-Form Large Language Models](https://aclanthology.org/2024.acl-long.276.pdf).
|
|
140
|
+
- **SumEigenUncertainty**: [Generating with Confidence: Uncertainty Quantification for Black-box Large Language Models](https://arxiv.org/pdf/2305.19187).
|
|
141
|
+
- **tokenSAR**: [Shifting Attention to Relevance: Towards the Predictive Uncertainty Quantification of Free-Form Large Language Models](https://aclanthology.org/2024.acl-long.276.pdf).
|
|
142
|
+
- **VerbalizedConfidence**: [Just Ask for Calibration: Strategies for Eliciting Calibrated Confidence Scores from Language Models Fine-Tuned with Human Feedback](https://openreview.net/pdf?id=g3faCfrwm7).
|
|
143
|
+
|
|
144
|
+
---
|
|
145
|
+
|
|
146
|
+
## Contributors
|
|
147
|
+
|
|
148
|
+
- **Yavuz Faruk Bakman** (ybakman@usc.edu)
|
|
149
|
+
- **Duygu Nur Yaldiz** (yaldiz@usc.edu)
|
|
150
|
+
- **Sungmin Kang** (kangsung@usc.edu)
|
|
151
|
+
- **Hayrettin Eren Yildiz** (hayereyil@gmail.com)
|
|
152
|
+
- **Alperen Ozis** (alperenozis@gmail.com)
|
|
153
|
+
|
|
154
|
+
---
|
|
155
|
+
|
|
156
|
+
## Citation
|
|
157
|
+
|
|
158
|
+
If you use TruthTorchLM in your research, please cite:
|
|
159
|
+
|
|
160
|
+
```bibtex
|
|
161
|
+
@misc{truthtorchlm2025,
|
|
162
|
+
title={TruthTorchLM: A Comprehensive Library for Hallucination Detection in Large Language Models},
|
|
163
|
+
author={Yavuz Faruk Bakman and Duygu Nur Yaldiz and Sungmin Kang and Hayrettin Eren Yildiz and Alperen Ozis},
|
|
164
|
+
year={2025},
|
|
165
|
+
howpublished={GitHub},
|
|
166
|
+
url={https://github.com/Ybakman/TruthTorchLM}
|
|
167
|
+
}
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
---
|
|
171
|
+
|
|
172
|
+
## License
|
|
173
|
+
|
|
174
|
+
TruthTorchLM is released under the [MIT License](LICENSE).
|
|
175
|
+
|
|
176
|
+
For inquiries or support, feel free to contact the maintainers.
|
|
177
|
+
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
"""Packaging script for the TruthTorchLM distribution."""
from pathlib import Path

from setuptools import find_packages, setup

# Pinned runtime dependencies; this list mirrors the Requires-Dist entries
# published in PKG-INFO, so keep the two in sync when bumping versions.
requirements = [
    "aiohttp==3.9.3",
    "evaluate==0.4.1",
    "instructor",
    "litellm==1.57.0",
    "nest_asyncio==1.6.0",
    "numpy==1.26.4",
    "outlines==0.1.4",
    "pandas==2.2.3",
    "pydantic",
    "PyYAML",
    "Requests==2.32.3",
    "scikit_learn==1.4.1.post1",
    "scipy==1.13.0",
    "sentence_transformers==3.1.1",
    "termcolor==2.5.0",
    "torch==2.2.2",
    "tqdm==4.66.2",
    "transformers==4.44.2",
    "absl-py",
    "nltk",
    "rouge_score",
]

setup(
    name="TruthTorchLM",
    version="0.1.0",
    author="Yavuz Faruk Bakman",
    author_email="ybakman@usc.edu",
    description=(
        "TruthTorchLM is an open-source library designed to detect and "
        "mitigate hallucinations in text generation models. The library "
        "integrates state-of-the-art methods, offers comprehensive "
        "benchmarking tools across various tasks, and enables seamless "
        "integration with popular frameworks like Huggingface and LiteLLM."
    ),
    # Read the README with an explicit encoding and without leaking a file
    # handle (the previous bare open(...).read() never closed the file and
    # decoded with the platform default encoding).
    long_description=Path("README.md").read_text(encoding="utf-8"),
    long_description_content_type="text/markdown",
    package_dir={"": "src"},  # packages live under src/ (src-layout)
    packages=find_packages(where="src"),
    install_requires=requirements,
    python_requires=">=3.10",
)
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
from .truth_methods.truth_method import TruthMethod
|
|
2
|
+
from TruthTorchLM import utils ##TODO do we really need to import this?
|
|
3
|
+
from TruthTorchLM import scoring_methods
|
|
4
|
+
from TruthTorchLM import truth_methods
|
|
5
|
+
from .generation import generate_with_truth_value
|
|
6
|
+
from .calibration import calibrate_truth_method
|
|
7
|
+
from TruthTorchLM import evaluators
|
|
8
|
+
from .evaluators import evaluate_truth_method
|
|
9
|
+
from .templates import DEFAULT_USER_PROMPT, DEFAULT_SYSTEM_PROMPT ##TODO import all?
|
|
10
|
+
from .availability import AVAILABLE_DATASETS, AVAILABLE_EVALUATION_METRICS
|
|
11
|
+
from TruthTorchLM import normalizers
|
|
12
|
+
|
|
13
|
+
from TruthTorchLM import long_form_generation
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
#__all__ = ['generate_with_truth_value']
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
# API model identifiers (LiteLLM-style) that TruthTorchLM can generate with.
AVAILABLE_API_MODELS = ['gpt-4o', 'gpt-4o-2024-05-13', 'gpt-4o-2024-08-06', 'chatgpt-4o-latest', 'gpt-4o-mini', 'gpt-4o-mini-2024-07-18',
                        'gpt-4-turbo', 'gpt-4-turbo-2024-04-09', 'gpt-4-turbo-preview', 'gpt-4-0125-preview', 'gpt-4-1106-preview', 'gpt-4',
                        'gpt-4-0613', 'gpt-4-0314', 'gpt-3.5-turbo-0125', 'gpt-3.5-turbo', 'gpt-3.5-turbo-1106', 'gpt-3.5-turbo-instruct', 'together_ai/togethercomputer/llama-2-70b']

# Models that expose token log-probabilities. Currently identical to
# AVAILABLE_API_MODELS; derived as an independent copy (instead of a second
# hand-typed literal) so the two lists cannot silently drift apart, while a
# copy — not an alias — keeps future divergence safe.
PROB_AVAILABLE_API_MODELS = list(AVAILABLE_API_MODELS)

# Models that expose internal activations over the API: none at the moment.
ACTIVATION_AVAILABLE_API_MODELS = []

# Short-form QA benchmark datasets bundled with the library.
AVAILABLE_DATASETS = ['trivia_qa', 'gsm8k', 'natural_qa', 'pop_qa', 'simple_qa']
# Long-form generation benchmark datasets.
LONG_FORM_AVAILABLE_DATASETS = ['longfact_concepts', 'longfact_objects']

# Metrics accepted by evaluate_truth_method / metric_score.
AVAILABLE_EVALUATION_METRICS = ['auroc', 'auprc', 'auarc', 'accuracy', 'f1', 'precision', 'recall', 'prr']
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
from transformers import PreTrainedModel, PreTrainedTokenizer, PreTrainedTokenizerFast
|
|
2
|
+
from typing import Union
|
|
3
|
+
from TruthTorchLM.truth_methods import TruthMethod
|
|
4
|
+
from TruthTorchLM.evaluators import CorrectnessEvaluator, ROUGE
|
|
5
|
+
from TruthTorchLM.templates import DEFAULT_SYSTEM_BENCHMARK_PROMPT, DEFAULT_USER_PROMPT
|
|
6
|
+
from TruthTorchLM.utils.dataset_utils import get_dataset
|
|
7
|
+
from TruthTorchLM.utils.eval_utils import run_over_dataset
|
|
8
|
+
import numpy as np
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def calibrate_truth_method(dataset: Union[str, list], model: Union[str, PreTrainedModel], truth_methods: list[TruthMethod],
                           tokenizer: Union[PreTrainedTokenizer, PreTrainedTokenizerFast] = None,
                           correctness_evaluator: CorrectnessEvaluator = ROUGE(0.7), size_of_data: float = 1.0,
                           previous_context: list = None,
                           user_prompt: str = DEFAULT_USER_PROMPT, seed: int = 0, wandb_run=None,
                           return_method_details: bool = False, wandb_push_method_details: bool = False,
                           split='train', **kwargs):
    """Calibrate the normalizer of each truth method on a labeled dataset.

    Runs the model plus all ``truth_methods`` over ``dataset``, scores each
    generation with ``correctness_evaluator``, then fits every method's
    normalizer against the observed (correctness, truth value) pairs.

    Returns the raw ``output_dict`` produced by ``run_over_dataset``.
    """
    # None sentinel instead of a mutable list-of-dicts default argument,
    # which would be shared (and mutable) across calls.
    if previous_context is None:
        previous_context = [{'role': 'system', 'content': DEFAULT_SYSTEM_BENCHMARK_PROMPT}]

    dataset = get_dataset(dataset, size_of_data=size_of_data, seed=seed, split=split)

    output_dict = run_over_dataset(dataset, model, truth_methods, tokenizer=tokenizer, correctness_evaluator=correctness_evaluator,
                                   previous_context=previous_context, user_prompt=user_prompt, seed=seed, return_method_details=return_method_details,
                                   wandb_run=wandb_run, wandb_push_method_details=wandb_push_method_details, **kwargs)

    correctness = np.array(output_dict['generation_correctness'])
    # A correctness of -1 marks samples where the model didn't attempt to
    # generate an answer; exclude those samples from calibration.
    attempted = correctness != -1

    for i, truth_method in enumerate(truth_methods):
        truth_values = np.array(output_dict[f'truth_method_{i}']['truth_values'])
        # NaN truth values (method failed on a sample) are treated as 0.
        truth_values[np.isnan(truth_values)] = 0
        truth_method.normalizer.calibrate(generation_performance_scores=correctness[attempted],
                                          truth_values=truth_values[attempted])
    return output_dict
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
from .correctness_evaluator import CorrectnessEvaluator
|
|
2
|
+
from .rouge import ROUGE
|
|
3
|
+
from .bleu import BLEU
|
|
4
|
+
from .model_judge import ModelJudge
|
|
5
|
+
from .eval_truth_method import evaluate_truth_method, get_metric_scores
|
|
6
|
+
|
|
7
|
+
__all__ = ['CorrectnessEvaluator', 'ROUGE', 'BLEU', 'evaluate_truth_method', 'ModelJudge', 'get_metric_scores']
|
|
8
|
+
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
|
|
2
|
+
from .correctness_evaluator import CorrectnessEvaluator
|
|
3
|
+
import evaluate
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class BLEU(CorrectnessEvaluator):
    """Correctness evaluator based on the BLEU overlap score.

    A generation is judged correct (returns 1) if its BLEU score against
    any single ground-truth answer exceeds ``threshold``; otherwise 0.
    """

    def __init__(self, threshold: float = 0.5):
        super().__init__()
        self.threshold = threshold
        # Load the HuggingFace `evaluate` BLEU metric once at construction
        # so repeated __call__ invocations don't reload it.
        self.bleu = evaluate.load('bleu')

    # Return annotation corrected from `bool` to `int`: the method returns
    # 0/1 and the CorrectnessEvaluator base class declares `-> int`.
    def __call__(self, question_text: str, generated_text: str, ground_truths: list[str], seed: int = None) -> int:
        # Compare against each reference separately; one match suffices.
        for ground_truth in ground_truths:
            bleu_results = self.bleu.compute(predictions=[generated_text], references=[ground_truth])
            if bleu_results['bleu'] > self.threshold:
                return 1
        return 0

    def __str__(self):
        return f"BLEU with threshold {self.threshold}"
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
from abc import ABC, abstractmethod
|
|
2
|
+
|
|
3
|
+
class CorrectnessEvaluator(ABC):
    """Abstract interface for judging whether a generated answer is correct.

    Concrete subclasses implement ``__call__`` — returning 1 when the
    generation matches a ground truth and 0 otherwise — together with a
    human-readable ``__str__`` describing the evaluator's configuration.
    """

    def __init__(self):
        pass

    @abstractmethod
    def __call__(self, question_text: str, generated_text: str, ground_truth_text: list[str], seed: int = None) -> int:
        raise NotImplementedError("Subclasses must implement this method")

    @abstractmethod
    def __str__(self):
        raise NotImplementedError("Subclasses must implement this method")
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
from transformers import PreTrainedModel, PreTrainedTokenizer, PreTrainedTokenizerFast
|
|
2
|
+
from typing import Union
|
|
3
|
+
from TruthTorchLM.truth_methods import TruthMethod
|
|
4
|
+
from .correctness_evaluator import CorrectnessEvaluator
|
|
5
|
+
from .rouge import ROUGE
|
|
6
|
+
from TruthTorchLM.availability import AVAILABLE_EVALUATION_METRICS
|
|
7
|
+
from TruthTorchLM.templates import DEFAULT_SYSTEM_BENCHMARK_PROMPT, DEFAULT_USER_PROMPT
|
|
8
|
+
from TruthTorchLM.utils.dataset_utils import get_dataset
|
|
9
|
+
from TruthTorchLM.utils.eval_utils import metric_score, run_over_dataset
|
|
10
|
+
import wandb
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def evaluate_truth_method(dataset: Union[str, list], model: Union[str, PreTrainedModel], truth_methods: list[TruthMethod],
                          tokenizer: Union[PreTrainedTokenizer, PreTrainedTokenizerFast] = None, eval_metrics: list[str] = None,
                          correctness_evaluator: CorrectnessEvaluator = ROUGE(0.7), size_of_data=1.0,
                          previous_context: list = None,
                          user_prompt: str = DEFAULT_USER_PROMPT, seed: int = 0, return_method_details: bool = False,
                          wandb_run=None, wandb_push_method_details: bool = False,
                          batch_generation=True, add_generation_prompt=True, continue_final_message=False, split='test', **kwargs):
    """Benchmark truth methods on a QA dataset.

    Generates answers with ``model`` over ``dataset``, scores correctness
    with ``correctness_evaluator``, computes ``eval_metrics`` for every
    truth method, and (optionally) logs accuracy plus per-metric bar
    charts to Weights & Biases.

    Returns a dict with keys 'eval_list' (one metric dict per truth
    method) and 'output_dict' (raw per-sample results).
    """
    # None sentinels instead of mutable default arguments (a list literal
    # default is shared across calls and can be mutated in place).
    if eval_metrics is None:
        eval_metrics = ['auroc']
    if previous_context is None:
        previous_context = [{'role': 'system', 'content': DEFAULT_SYSTEM_BENCHMARK_PROMPT}]

    # Validate the requested metrics before doing any expensive generation.
    for eval_metric in eval_metrics:
        if eval_metric not in AVAILABLE_EVALUATION_METRICS:
            raise ValueError(f"Evaluation metric {eval_metric} is not available. Available evaluation metrics are: {AVAILABLE_EVALUATION_METRICS}")

    dataset = get_dataset(dataset, size_of_data=size_of_data, seed=seed, split=split)

    output_dict = run_over_dataset(dataset, model, truth_methods, tokenizer=tokenizer, correctness_evaluator=correctness_evaluator,
                                   previous_context=previous_context, user_prompt=user_prompt, seed=seed, return_method_details=return_method_details,
                                   wandb_run=wandb_run, wandb_push_method_details=wandb_push_method_details,
                                   batch_generation=batch_generation, add_generation_prompt=add_generation_prompt, continue_final_message=continue_final_message, **kwargs)

    eval_list = get_metric_scores(output_dict=output_dict, eval_metrics=eval_metrics, seed=seed)

    # All wandb interaction is guarded so the function works without a run.
    if wandb_run:
        correctness = output_dict['generation_correctness']
        wandb_run.log({'model_accuracy': sum(correctness) / len(correctness)})

        # Guard against an empty truth-method list before indexing eval_list.
        if eval_list:
            # One bar chart per metric, comparing all truth methods.
            for key in eval_list[0]:
                methods = []
                scores = []
                for i, cur_eval_dict in enumerate(eval_list):
                    score = cur_eval_dict[key]
                    scores.append(score)
                    method_name = str(truth_methods[i].__class__.__name__)
                    methods.append(method_name)
                    wandb_run.log({f'{key}_of_method_{i}_{method_name}': score})

                data = [[method, score] for (method, score) in zip(methods, scores)]
                table = wandb.Table(data=data, columns=["methods", "scores"])
                wandb.log({f"{key}": wandb.plot.bar(table, "methods", "scores",
                                                    title=f"{key} Scores of Truth Methods")})

    return {'eval_list': eval_list, 'output_dict': output_dict}
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def get_metric_scores(output_dict: dict, eval_metrics: list[str], seed: int = 0):
    """Compute the requested metrics for every truth method in ``output_dict``.

    Returns a list with one metric dict per truth method, in the same order
    as ``output_dict['truth_methods']``.
    """
    return [
        metric_score(
            eval_metrics,
            output_dict['generation_correctness'],
            output_dict[f'truth_method_{i}']['truth_values'],
            output_dict[f'truth_method_{i}']['normalized_truth_values'],
            seed=seed,
        )
        for i in range(len(output_dict['truth_methods']))
    ]
|