hallutok 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hallutok-0.1.0/PKG-INFO +243 -0
- hallutok-0.1.0/README.md +211 -0
- hallutok-0.1.0/hallutok/__init__.py +22 -0
- hallutok-0.1.0/hallutok/antihallucination/__init__.py +2 -0
- hallutok-0.1.0/hallutok/antihallucination/validator.py +207 -0
- hallutok-0.1.0/hallutok/client.py +214 -0
- hallutok-0.1.0/hallutok/optimizer/__init__.py +2 -0
- hallutok-0.1.0/hallutok/optimizer/token_optimizer.py +198 -0
- hallutok-0.1.0/hallutok/providers/__init__.py +4 -0
- hallutok-0.1.0/hallutok/providers/gemini_provider.py +82 -0
- hallutok-0.1.0/hallutok/providers/groq_provider.py +83 -0
- hallutok-0.1.0/hallutok.egg-info/PKG-INFO +243 -0
- hallutok-0.1.0/hallutok.egg-info/SOURCES.txt +17 -0
- hallutok-0.1.0/hallutok.egg-info/dependency_links.txt +1 -0
- hallutok-0.1.0/hallutok.egg-info/requires.txt +16 -0
- hallutok-0.1.0/hallutok.egg-info/top_level.txt +1 -0
- hallutok-0.1.0/pyproject.toml +47 -0
- hallutok-0.1.0/setup.cfg +4 -0
- hallutok-0.1.0/tests/test_hallutok.py +131 -0
hallutok-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,243 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: hallutok
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Anti-Hallucination & Token Optimization library for Groq and Gemini APIs
|
|
5
|
+
Author-email: Joel Pawar <joelpawarwork@gmail.com>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/joelpawar08/hallutok
|
|
8
|
+
Project-URL: Issues, https://github.com/joelpawar08/hallutok/issues
|
|
9
|
+
Project-URL: Documentation, https://github.com/joelpawar08/hallutok#readme
|
|
10
|
+
Keywords: llm,hallucination,token-optimization,groq,gemini,ai
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
18
|
+
Requires-Python: >=3.10
|
|
19
|
+
Description-Content-Type: text/markdown
|
|
20
|
+
Provides-Extra: groq
|
|
21
|
+
Requires-Dist: groq>=0.9.0; extra == "groq"
|
|
22
|
+
Provides-Extra: gemini
|
|
23
|
+
Requires-Dist: google-generativeai>=0.7.0; extra == "gemini"
|
|
24
|
+
Provides-Extra: all
|
|
25
|
+
Requires-Dist: groq>=0.9.0; extra == "all"
|
|
26
|
+
Requires-Dist: google-generativeai>=0.7.0; extra == "all"
|
|
27
|
+
Provides-Extra: dev
|
|
28
|
+
Requires-Dist: pytest>=8.0; extra == "dev"
|
|
29
|
+
Requires-Dist: pytest-cov; extra == "dev"
|
|
30
|
+
Requires-Dist: build; extra == "dev"
|
|
31
|
+
Requires-Dist: twine; extra == "dev"
|
|
32
|
+
|
|
33
|
+
# 🛡️ Hallutok
|
|
34
|
+
|
|
35
|
+
**Anti-Hallucination & Token Optimization for Groq and Gemini APIs**
|
|
36
|
+
|
|
37
|
+
[PyPI](https://pypi.org/project/hallutok/)
|
|
38
|
+
[Python](https://www.python.org/)
|
|
39
|
+
[MIT License](LICENSE)
|
|
40
|
+
|
|
41
|
+
Hallutok solves two real problems that kill your API quota:
|
|
42
|
+
|
|
43
|
+
| Problem | Hallutok's Solution |
|
|
44
|
+
|---|---|
|
|
45
|
+
| Long prompts burning through tokens | `TokenOptimizer` compresses prompts before sending |
|
|
46
|
+
| LLM making up facts / hedging | `HallucinationValidator` scores and flags sketchy responses |
|
|
47
|
+
|
|
48
|
+
---
|
|
49
|
+
|
|
50
|
+
## ✨ Features
|
|
51
|
+
|
|
52
|
+
- **Token Optimization** — whitespace cleanup, filler-phrase compression, deduplication, smart truncation, in-memory caching
|
|
53
|
+
- **Anti-Hallucination** — detects hedging language, ungrounded claims, numeric anomalies, contradictions
|
|
54
|
+
- **Groq + Gemini** — works with both APIs via thin, swappable provider adapters
|
|
55
|
+
- **Zero hard dependencies** — core library is pure Python; providers are optional extras
|
|
56
|
+
- **Savings reporting** — see exactly how many tokens you saved per call
|
|
57
|
+
|
|
58
|
+
---
|
|
59
|
+
|
|
60
|
+
## 📦 Installation
|
|
61
|
+
|
|
62
|
+
```bash
|
|
63
|
+
# With Groq support
|
|
64
|
+
pip install hallutok[groq]
|
|
65
|
+
|
|
66
|
+
# With Gemini support
|
|
67
|
+
pip install hallutok[gemini]
|
|
68
|
+
|
|
69
|
+
# Both
|
|
70
|
+
pip install hallutok[all]
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
---
|
|
74
|
+
|
|
75
|
+
## 🚀 Quick Start
|
|
76
|
+
|
|
77
|
+
### Using Groq
|
|
78
|
+
|
|
79
|
+
```python
|
|
80
|
+
from hallutok import HallutokClient
|
|
81
|
+
|
|
82
|
+
# Factory shortcut
|
|
83
|
+
client = HallutokClient.with_groq(
|
|
84
|
+
api_key="gsk_your_groq_key",
|
|
85
|
+
model="llama3-8b-8192", # optional, this is the default
|
|
86
|
+
temperature=0.3, # lower = more factual
|
|
87
|
+
)
|
|
88
|
+
|
|
89
|
+
result = client.chat(
|
|
90
|
+
"Please note that I would like you to explain in order to help me "
|
|
91
|
+
"understand what black holes are and how they work. Can you please "
|
|
92
|
+
"provide a detailed explanation? It is important to note that I am "
|
|
93
|
+
"a beginner."
|
|
94
|
+
)
|
|
95
|
+
|
|
96
|
+
print(result.response)
|
|
97
|
+
print(result.token_report)
|
|
98
|
+
# {'tokens_before': 48, 'tokens_after': 19, 'tokens_saved': 29, 'percent_saved': 60.4}
|
|
99
|
+
|
|
100
|
+
if result.validation.is_likely_hallucination:
|
|
101
|
+
print("⚠️ Flags:", result.validation.flags)
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
### Using Gemini
|
|
105
|
+
|
|
106
|
+
```python
|
|
107
|
+
from hallutok import HallutokClient
|
|
108
|
+
|
|
109
|
+
client = HallutokClient.with_gemini(
|
|
110
|
+
api_key="AIza_your_gemini_key",
|
|
111
|
+
model="gemini-1.5-flash",
|
|
112
|
+
)
|
|
113
|
+
|
|
114
|
+
result = client.chat("Explain quantum entanglement to a 10-year-old.")
|
|
115
|
+
print(result.response)
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
### Using providers directly
|
|
119
|
+
|
|
120
|
+
```python
|
|
121
|
+
from hallutok import HallutokClient
|
|
122
|
+
from hallutok.providers import GroqProvider, GeminiProvider
|
|
123
|
+
|
|
124
|
+
# Swap providers without changing anything else
|
|
125
|
+
provider = GroqProvider(api_key="gsk_...", model="mixtral-8x7b-32768")
|
|
126
|
+
# provider = GeminiProvider(api_key="AIza_...", model="gemini-1.5-pro")
|
|
127
|
+
|
|
128
|
+
client = HallutokClient(
|
|
129
|
+
provider=provider,
|
|
130
|
+
optimize_tokens=True, # default: True
|
|
131
|
+
validate_responses=True, # default: True
|
|
132
|
+
max_prompt_tokens=512, # hard cap on prompt size
|
|
133
|
+
temperature=0.4,
|
|
134
|
+
max_response_tokens=1024,
|
|
135
|
+
system_prompt="You are a factual assistant. Cite sources when possible.",
|
|
136
|
+
)
|
|
137
|
+
|
|
138
|
+
result = client.chat("What causes inflation?")
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
---
|
|
142
|
+
|
|
143
|
+
## 🔧 Components
|
|
144
|
+
|
|
145
|
+
### TokenOptimizer
|
|
146
|
+
|
|
147
|
+
Use standalone if you only need compression:
|
|
148
|
+
|
|
149
|
+
```python
|
|
150
|
+
from hallutok.optimizer import TokenOptimizer
|
|
151
|
+
|
|
152
|
+
opt = TokenOptimizer()
|
|
153
|
+
|
|
154
|
+
raw = """
|
|
155
|
+
Please note that I would like you to, in order to be helpful,
|
|
156
|
+
can you please explain, it is important to note that, machine learning
|
|
157
|
+
is a subset of AI. Machine learning is a subset of AI. Machine learning is a subset of AI.
|
|
158
|
+
"""
|
|
159
|
+
|
|
160
|
+
compressed = opt.optimize(raw, max_tokens=100)
|
|
161
|
+
print(compressed)
|
|
162
|
+
|
|
163
|
+
report = opt.savings_report(raw, compressed)
|
|
164
|
+
# {'tokens_before': 54, 'tokens_after': 12, 'tokens_saved': 42, 'percent_saved': 77.8}
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
What the optimizer does, in order:
|
|
168
|
+
1. Normalize whitespace (collapse spaces, trim blank lines)
|
|
169
|
+
2. Strip boilerplate ("Please note that", "I would like you to", etc.)
|
|
170
|
+
3. Deduplicate repeated sentences
|
|
171
|
+
4. Replace verbose phrases ("in order to" → "to", "due to the fact that" → "because", …)
|
|
172
|
+
5. Truncate to `max_tokens` at a sentence boundary
|
|
173
|
+
|
|
174
|
+
### HallucinationValidator
|
|
175
|
+
|
|
176
|
+
Use standalone to audit any text:
|
|
177
|
+
|
|
178
|
+
```python
|
|
179
|
+
from hallutok.antihallucination import HallucinationValidator
|
|
180
|
+
|
|
181
|
+
validator = HallucinationValidator()
|
|
182
|
+
|
|
183
|
+
response = "I think maybe studies show that eating chocolate probably cures cancer."
|
|
184
|
+
result = validator.validate(response)
|
|
185
|
+
|
|
186
|
+
print(result.confidence_score) # 0.66 for this input (3 hedges + 1 ungrounded claim)
|
|
187
|
+
print(result.is_likely_hallucination) # True / False
|
|
188
|
+
print(result.flags) # list of issues found
|
|
189
|
+
print(result.warnings) # human-readable descriptions
|
|
190
|
+
print(result.suggestions) # what to do about it
|
|
191
|
+
print(result.cleaned_response) # response + disclaimer if flagged
|
|
192
|
+
```
|
|
193
|
+
|
|
194
|
+
**Detection layers:**
|
|
195
|
+
|
|
196
|
+
| Layer | What it catches |
|
|
197
|
+
|---|---|
|
|
198
|
+
| Hedging | "I think", "maybe", "perhaps", "I'm not sure", etc. |
|
|
199
|
+
| Ungrounded claims | "Studies show…", "Research suggests…" without citations |
|
|
200
|
+
| Numeric anomalies | Percentages over 100%, other implausible numbers |
|
|
201
|
+
| Contradictions | "always" + "never", "increases" + "decreases" in same text |
|
|
202
|
+
|
|
203
|
+
---
|
|
204
|
+
|
|
205
|
+
## 💡 Tips to Maximize Token Savings
|
|
206
|
+
|
|
207
|
+
1. **Avoid filler openers** — "Can you please", "I would like you to", "It is important that"
|
|
208
|
+
2. **Don't repeat yourself** — Hallutok deduplicates, but it's faster to not duplicate at all
|
|
209
|
+
3. **Use `max_prompt_tokens`** — set a hard cap so you never accidentally send a 4k-token prompt
|
|
210
|
+
4. **Lower the temperature** — `temperature=0.3` reduces hallucination risk significantly
|
|
211
|
+
5. **Use a system prompt** — instruct the model to cite sources and avoid speculation
|
|
212
|
+
6. **Check `token_report` per call** — it tells you exactly what was saved
|
|
213
|
+
|
|
214
|
+
---
|
|
215
|
+
|
|
216
|
+
## 📊 ChatResult Fields
|
|
217
|
+
|
|
218
|
+
```python
|
|
219
|
+
result.response # final (possibly cleaned) text
|
|
220
|
+
result.original_prompt # your original input
|
|
221
|
+
result.optimized_prompt # what was actually sent to the API
|
|
222
|
+
result.token_report # {tokens_before, tokens_after, tokens_saved, percent_saved}
|
|
223
|
+
result.validation # ValidationResult object
|
|
224
|
+
result.provider # "groq" or "gemini"
|
|
225
|
+
result.warnings # list of human-readable warnings
|
|
226
|
+
```
|
|
227
|
+
|
|
228
|
+
---
|
|
229
|
+
|
|
230
|
+
## 🗺️ Roadmap
|
|
231
|
+
|
|
232
|
+
- [ ] Async support (`achat()`)
|
|
233
|
+
- [ ] Streaming responses
|
|
234
|
+
- [ ] OpenAI / Together AI provider adapters
|
|
235
|
+
- [ ] Per-call token budget enforcement
|
|
236
|
+
- [ ] Context window manager for multi-turn conversations
|
|
237
|
+
- [ ] More hallucination detection strategies (self-consistency, chain-of-thought verification)
|
|
238
|
+
|
|
239
|
+
---
|
|
240
|
+
|
|
241
|
+
## 📄 License
|
|
242
|
+
|
|
243
|
+
MIT License — see [LICENSE](LICENSE) for details.
|
hallutok-0.1.0/README.md
ADDED
|
@@ -0,0 +1,211 @@
|
|
|
1
|
+
# 🛡️ Hallutok
|
|
2
|
+
|
|
3
|
+
**Anti-Hallucination & Token Optimization for Groq and Gemini APIs**
|
|
4
|
+
|
|
5
|
+
[PyPI](https://pypi.org/project/hallutok/)
|
|
6
|
+
[Python](https://www.python.org/)
|
|
7
|
+
[MIT License](LICENSE)
|
|
8
|
+
|
|
9
|
+
Hallutok solves two real problems that kill your API quota:
|
|
10
|
+
|
|
11
|
+
| Problem | Hallutok's Solution |
|
|
12
|
+
|---|---|
|
|
13
|
+
| Long prompts burning through tokens | `TokenOptimizer` compresses prompts before sending |
|
|
14
|
+
| LLM making up facts / hedging | `HallucinationValidator` scores and flags sketchy responses |
|
|
15
|
+
|
|
16
|
+
---
|
|
17
|
+
|
|
18
|
+
## ✨ Features
|
|
19
|
+
|
|
20
|
+
- **Token Optimization** — whitespace cleanup, filler-phrase compression, deduplication, smart truncation, in-memory caching
|
|
21
|
+
- **Anti-Hallucination** — detects hedging language, ungrounded claims, numeric anomalies, contradictions
|
|
22
|
+
- **Groq + Gemini** — works with both APIs via thin, swappable provider adapters
|
|
23
|
+
- **Zero hard dependencies** — core library is pure Python; providers are optional extras
|
|
24
|
+
- **Savings reporting** — see exactly how many tokens you saved per call
|
|
25
|
+
|
|
26
|
+
---
|
|
27
|
+
|
|
28
|
+
## 📦 Installation
|
|
29
|
+
|
|
30
|
+
```bash
|
|
31
|
+
# With Groq support
|
|
32
|
+
pip install hallutok[groq]
|
|
33
|
+
|
|
34
|
+
# With Gemini support
|
|
35
|
+
pip install hallutok[gemini]
|
|
36
|
+
|
|
37
|
+
# Both
|
|
38
|
+
pip install hallutok[all]
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
---
|
|
42
|
+
|
|
43
|
+
## 🚀 Quick Start
|
|
44
|
+
|
|
45
|
+
### Using Groq
|
|
46
|
+
|
|
47
|
+
```python
|
|
48
|
+
from hallutok import HallutokClient
|
|
49
|
+
|
|
50
|
+
# Factory shortcut
|
|
51
|
+
client = HallutokClient.with_groq(
|
|
52
|
+
api_key="gsk_your_groq_key",
|
|
53
|
+
model="llama3-8b-8192", # optional, this is the default
|
|
54
|
+
temperature=0.3, # lower = more factual
|
|
55
|
+
)
|
|
56
|
+
|
|
57
|
+
result = client.chat(
|
|
58
|
+
"Please note that I would like you to explain in order to help me "
|
|
59
|
+
"understand what black holes are and how they work. Can you please "
|
|
60
|
+
"provide a detailed explanation? It is important to note that I am "
|
|
61
|
+
"a beginner."
|
|
62
|
+
)
|
|
63
|
+
|
|
64
|
+
print(result.response)
|
|
65
|
+
print(result.token_report)
|
|
66
|
+
# {'tokens_before': 48, 'tokens_after': 19, 'tokens_saved': 29, 'percent_saved': 60.4}
|
|
67
|
+
|
|
68
|
+
if result.validation.is_likely_hallucination:
|
|
69
|
+
print("⚠️ Flags:", result.validation.flags)
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
### Using Gemini
|
|
73
|
+
|
|
74
|
+
```python
|
|
75
|
+
from hallutok import HallutokClient
|
|
76
|
+
|
|
77
|
+
client = HallutokClient.with_gemini(
|
|
78
|
+
api_key="AIza_your_gemini_key",
|
|
79
|
+
model="gemini-1.5-flash",
|
|
80
|
+
)
|
|
81
|
+
|
|
82
|
+
result = client.chat("Explain quantum entanglement to a 10-year-old.")
|
|
83
|
+
print(result.response)
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
### Using providers directly
|
|
87
|
+
|
|
88
|
+
```python
|
|
89
|
+
from hallutok import HallutokClient
|
|
90
|
+
from hallutok.providers import GroqProvider, GeminiProvider
|
|
91
|
+
|
|
92
|
+
# Swap providers without changing anything else
|
|
93
|
+
provider = GroqProvider(api_key="gsk_...", model="mixtral-8x7b-32768")
|
|
94
|
+
# provider = GeminiProvider(api_key="AIza_...", model="gemini-1.5-pro")
|
|
95
|
+
|
|
96
|
+
client = HallutokClient(
|
|
97
|
+
provider=provider,
|
|
98
|
+
optimize_tokens=True, # default: True
|
|
99
|
+
validate_responses=True, # default: True
|
|
100
|
+
max_prompt_tokens=512, # hard cap on prompt size
|
|
101
|
+
temperature=0.4,
|
|
102
|
+
max_response_tokens=1024,
|
|
103
|
+
system_prompt="You are a factual assistant. Cite sources when possible.",
|
|
104
|
+
)
|
|
105
|
+
|
|
106
|
+
result = client.chat("What causes inflation?")
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
---
|
|
110
|
+
|
|
111
|
+
## 🔧 Components
|
|
112
|
+
|
|
113
|
+
### TokenOptimizer
|
|
114
|
+
|
|
115
|
+
Use standalone if you only need compression:
|
|
116
|
+
|
|
117
|
+
```python
|
|
118
|
+
from hallutok.optimizer import TokenOptimizer
|
|
119
|
+
|
|
120
|
+
opt = TokenOptimizer()
|
|
121
|
+
|
|
122
|
+
raw = """
|
|
123
|
+
Please note that I would like you to, in order to be helpful,
|
|
124
|
+
can you please explain, it is important to note that, machine learning
|
|
125
|
+
is a subset of AI. Machine learning is a subset of AI. Machine learning is a subset of AI.
|
|
126
|
+
"""
|
|
127
|
+
|
|
128
|
+
compressed = opt.optimize(raw, max_tokens=100)
|
|
129
|
+
print(compressed)
|
|
130
|
+
|
|
131
|
+
report = opt.savings_report(raw, compressed)
|
|
132
|
+
# {'tokens_before': 54, 'tokens_after': 12, 'tokens_saved': 42, 'percent_saved': 77.8}
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
What the optimizer does, in order:
|
|
136
|
+
1. Normalize whitespace (collapse spaces, trim blank lines)
|
|
137
|
+
2. Strip boilerplate ("Please note that", "I would like you to", etc.)
|
|
138
|
+
3. Deduplicate repeated sentences
|
|
139
|
+
4. Replace verbose phrases ("in order to" → "to", "due to the fact that" → "because", …)
|
|
140
|
+
5. Truncate to `max_tokens` at a sentence boundary
|
|
141
|
+
|
|
142
|
+
### HallucinationValidator
|
|
143
|
+
|
|
144
|
+
Use standalone to audit any text:
|
|
145
|
+
|
|
146
|
+
```python
|
|
147
|
+
from hallutok.antihallucination import HallucinationValidator
|
|
148
|
+
|
|
149
|
+
validator = HallucinationValidator()
|
|
150
|
+
|
|
151
|
+
response = "I think maybe studies show that eating chocolate probably cures cancer."
|
|
152
|
+
result = validator.validate(response)
|
|
153
|
+
|
|
154
|
+
print(result.confidence_score) # 0.66 for this input (3 hedges + 1 ungrounded claim)
|
|
155
|
+
print(result.is_likely_hallucination) # True / False
|
|
156
|
+
print(result.flags) # list of issues found
|
|
157
|
+
print(result.warnings) # human-readable descriptions
|
|
158
|
+
print(result.suggestions) # what to do about it
|
|
159
|
+
print(result.cleaned_response) # response + disclaimer if flagged
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
**Detection layers:**
|
|
163
|
+
|
|
164
|
+
| Layer | What it catches |
|
|
165
|
+
|---|---|
|
|
166
|
+
| Hedging | "I think", "maybe", "perhaps", "I'm not sure", etc. |
|
|
167
|
+
| Ungrounded claims | "Studies show…", "Research suggests…" without citations |
|
|
168
|
+
| Numeric anomalies | Percentages over 100%, other implausible numbers |
|
|
169
|
+
| Contradictions | "always" + "never", "increases" + "decreases" in same text |
|
|
170
|
+
|
|
171
|
+
---
|
|
172
|
+
|
|
173
|
+
## 💡 Tips to Maximize Token Savings
|
|
174
|
+
|
|
175
|
+
1. **Avoid filler openers** — "Can you please", "I would like you to", "It is important that"
|
|
176
|
+
2. **Don't repeat yourself** — Hallutok deduplicates, but it's faster to not duplicate at all
|
|
177
|
+
3. **Use `max_prompt_tokens`** — set a hard cap so you never accidentally send a 4k-token prompt
|
|
178
|
+
4. **Lower the temperature** — `temperature=0.3` reduces hallucination risk significantly
|
|
179
|
+
5. **Use a system prompt** — instruct the model to cite sources and avoid speculation
|
|
180
|
+
6. **Check `token_report` per call** — it tells you exactly what was saved
|
|
181
|
+
|
|
182
|
+
---
|
|
183
|
+
|
|
184
|
+
## 📊 ChatResult Fields
|
|
185
|
+
|
|
186
|
+
```python
|
|
187
|
+
result.response # final (possibly cleaned) text
|
|
188
|
+
result.original_prompt # your original input
|
|
189
|
+
result.optimized_prompt # what was actually sent to the API
|
|
190
|
+
result.token_report # {tokens_before, tokens_after, tokens_saved, percent_saved}
|
|
191
|
+
result.validation # ValidationResult object
|
|
192
|
+
result.provider # "groq" or "gemini"
|
|
193
|
+
result.warnings # list of human-readable warnings
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
---
|
|
197
|
+
|
|
198
|
+
## 🗺️ Roadmap
|
|
199
|
+
|
|
200
|
+
- [ ] Async support (`achat()`)
|
|
201
|
+
- [ ] Streaming responses
|
|
202
|
+
- [ ] OpenAI / Together AI provider adapters
|
|
203
|
+
- [ ] Per-call token budget enforcement
|
|
204
|
+
- [ ] Context window manager for multi-turn conversations
|
|
205
|
+
- [ ] More hallucination detection strategies (self-consistency, chain-of-thought verification)
|
|
206
|
+
|
|
207
|
+
---
|
|
208
|
+
|
|
209
|
+
## 📄 License
|
|
210
|
+
|
|
211
|
+
MIT License — see [LICENSE](LICENSE) for details.
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Hallutok - Anti-Hallucination & Token Optimization Package
|
|
3
|
+
==========================================================
|
|
4
|
+
Compatible with Groq API and Google Gemini API.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from .client import HallutokClient
|
|
8
|
+
from .optimizer.token_optimizer import TokenOptimizer
|
|
9
|
+
from .antihallucination.validator import HallucinationValidator
|
|
10
|
+
from .providers.groq_provider import GroqProvider
|
|
11
|
+
from .providers.gemini_provider import GeminiProvider
|
|
12
|
+
|
|
13
|
+
__version__ = "0.1.0"
|
|
14
|
+
__author__ = "Hallutok"
|
|
15
|
+
# Package by Joel Pawar (AI Engineer, LoveLocal).
# NOTE: every entry of ``__all__`` must name an attribute of this module.
# A free-text credit inside the list makes ``from hallutok import *`` raise
# AttributeError, so the credit is kept in this comment instead.
__all__ = [
    "HallutokClient",
    "TokenOptimizer",
    "HallucinationValidator",
    "GroqProvider",
    "GeminiProvider",
]
|
|
@@ -0,0 +1,207 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Hallucination Validator Module
|
|
3
|
+
==============================
|
|
4
|
+
Detects and mitigates common LLM hallucination patterns in generated text.
|
|
5
|
+
|
|
6
|
+
Strategies used:
|
|
7
|
+
1. Confidence keyword detection – flags hedging language ("I think", "maybe")
|
|
8
|
+
2. Factual consistency check – cross-checks key claims via self-ask retry (not performed by the checks in this module)
|
|
9
|
+
3. Citation/source detection – warns when claims lack grounding
|
|
10
|
+
4. Numeric anomaly detection – flags implausible numbers
|
|
11
|
+
5. Contradiction detection – simple intra-response contradiction scan
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
import re
|
|
15
|
+
from dataclasses import dataclass, field
|
|
16
|
+
from typing import Optional
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@dataclass
class ValidationResult:
    """Result of a hallucination-validation pass.

    Instances are created and populated by
    :meth:`HallucinationValidator.validate`.
    """

    # True when ``confidence_score`` is below the validator's threshold.
    is_likely_hallucination: bool = False
    # 0 = certain hallucination, 1 = clean.
    confidence_score: float = 1.0
    # One short label per issue found.
    flags: list[str] = field(default_factory=list)
    # Human-readable descriptions of the issues found.
    warnings: list[str] = field(default_factory=list)
    # Follow-up advice; filled in only when the response is flagged.
    suggestions: list[str] = field(default_factory=list)
    # The response text; a disclaimer is appended when the response is flagged.
    cleaned_response: Optional[str] = None


class HallucinationValidator:
    """
    Validates LLM responses for hallucination signals.

    Four regex-based checks run in sequence: hedging language, ungrounded
    claims, numeric anomalies and contradictions. Each check subtracts a
    penalty from a score that starts at 1.0; a final score below
    ``HALLUCINATION_THRESHOLD`` marks the response as a likely hallucination.

    Usage::

        validator = HallucinationValidator()
        result = validator.validate(response_text)
        if result.is_likely_hallucination:
            print(result.flags)
    """

    # --- Configurable thresholds ---
    HEDGE_PENALTY = 0.08            # deducted per hedging pattern that matches
    UNGROUNDED_PENALTY = 0.10       # deducted per ungrounded factual claim
    NUMERIC_PENALTY = 0.12          # deducted per suspicious number
    CONTRADICT_PENALTY = 0.15       # deducted per detected contradiction
    HALLUCINATION_THRESHOLD = 0.75  # below this → flagged as likely hallucination

    # Hedging / uncertainty markers
    HEDGE_PATTERNS = [
        r"\bI think\b", r"\bI believe\b", r"\bI'm not sure\b",
        r"\bprobably\b", r"\bmaybe\b", r"\bperhaps\b",
        r"\bit might be\b", r"\bit could be\b", r"\bpossibly\b",
        r"\bI'm not certain\b", r"\bas far as I know\b",
        r"\bto my knowledge\b", r"\bI (may|might) be wrong\b",
        # No trailing \b here: "%" is a non-word character, so "\b" after it
        # would only match when a word character follows immediately
        # ("100%sure") and never in ordinary text ("100% sure").
        r"\bnot 100%", r"\bI cannot confirm\b",
    ]

    # Signals of ungrounded factual claims (no citation/source)
    UNGROUNDED_PATTERNS = [
        r"\bstudies (show|suggest|indicate)\b",
        r"\baccording to experts\b",
        r"\bresearch (shows|suggests|has shown)\b",
        r"\bscientists (say|found|discovered)\b",
        r"\bstatistics show\b",
        r"\bdata (shows|indicates|suggests)\b",
        r"\bevidence (shows|suggests)\b",
    ]

    # Plausible numeric ranges that are ALMOST ALWAYS wrong if exceeded.
    # Each entry is (pattern, max_value); group 1 must capture the number.
    NUMERIC_SANITY_CHECKS = [
        # Percentages > 100. The number may use thousands separators
        # ("1,500"), be written without them ("1500"), and carry a decimal
        # part ("99.5"). The lookbehinds stop a match from starting in the
        # middle of a number, so "0.150%" is read as 0.15 rather than 150.
        (r"(?<![\w.])(?<!\d,)((?:\d{1,3}(?:,\d{3})+|\d+)(?:\.\d+)?)\s*%", 100),
    ]

    # Simple contradiction pairs
    CONTRADICTION_PAIRS = [
        (r"\balways\b", r"\bnever\b"),
        (r"\ball\b", r"\bnone\b"),
        (r"\bincreases?\b", r"\bdecreases?\b"),
        (r"\bimproves?\b", r"\bworsens?\b"),
    ]

    def validate(self, response: str, prompt: Optional[str] = None) -> ValidationResult:
        """
        Run all validation checks on *response*.

        Args:
            response: The LLM-generated text to validate.
            prompt:   (Optional) original prompt — currently unused; kept
                      for future context-aware checks.

        Returns:
            A :class:`ValidationResult` instance. Its ``cleaned_response``
            is *response* itself, with a disclaimer appended when the
            response is flagged and at least one warning was recorded.
        """
        result = ValidationResult(cleaned_response=response)
        score = 1.0

        checks = (
            self._check_hedging,
            self._check_ungrounded_claims,
            self._check_numeric_anomalies,
            self._check_contradictions,
        )
        for check in checks:
            score, result = check(response, score, result)

        # Clamp, then round once and decide on the rounded value so the
        # reported score and the flag can never disagree because of
        # floating-point noise right at the threshold.
        result.confidence_score = round(max(0.0, min(1.0, score)), 3)
        result.is_likely_hallucination = (
            result.confidence_score < self.HALLUCINATION_THRESHOLD
        )

        if result.is_likely_hallucination:
            result.suggestions.append(
                "Consider asking the model to cite sources or verify claims."
            )
            result.suggestions.append(
                "Use a lower temperature setting to reduce creativity-driven errors."
            )
            if result.warnings:
                result.cleaned_response = self._soft_clean(response)

        return result

    # ------------------------------------------------------------------
    # Checks
    # ------------------------------------------------------------------

    def _check_hedging(
        self, text: str, score: float, result: ValidationResult
    ) -> tuple[float, ValidationResult]:
        """Penalise hedging phrases; each pattern counts once, total capped at 0.35."""
        hits = []
        for pattern in self.HEDGE_PATTERNS:
            match = re.search(pattern, text, re.IGNORECASE)
            if match:
                # Report the words found in the text, not the regex source.
                hits.append(match.group(0).strip())
        if hits:
            score -= min(self.HEDGE_PENALTY * len(hits), 0.35)
            result.flags.append(f"Hedging language detected ({len(hits)} markers)")
            result.warnings.append(
                f"Response uses uncertain language: {', '.join(hits[:5])}"
            )
        return score, result

    def _check_ungrounded_claims(
        self, text: str, score: float, result: ValidationResult
    ) -> tuple[float, ValidationResult]:
        """Penalise "studies show"-style claims when the text has no citation marker.

        Every occurrence counts; the total penalty is capped at 0.30.
        """
        claim_count = sum(
            1
            for pattern in self.UNGROUNDED_PATTERNS
            for _ in re.finditer(pattern, text, re.IGNORECASE)
        )

        # Check if any proper citation exists in the text
        citation_present = bool(
            re.search(r"https?://|doi:|et al\.|ibid\.|\[\d+\]", text)
        )

        if claim_count and not citation_present:
            score -= min(self.UNGROUNDED_PENALTY * claim_count, 0.30)
            result.flags.append(
                f"Ungrounded factual claims ({claim_count} found, no citations)"
            )
            result.warnings.append(
                "Factual claims made without citing sources — high hallucination risk."
            )
        return score, result

    def _check_numeric_anomalies(
        self, text: str, score: float, result: ValidationResult
    ) -> tuple[float, ValidationResult]:
        """Penalise every number that exceeds the limit of its sanity check."""
        for pattern, max_val in self.NUMERIC_SANITY_CHECKS:
            for match in re.finditer(pattern, text, re.IGNORECASE):
                try:
                    value = float(match.group(1).replace(",", ""))
                except ValueError:
                    # A custom pattern captured something non-numeric.
                    continue
                if value > max_val:
                    shown = match.group(0).strip()
                    score -= self.NUMERIC_PENALTY
                    result.flags.append(
                        f"Implausible numeric value: {shown!r} (exceeds {max_val})"
                    )
                    result.warnings.append(
                        f"Numeric value {shown!r} looks implausible."
                    )
        return score, result

    def _check_contradictions(
        self, text: str, score: float, result: ValidationResult
    ) -> tuple[float, ValidationResult]:
        """Penalise each opposing word pair whose two sides both occur in the text."""
        text_lower = text.lower()
        for a_pat, b_pat in self.CONTRADICTION_PAIRS:
            a = re.search(a_pat, text_lower)
            b = re.search(b_pat, text_lower)
            if a and b:
                score -= self.CONTRADICT_PENALTY
                # Use the matched words so regex syntax such as "increases?"
                # never leaks into user-facing messages.
                a_word = a.group(0)
                b_word = b.group(0)
                result.flags.append(
                    f"Possible contradiction: '{a_word}' and '{b_word}' both appear"
                )
                result.warnings.append(
                    f"Contradictory terms detected: '{a_word}' vs '{b_word}'."
                )
        return score, result

    # ------------------------------------------------------------------
    # Helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _soft_clean(text: str) -> str:
        """
        Light cleanup: adds a disclaimer at the end when hallucination
        risk is detected, rather than silently mutating the content.

        The disclaimer is not added a second time if *text* already
        contains it.
        """
        disclaimer = (
            "\n\n[⚠️ Hallutok Warning: This response may contain uncertain or "
            "unverified information. Please cross-check important claims.]"
        )
        if disclaimer not in text:
            return text + disclaimer
        return text
|