hallx 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hallx-1.0.0/LICENSE +21 -0
- hallx-1.0.0/PKG-INFO +235 -0
- hallx-1.0.0/README.md +203 -0
- hallx-1.0.0/hallx/__init__.py +33 -0
- hallx-1.0.0/hallx/adapters/__init__.py +21 -0
- hallx-1.0.0/hallx/adapters/anthropic.py +42 -0
- hallx-1.0.0/hallx/adapters/base.py +105 -0
- hallx-1.0.0/hallx/adapters/gemini.py +69 -0
- hallx-1.0.0/hallx/adapters/grok.py +34 -0
- hallx-1.0.0/hallx/adapters/huggingface.py +50 -0
- hallx-1.0.0/hallx/adapters/ollama.py +71 -0
- hallx-1.0.0/hallx/adapters/openai.py +34 -0
- hallx-1.0.0/hallx/adapters/openrouter.py +34 -0
- hallx-1.0.0/hallx/adapters/perplexity.py +34 -0
- hallx-1.0.0/hallx/calibration.py +263 -0
- hallx-1.0.0/hallx/consistency.py +137 -0
- hallx-1.0.0/hallx/core.py +313 -0
- hallx-1.0.0/hallx/grounding.py +152 -0
- hallx-1.0.0/hallx/py.typed +1 -0
- hallx-1.0.0/hallx/retry.py +61 -0
- hallx-1.0.0/hallx/schema.py +100 -0
- hallx-1.0.0/hallx/scoring.py +71 -0
- hallx-1.0.0/hallx/types.py +55 -0
- hallx-1.0.0/hallx/utils/text.py +25 -0
- hallx-1.0.0/hallx.egg-info/PKG-INFO +235 -0
- hallx-1.0.0/hallx.egg-info/SOURCES.txt +38 -0
- hallx-1.0.0/hallx.egg-info/dependency_links.txt +1 -0
- hallx-1.0.0/hallx.egg-info/requires.txt +8 -0
- hallx-1.0.0/hallx.egg-info/top_level.txt +1 -0
- hallx-1.0.0/pyproject.toml +59 -0
- hallx-1.0.0/setup.cfg +4 -0
- hallx-1.0.0/tests/test_adapters.py +57 -0
- hallx-1.0.0/tests/test_calibration.py +67 -0
- hallx-1.0.0/tests/test_calibration_paths.py +16 -0
- hallx-1.0.0/tests/test_consistency.py +53 -0
- hallx-1.0.0/tests/test_grounding.py +32 -0
- hallx-1.0.0/tests/test_production.py +60 -0
- hallx-1.0.0/tests/test_retry.py +27 -0
- hallx-1.0.0/tests/test_schema.py +53 -0
- hallx-1.0.0/tests/test_scoring.py +132 -0
hallx-1.0.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 hallx contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
hallx-1.0.0/PKG-INFO
ADDED
|
@@ -0,0 +1,235 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: hallx
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: Lightweight hallucination risk scoring for LLM outputs
|
|
5
|
+
Author: Dhanush Kandhan
|
|
6
|
+
Maintainer: Dhanush Kandhan
|
|
7
|
+
License-Expression: MIT
|
|
8
|
+
Project-URL: Homepage, https://github.com/dhanushk-offl/hallx
|
|
9
|
+
Project-URL: Repository, https://github.com/dhanushk-offl/hallx
|
|
10
|
+
Project-URL: Issues, https://github.com/dhanushk-offl/hallx/issues
|
|
11
|
+
Project-URL: Documentation, https://github.com/dhanushk-offl/hallx#readme
|
|
12
|
+
Keywords: llm,hallucination,validation,scoring,ai
|
|
13
|
+
Classifier: Development Status :: 4 - Beta
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
20
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
21
|
+
Requires-Python: >=3.9
|
|
22
|
+
Description-Content-Type: text/markdown
|
|
23
|
+
License-File: LICENSE
|
|
24
|
+
Requires-Dist: jsonschema<5.0.0,>=4.0.0
|
|
25
|
+
Requires-Dist: rapidfuzz<4.0.0,>=3.0.0
|
|
26
|
+
Provides-Extra: dev
|
|
27
|
+
Requires-Dist: pytest<9.0.0,>=8.0.0; extra == "dev"
|
|
28
|
+
Requires-Dist: pytest-asyncio<1.0.0,>=0.23.0; extra == "dev"
|
|
29
|
+
Requires-Dist: build<2.0.0,>=1.2.0; extra == "dev"
|
|
30
|
+
Requires-Dist: twine<7.0.0,>=6.0.0; extra == "dev"
|
|
31
|
+
Dynamic: license-file
|
|
32
|
+
|
|
33
|
+
# hallx
|
|
34
|
+
|
|
35
|
+
[](https://github.com/dhanushk-offl/hallx/actions/workflows/test.yml)
|
|
36
|
+
[](https://github.com/dhanushk-offl/hallx/actions/workflows/release.yml)
|
|
37
|
+
[](https://scorecard.dev/viewer/?uri=github.com/dhanushk-offl/hallx)
|
|
38
|
+
[](https://pypi.org/project/hallx/)
|
|
39
|
+
[](https://pypi.org/project/hallx/)
|
|
40
|
+
[](LICENSE)
|
|
41
|
+
|
|
42
|
+
Lightweight hallucination-risk scoring for production LLM pipelines.
|
|
43
|
+
|
|
44
|
+
## Overview
|
|
45
|
+
|
|
46
|
+
| Area | What Hallx Provides |
|
|
47
|
+
|---|---|
|
|
48
|
+
| Risk output | `confidence` (`0.0` to `1.0`) and `risk_level` (`high`, `medium`, `low`) |
|
|
49
|
+
| Diagnostics | `issues` list for tracing weak signals and policy failures |
|
|
50
|
+
| Actionability | `recommendation` payload (`action`, `suggested_temperature`, `suggestions`) |
|
|
51
|
+
| API modes | Sync and async checks |
|
|
52
|
+
| Integrations | Adapter-based and callable-based workflows |
|
|
53
|
+
| Operations | Feedback storage and calibration reporting |
|
|
54
|
+
|
|
55
|
+
Hallx is designed as a guardrail layer before downstream actions such as API responses, automation steps, and database writes.
|
|
56
|
+
|
|
57
|
+
## Installation
|
|
58
|
+
|
|
59
|
+
```bash
|
|
60
|
+
pip install hallx
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
Development install:
|
|
64
|
+
|
|
65
|
+
```bash
|
|
66
|
+
pip install -e .[dev]
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
## Quick Start
|
|
70
|
+
|
|
71
|
+
```python
|
|
72
|
+
from hallx import Hallx
|
|
73
|
+
|
|
74
|
+
checker = Hallx(profile="balanced", strict=False)
|
|
75
|
+
result = checker.check(
|
|
76
|
+
prompt="Summarize refund policy",
|
|
77
|
+
response={"summary": "Refunds are allowed within 30 days."},
|
|
78
|
+
context=["Refunds are allowed within 30 days of purchase."],
|
|
79
|
+
schema={
|
|
80
|
+
"type": "object",
|
|
81
|
+
"properties": {"summary": {"type": "string"}},
|
|
82
|
+
"required": ["summary"],
|
|
83
|
+
"additionalProperties": False,
|
|
84
|
+
},
|
|
85
|
+
)
|
|
86
|
+
|
|
87
|
+
print(result.confidence, result.risk_level)
|
|
88
|
+
print(result.scores)
|
|
89
|
+
print(result.issues)
|
|
90
|
+
print(result.recommendation)
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
## Scoring Model
|
|
94
|
+
|
|
95
|
+
Hallx uses heuristic risk scoring across three signals:
|
|
96
|
+
|
|
97
|
+
| Signal | Description |
|
|
98
|
+
|---|---|
|
|
99
|
+
| `schema` | JSON schema validity and null-injection checks |
|
|
100
|
+
| `consistency` | Stability across repeated generations |
|
|
101
|
+
| `grounding` | Claim-context alignment and source-integrity checks |
|
|
102
|
+
|
|
103
|
+
Confidence formula:
|
|
104
|
+
|
|
105
|
+
```text
|
|
106
|
+
confidence = clamp(
|
|
107
|
+
schema_score * w_schema +
|
|
108
|
+
consistency_score * w_consistency +
|
|
109
|
+
grounding_score * w_grounding,
|
|
110
|
+
0.0, 1.0
|
|
111
|
+
)
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
Default (`balanced`) weights:
|
|
115
|
+
|
|
116
|
+
| Weight | Value |
|
|
117
|
+
|---|---|
|
|
118
|
+
| `w_schema` | `0.34` |
|
|
119
|
+
| `w_consistency` | `0.33` |
|
|
120
|
+
| `w_grounding` | `0.33` |
|
|
121
|
+
|
|
122
|
+
Risk mapping:
|
|
123
|
+
|
|
124
|
+
| Confidence range | Risk |
|
|
125
|
+
|---|---|
|
|
126
|
+
| `< 0.40` | `high` |
|
|
127
|
+
| `>= 0.40` and `< 0.75` | `medium` |
|
|
128
|
+
| `>= 0.75` | `low` |
|
|
129
|
+
|
|
130
|
+
Note: skipped checks are penalized by default to avoid over-trusting partial analysis.
|
|
131
|
+
|
|
132
|
+
## Safety Profiles
|
|
133
|
+
|
|
134
|
+
| Profile | Goal | Default `consistency_runs` | Skip penalty |
|
|
135
|
+
|---|---|---:|---:|
|
|
136
|
+
| `fast` | lower latency | 2 | 0.15 |
|
|
137
|
+
| `balanced` | general-purpose | 3 | 0.25 |
|
|
138
|
+
| `strict` | stronger scrutiny | 4 | 0.40 |
|
|
139
|
+
|
|
140
|
+
```python
|
|
141
|
+
from hallx import Hallx
|
|
142
|
+
|
|
143
|
+
checker = Hallx(profile="strict")
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
You can override `weights`, `consistency_runs`, and `skip_penalty` as needed.
|
|
147
|
+
|
|
148
|
+
## Workflow
|
|
149
|
+
|
|
150
|
+

|
|
151
|
+
|
|
152
|
+
1. Collect prompt, optional context, optional schema.
|
|
153
|
+
2. Generate a model response through an adapter or callable.
|
|
154
|
+
3. Run `schema`, `consistency`, and `grounding` checks.
|
|
155
|
+
4. Aggregate scores into `confidence` and `risk_level`.
|
|
156
|
+
5. Apply policy (`proceed` or `retry`) using recommendation metadata.
|
|
157
|
+
6. Optionally record reviewed outcomes for calibration.
|
|
158
|
+
|
|
159
|
+
## Adapters
|
|
160
|
+
|
|
161
|
+
| Provider adapter |
|
|
162
|
+
|---|
|
|
163
|
+
| OpenAI |
|
|
164
|
+
| Anthropic |
|
|
165
|
+
| Gemini |
|
|
166
|
+
| OpenRouter |
|
|
167
|
+
| Perplexity |
|
|
168
|
+
| Grok |
|
|
169
|
+
| HuggingFace |
|
|
170
|
+
| Ollama |
|
|
171
|
+
|
|
172
|
+
## Samples
|
|
173
|
+
|
|
174
|
+
| Sample | Purpose |
|
|
175
|
+
|---|---|
|
|
176
|
+
| `samples/basic_sync.py` | minimal sync workflow |
|
|
177
|
+
| `samples/async_openai_adapter.py` | async provider check with context |
|
|
178
|
+
| `samples/async_openai_adapter_no_context.py` | no-context behavior and weighting example |
|
|
179
|
+
| `samples/retry_strategy.py` | recommendation-driven retry policy |
|
|
180
|
+
| `samples/strict_mode.py` | strict blocking behavior |
|
|
181
|
+
| `samples/feedback_calibration.py` | local feedback storage and calibration report |
|
|
182
|
+
| `samples/async_openai_feedback_calibration.py` | async generation + feedback in one loop |
|
|
183
|
+
|
|
184
|
+
## Feedback Storage and Calibration
|
|
185
|
+
|
|
186
|
+
```python
|
|
187
|
+
from hallx import Hallx
|
|
188
|
+
|
|
189
|
+
checker = Hallx(feedback_db_path="/var/lib/myapp/hallx-feedback.sqlite3")
|
|
190
|
+
|
|
191
|
+
result = checker.check(prompt="p", response="r", context=["c"])
|
|
192
|
+
checker.record_outcome(
|
|
193
|
+
result=result,
|
|
194
|
+
label="hallucinated", # aliases: safe -> correct, unsafe -> hallucinated
|
|
195
|
+
metadata={"reviewer": "qa-team"},
|
|
196
|
+
prompt="p",
|
|
197
|
+
response_excerpt="r",
|
|
198
|
+
)
|
|
199
|
+
|
|
200
|
+
report = checker.calibration_report(window_days=30)
|
|
201
|
+
print(report["suggested_threshold"], report["threshold_metrics"])
|
|
202
|
+
```
|
|
203
|
+
|
|
204
|
+
Default DB path resolution:
|
|
205
|
+
|
|
206
|
+
| Environment | Default path |
|
|
207
|
+
|---|---|
|
|
208
|
+
| Env override | `HALLX_FEEDBACK_DB` |
|
|
209
|
+
| Windows | `%LOCALAPPDATA%\hallx\feedback.sqlite3` (fallback `%APPDATA%`) |
|
|
210
|
+
| macOS | `~/Library/Application Support/hallx/feedback.sqlite3` |
|
|
211
|
+
| Linux/servers | `$XDG_DATA_HOME/hallx/feedback.sqlite3` or `~/.local/share/hallx/feedback.sqlite3` |
|
|
212
|
+
|
|
213
|
+
## Production Notes
|
|
214
|
+
|
|
215
|
+
| Recommendation | Why |
|
|
216
|
+
|---|---|
|
|
217
|
+
| Enable strict mode on sensitive paths | block high-risk responses before side effects |
|
|
218
|
+
| Log `confidence`, `risk_level`, `issues` | support auditing and threshold tuning |
|
|
219
|
+
| Use calibration report regularly | adjust thresholds with real reviewed outcomes |
|
|
220
|
+
| Keep context quality high | grounding quality depends on evidence quality |
|
|
221
|
+
|
|
222
|
+
## Known Limitations
|
|
223
|
+
|
|
224
|
+
- Hallx is heuristic and does not provide formal factual guarantees.
|
|
225
|
+
- High confidence can still be wrong if context is missing, stale, or incorrect.
|
|
226
|
+
- Similarity-based checks can miss nuanced semantic contradictions.
|
|
227
|
+
- High-stakes domains should combine Hallx with domain validators and human review.
|
|
228
|
+
|
|
229
|
+
## Documentation
|
|
230
|
+
|
|
231
|
+
- [Usage Guide](docs/USAGE.md)
|
|
232
|
+
- [Production Notes](docs/PRODUCTION.md)
|
|
233
|
+
- [Contributing Guide](CONTRIBUTING.md)
|
|
234
|
+
- [Code of Conduct](CODE_OF_CONDUCT.md)
|
|
235
|
+
- [License](LICENSE)
|
hallx-1.0.0/README.md
ADDED
|
@@ -0,0 +1,203 @@
|
|
|
1
|
+
# hallx
|
|
2
|
+
|
|
3
|
+
[](https://github.com/dhanushk-offl/hallx/actions/workflows/test.yml)
|
|
4
|
+
[](https://github.com/dhanushk-offl/hallx/actions/workflows/release.yml)
|
|
5
|
+
[](https://scorecard.dev/viewer/?uri=github.com/dhanushk-offl/hallx)
|
|
6
|
+
[](https://pypi.org/project/hallx/)
|
|
7
|
+
[](https://pypi.org/project/hallx/)
|
|
8
|
+
[](LICENSE)
|
|
9
|
+
|
|
10
|
+
Lightweight hallucination-risk scoring for production LLM pipelines.
|
|
11
|
+
|
|
12
|
+
## Overview
|
|
13
|
+
|
|
14
|
+
| Area | What Hallx Provides |
|
|
15
|
+
|---|---|
|
|
16
|
+
| Risk output | `confidence` (`0.0` to `1.0`) and `risk_level` (`high`, `medium`, `low`) |
|
|
17
|
+
| Diagnostics | `issues` list for tracing weak signals and policy failures |
|
|
18
|
+
| Actionability | `recommendation` payload (`action`, `suggested_temperature`, `suggestions`) |
|
|
19
|
+
| API modes | Sync and async checks |
|
|
20
|
+
| Integrations | Adapter-based and callable-based workflows |
|
|
21
|
+
| Operations | Feedback storage and calibration reporting |
|
|
22
|
+
|
|
23
|
+
Hallx is designed as a guardrail layer before downstream actions such as API responses, automation steps, and database writes.
|
|
24
|
+
|
|
25
|
+
## Installation
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
pip install hallx
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
Development install:
|
|
32
|
+
|
|
33
|
+
```bash
|
|
34
|
+
pip install -e .[dev]
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
## Quick Start
|
|
38
|
+
|
|
39
|
+
```python
|
|
40
|
+
from hallx import Hallx
|
|
41
|
+
|
|
42
|
+
checker = Hallx(profile="balanced", strict=False)
|
|
43
|
+
result = checker.check(
|
|
44
|
+
prompt="Summarize refund policy",
|
|
45
|
+
response={"summary": "Refunds are allowed within 30 days."},
|
|
46
|
+
context=["Refunds are allowed within 30 days of purchase."],
|
|
47
|
+
schema={
|
|
48
|
+
"type": "object",
|
|
49
|
+
"properties": {"summary": {"type": "string"}},
|
|
50
|
+
"required": ["summary"],
|
|
51
|
+
"additionalProperties": False,
|
|
52
|
+
},
|
|
53
|
+
)
|
|
54
|
+
|
|
55
|
+
print(result.confidence, result.risk_level)
|
|
56
|
+
print(result.scores)
|
|
57
|
+
print(result.issues)
|
|
58
|
+
print(result.recommendation)
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
## Scoring Model
|
|
62
|
+
|
|
63
|
+
Hallx uses heuristic risk scoring across three signals:
|
|
64
|
+
|
|
65
|
+
| Signal | Description |
|
|
66
|
+
|---|---|
|
|
67
|
+
| `schema` | JSON schema validity and null-injection checks |
|
|
68
|
+
| `consistency` | Stability across repeated generations |
|
|
69
|
+
| `grounding` | Claim-context alignment and source-integrity checks |
|
|
70
|
+
|
|
71
|
+
Confidence formula:
|
|
72
|
+
|
|
73
|
+
```text
|
|
74
|
+
confidence = clamp(
|
|
75
|
+
schema_score * w_schema +
|
|
76
|
+
consistency_score * w_consistency +
|
|
77
|
+
grounding_score * w_grounding,
|
|
78
|
+
0.0, 1.0
|
|
79
|
+
)
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
Default (`balanced`) weights:
|
|
83
|
+
|
|
84
|
+
| Weight | Value |
|
|
85
|
+
|---|---|
|
|
86
|
+
| `w_schema` | `0.34` |
|
|
87
|
+
| `w_consistency` | `0.33` |
|
|
88
|
+
| `w_grounding` | `0.33` |
|
|
89
|
+
|
|
90
|
+
Risk mapping:
|
|
91
|
+
|
|
92
|
+
| Confidence range | Risk |
|
|
93
|
+
|---|---|
|
|
94
|
+
| `< 0.40` | `high` |
|
|
95
|
+
| `>= 0.40` and `< 0.75` | `medium` |
|
|
96
|
+
| `>= 0.75` | `low` |
|
|
97
|
+
|
|
98
|
+
Note: skipped checks are penalized by default to avoid over-trusting partial analysis.
|
|
99
|
+
|
|
100
|
+
## Safety Profiles
|
|
101
|
+
|
|
102
|
+
| Profile | Goal | Default `consistency_runs` | Skip penalty |
|
|
103
|
+
|---|---|---:|---:|
|
|
104
|
+
| `fast` | lower latency | 2 | 0.15 |
|
|
105
|
+
| `balanced` | general-purpose | 3 | 0.25 |
|
|
106
|
+
| `strict` | stronger scrutiny | 4 | 0.40 |
|
|
107
|
+
|
|
108
|
+
```python
|
|
109
|
+
from hallx import Hallx
|
|
110
|
+
|
|
111
|
+
checker = Hallx(profile="strict")
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
You can override `weights`, `consistency_runs`, and `skip_penalty` as needed.
|
|
115
|
+
|
|
116
|
+
## Workflow
|
|
117
|
+
|
|
118
|
+

|
|
119
|
+
|
|
120
|
+
1. Collect prompt, optional context, optional schema.
|
|
121
|
+
2. Generate a model response through an adapter or callable.
|
|
122
|
+
3. Run `schema`, `consistency`, and `grounding` checks.
|
|
123
|
+
4. Aggregate scores into `confidence` and `risk_level`.
|
|
124
|
+
5. Apply policy (`proceed` or `retry`) using recommendation metadata.
|
|
125
|
+
6. Optionally record reviewed outcomes for calibration.
|
|
126
|
+
|
|
127
|
+
## Adapters
|
|
128
|
+
|
|
129
|
+
| Provider adapter |
|
|
130
|
+
|---|
|
|
131
|
+
| OpenAI |
|
|
132
|
+
| Anthropic |
|
|
133
|
+
| Gemini |
|
|
134
|
+
| OpenRouter |
|
|
135
|
+
| Perplexity |
|
|
136
|
+
| Grok |
|
|
137
|
+
| HuggingFace |
|
|
138
|
+
| Ollama |
|
|
139
|
+
|
|
140
|
+
## Samples
|
|
141
|
+
|
|
142
|
+
| Sample | Purpose |
|
|
143
|
+
|---|---|
|
|
144
|
+
| `samples/basic_sync.py` | minimal sync workflow |
|
|
145
|
+
| `samples/async_openai_adapter.py` | async provider check with context |
|
|
146
|
+
| `samples/async_openai_adapter_no_context.py` | no-context behavior and weighting example |
|
|
147
|
+
| `samples/retry_strategy.py` | recommendation-driven retry policy |
|
|
148
|
+
| `samples/strict_mode.py` | strict blocking behavior |
|
|
149
|
+
| `samples/feedback_calibration.py` | local feedback storage and calibration report |
|
|
150
|
+
| `samples/async_openai_feedback_calibration.py` | async generation + feedback in one loop |
|
|
151
|
+
|
|
152
|
+
## Feedback Storage and Calibration
|
|
153
|
+
|
|
154
|
+
```python
|
|
155
|
+
from hallx import Hallx
|
|
156
|
+
|
|
157
|
+
checker = Hallx(feedback_db_path="/var/lib/myapp/hallx-feedback.sqlite3")
|
|
158
|
+
|
|
159
|
+
result = checker.check(prompt="p", response="r", context=["c"])
|
|
160
|
+
checker.record_outcome(
|
|
161
|
+
result=result,
|
|
162
|
+
label="hallucinated", # aliases: safe -> correct, unsafe -> hallucinated
|
|
163
|
+
metadata={"reviewer": "qa-team"},
|
|
164
|
+
prompt="p",
|
|
165
|
+
response_excerpt="r",
|
|
166
|
+
)
|
|
167
|
+
|
|
168
|
+
report = checker.calibration_report(window_days=30)
|
|
169
|
+
print(report["suggested_threshold"], report["threshold_metrics"])
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
Default DB path resolution:
|
|
173
|
+
|
|
174
|
+
| Environment | Default path |
|
|
175
|
+
|---|---|
|
|
176
|
+
| Env override | `HALLX_FEEDBACK_DB` |
|
|
177
|
+
| Windows | `%LOCALAPPDATA%\hallx\feedback.sqlite3` (fallback `%APPDATA%`) |
|
|
178
|
+
| macOS | `~/Library/Application Support/hallx/feedback.sqlite3` |
|
|
179
|
+
| Linux/servers | `$XDG_DATA_HOME/hallx/feedback.sqlite3` or `~/.local/share/hallx/feedback.sqlite3` |
|
|
180
|
+
|
|
181
|
+
## Production Notes
|
|
182
|
+
|
|
183
|
+
| Recommendation | Why |
|
|
184
|
+
|---|---|
|
|
185
|
+
| Enable strict mode on sensitive paths | block high-risk responses before side effects |
|
|
186
|
+
| Log `confidence`, `risk_level`, `issues` | support auditing and threshold tuning |
|
|
187
|
+
| Use calibration report regularly | adjust thresholds with real reviewed outcomes |
|
|
188
|
+
| Keep context quality high | grounding quality depends on evidence quality |
|
|
189
|
+
|
|
190
|
+
## Known Limitations
|
|
191
|
+
|
|
192
|
+
- Hallx is heuristic and does not provide formal factual guarantees.
|
|
193
|
+
- High confidence can still be wrong if context is missing, stale, or incorrect.
|
|
194
|
+
- Similarity-based checks can miss nuanced semantic contradictions.
|
|
195
|
+
- High-stakes domains should combine Hallx with domain validators and human review.
|
|
196
|
+
|
|
197
|
+
## Documentation
|
|
198
|
+
|
|
199
|
+
- [Usage Guide](docs/USAGE.md)
|
|
200
|
+
- [Production Notes](docs/PRODUCTION.md)
|
|
201
|
+
- [Contributing Guide](CONTRIBUTING.md)
|
|
202
|
+
- [Code of Conduct](CODE_OF_CONDUCT.md)
|
|
203
|
+
- [License](LICENSE)
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
"""hallx public API."""
|
|
2
|
+
|
|
3
|
+
from hallx.adapters import (
|
|
4
|
+
AnthropicAdapter,
|
|
5
|
+
GeminiAdapter,
|
|
6
|
+
GrokAdapter,
|
|
7
|
+
HuggingFaceAdapter,
|
|
8
|
+
OllamaAdapter,
|
|
9
|
+
OpenAIAdapter,
|
|
10
|
+
OpenRouterAdapter,
|
|
11
|
+
PerplexityAdapter,
|
|
12
|
+
)
|
|
13
|
+
from hallx.calibration import FeedbackStore, default_feedback_db_path
|
|
14
|
+
from hallx.core import Hallx
|
|
15
|
+
from hallx.types import HallxAdapterError, HallxHighRiskError, HallxResult, SchemaValidationResult
|
|
16
|
+
|
|
17
|
+
__all__ = [
|
|
18
|
+
"Hallx",
|
|
19
|
+
"HallxResult",
|
|
20
|
+
"SchemaValidationResult",
|
|
21
|
+
"HallxHighRiskError",
|
|
22
|
+
"HallxAdapterError",
|
|
23
|
+
"FeedbackStore",
|
|
24
|
+
"default_feedback_db_path",
|
|
25
|
+
"OpenAIAdapter",
|
|
26
|
+
"OpenRouterAdapter",
|
|
27
|
+
"AnthropicAdapter",
|
|
28
|
+
"PerplexityAdapter",
|
|
29
|
+
"HuggingFaceAdapter",
|
|
30
|
+
"OllamaAdapter",
|
|
31
|
+
"GeminiAdapter",
|
|
32
|
+
"GrokAdapter",
|
|
33
|
+
]
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
"""Provider adapters for plugging Hallx into common LLM APIs."""
|
|
2
|
+
|
|
3
|
+
from hallx.adapters.anthropic import AnthropicAdapter
|
|
4
|
+
from hallx.adapters.gemini import GeminiAdapter
|
|
5
|
+
from hallx.adapters.grok import GrokAdapter
|
|
6
|
+
from hallx.adapters.huggingface import HuggingFaceAdapter
|
|
7
|
+
from hallx.adapters.ollama import OllamaAdapter
|
|
8
|
+
from hallx.adapters.openai import OpenAIAdapter
|
|
9
|
+
from hallx.adapters.openrouter import OpenRouterAdapter
|
|
10
|
+
from hallx.adapters.perplexity import PerplexityAdapter
|
|
11
|
+
|
|
12
|
+
__all__ = [
|
|
13
|
+
"OpenAIAdapter",
|
|
14
|
+
"OpenRouterAdapter",
|
|
15
|
+
"AnthropicAdapter",
|
|
16
|
+
"PerplexityAdapter",
|
|
17
|
+
"HuggingFaceAdapter",
|
|
18
|
+
"OllamaAdapter",
|
|
19
|
+
"GeminiAdapter",
|
|
20
|
+
"GrokAdapter",
|
|
21
|
+
]
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
"""Anthropic adapter."""
|
|
2
|
+
|
|
3
|
+
from typing import Any, Mapping, Optional
|
|
4
|
+
|
|
5
|
+
from hallx.adapters.base import HTTPAdapter
|
|
6
|
+
from hallx.types import HallxAdapterError
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class AnthropicAdapter(HTTPAdapter):
    """Anthropic Messages API adapter.

    Supplies the Anthropic-specific headers, request payload and response
    parsing on top of the transport inherited from ``HTTPAdapter``.
    """

    endpoint = "https://api.anthropic.com/v1/messages"

    def _headers(self) -> dict[str, str]:
        """Return request headers that authenticate via ``x-api-key``.

        Starts from the parent's headers, adds ``x-api-key`` and
        ``anthropic-version``, and drops any ``Authorization`` entry.
        """
        headers = super()._headers()
        # Assigned after the parent headers are built, so these values
        # replace any same-named entries that were already present.
        headers["x-api-key"] = self.api_key
        headers["anthropic-version"] = "2023-06-01"
        headers.pop("Authorization", None)
        return headers

    def _build_payload(self, prompt: str, system_prompt: Optional[str]) -> Mapping[str, Any]:
        """Build the request body for a single user message.

        Args:
            prompt: Text sent as the only ``user`` message.
            system_prompt: Optional system text; the ``system`` key is only
                added when this value is truthy.

        Returns:
            A mapping with ``model``, ``max_tokens``, ``temperature`` and
            ``messages`` (plus ``system`` when provided).
        """
        payload: dict[str, Any] = {
            "model": self.model,
            "max_tokens": self.max_tokens,
            "temperature": self.temperature,
            "messages": [{"role": "user", "content": prompt}],
        }
        if system_prompt:
            payload["system"] = system_prompt
        return payload

    def _parse_response(self, body: Mapping[str, Any]) -> str:
        """Extract the generated text from a decoded response body.

        The ``content`` list is scanned in order and the ``text`` of the
        first block that carries a string ``text`` is returned. Blocks
        without text are skipped, so a text block that is not at index 0 is
        still found.

        Args:
            body: Decoded JSON object returned by the provider.

        Returns:
            The text of the first block in ``content`` that has string text.

        Raises:
            HallxAdapterError: If ``content`` is missing, not a list or
                empty; if a non-mapping block is met before any text is
                found; or if no block carries string text.
        """
        content = body.get("content")
        if not isinstance(content, list) or not content:
            raise HallxAdapterError("Anthropic response missing content")

        for block in content:
            if not isinstance(block, Mapping):
                raise HallxAdapterError("Anthropic content is malformed")
            text = block.get("text")
            if isinstance(text, str):
                return text

        raise HallxAdapterError("Anthropic content text missing")
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
"""Shared HTTP adapter primitives for LLM providers."""
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
import json
|
|
5
|
+
import os
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
from typing import Any, Dict, Mapping, Optional
|
|
8
|
+
from urllib import error, request
|
|
9
|
+
|
|
10
|
+
from hallx.types import HallxAdapterError
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@dataclass(frozen=True)
class AdapterConfig:
    """Immutable bundle of provider transport settings."""

    # Model identifier passed to the provider.
    model: str
    # API key string for the provider.
    api_key: str
    # Request timeout; defaults to 20.0.
    timeout_seconds: float = 20.0
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class HTTPAdapter:
    """Minimal HTTP JSON adapter with sync and async interfaces.

    Subclasses set ``endpoint`` and implement ``_build_payload`` and
    ``_parse_response``. This base class validates the constructor
    arguments, builds the default headers and performs the JSON POST.
    """

    endpoint: str

    def __init__(
        self,
        model: str,
        api_key: str,
        timeout_seconds: float = 20.0,
        temperature: float = 0.0,
        max_tokens: int = 256,
        extra_headers: Optional[Mapping[str, str]] = None,
    ) -> None:
        """Validate and store the adapter settings.

        Args:
            model: Model identifier; must contain a non-whitespace character.
            api_key: API key; must contain a non-whitespace character.
            timeout_seconds: Request timeout; must satisfy
                ``0 < timeout_seconds <= 120``.
            temperature: Stored after being clamped into ``[0.0, 2.0]``.
            max_tokens: Must be greater than zero.
            extra_headers: Optional headers merged over the defaults in
                ``_headers``; copied so later caller mutations have no effect.

        Raises:
            ValueError: If ``model``, ``api_key``, ``timeout_seconds`` or
                ``max_tokens`` fails the checks above.
        """
        if not model.strip():
            raise ValueError("model must be non-empty")
        if not api_key.strip():
            raise ValueError("api_key must be non-empty")
        if timeout_seconds <= 0.0 or timeout_seconds > 120.0:
            raise ValueError("timeout_seconds must be between 0 and 120")
        if max_tokens <= 0:
            raise ValueError("max_tokens must be > 0")

        self.model = model
        self.api_key = api_key
        self.timeout_seconds = timeout_seconds
        self.temperature = max(0.0, min(2.0, temperature))
        self.max_tokens = max_tokens
        self.extra_headers = dict(extra_headers or {})

    def generate(self, prompt: str, system_prompt: Optional[str] = None) -> str:
        """Generate text synchronously.

        Builds the payload, POSTs it to ``endpoint`` and returns whatever
        ``_parse_response`` extracts from the decoded body.
        """
        payload = self._build_payload(prompt=prompt, system_prompt=system_prompt)
        body = self._post_json(self.endpoint, payload, headers=self._headers())
        return self._parse_response(body)

    async def agenerate(self, prompt: str, system_prompt: Optional[str] = None) -> str:
        """Generate text asynchronously by running ``generate`` in a worker thread."""
        return await asyncio.to_thread(self.generate, prompt, system_prompt)

    @classmethod
    def from_env(cls, model: str, env_key_name: str, **kwargs: Any) -> "HTTPAdapter":
        """Construct an adapter whose API key is read from an environment variable.

        Args:
            model: Model identifier forwarded to the constructor.
            env_key_name: Name of the environment variable holding the key.
            **kwargs: Extra keyword arguments forwarded to the constructor.

        Raises:
            ValueError: If the variable is unset or empty.
        """
        api_key = os.getenv(env_key_name, "")
        if not api_key:
            raise ValueError(f"missing environment variable: {env_key_name}")
        return cls(model=model, api_key=api_key, **kwargs)

    def _headers(self) -> Dict[str, str]:
        """Return the default JSON/bearer headers with ``extra_headers`` merged on top."""
        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
            "Accept": "application/json",
        }
        # Applied last so caller-supplied headers override the defaults.
        headers.update(self.extra_headers)
        return headers

    def _build_payload(self, prompt: str, system_prompt: Optional[str]) -> Mapping[str, Any]:
        """Return the provider-specific request body; subclasses must override."""
        raise NotImplementedError

    def _parse_response(self, body: Mapping[str, Any]) -> str:
        """Extract generated text from a decoded response; subclasses must override."""
        raise NotImplementedError

    def _post_json(self, url: str, payload: Mapping[str, Any], headers: Mapping[str, str]) -> Dict[str, Any]:
        """POST ``payload`` as JSON to ``url`` and return the decoded JSON object.

        Args:
            url: Target URL.
            payload: JSON-serializable request body.
            headers: Request headers.

        Returns:
            The response body decoded into a ``dict``.

        Raises:
            HallxAdapterError: On an HTTP error status, a connection or
                socket failure (including timeouts), a body that is not
                valid UTF-8 JSON, or a JSON value that is not an object.
        """
        data = json.dumps(payload, ensure_ascii=True).encode("utf-8")
        req = request.Request(url=url, data=data, headers=dict(headers), method="POST")

        try:
            with request.urlopen(req, timeout=self.timeout_seconds) as resp:
                raw_bytes = resp.read()
        except error.HTTPError as exc:
            message = exc.read().decode("utf-8", errors="ignore") if hasattr(exc, "read") else ""
            raise HallxAdapterError(f"provider HTTP error {exc.code}: {message[:200]}") from exc
        except error.URLError as exc:
            raise HallxAdapterError(f"provider connection failed: {exc.reason}") from exc
        except OSError as exc:
            # urlopen does not wrap every socket failure in URLError; a
            # timeout or reset while waiting for or reading the response
            # arrives as a plain OSError and must be translated here too.
            raise HallxAdapterError(f"provider connection failed: {exc}") from exc

        try:
            # Decode inside the guarded block so an invalid UTF-8 body is
            # reported like any other non-JSON response.
            parsed = json.loads(raw_bytes.decode("utf-8"))
        except (UnicodeDecodeError, json.JSONDecodeError) as exc:
            raise HallxAdapterError("provider returned non-JSON response") from exc

        if not isinstance(parsed, dict):
            raise HallxAdapterError("provider response must be a JSON object")
        return parsed
|