groundrails 1.0.29__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- groundrails-1.0.29/LICENSE +10 -0
- groundrails-1.0.29/PKG-INFO +186 -0
- groundrails-1.0.29/README.md +126 -0
- groundrails-1.0.29/pyproject.toml +99 -0
- groundrails-1.0.29/setup.cfg +4 -0
- groundrails-1.0.29/src/groundrails/__init__.py +19 -0
- groundrails-1.0.29/src/groundrails/calibration.py +485 -0
- groundrails-1.0.29/src/groundrails/chunking.py +173 -0
- groundrails-1.0.29/src/groundrails/cli.py +477 -0
- groundrails-1.0.29/src/groundrails/config.py +306 -0
- groundrails-1.0.29/src/groundrails/config_document_processing.yaml +353 -0
- groundrails-1.0.29/src/groundrails/consistency.py +296 -0
- groundrails-1.0.29/src/groundrails/entity_check.py +520 -0
- groundrails-1.0.29/src/groundrails/extract.py +157 -0
- groundrails-1.0.29/src/groundrails/grounding.py +1400 -0
- groundrails-1.0.29/src/groundrails/lexical.py +723 -0
- groundrails-1.0.29/src/groundrails/lexical_mt.py +125 -0
- groundrails-1.0.29/src/groundrails/nli.py +97 -0
- groundrails-1.0.29/src/groundrails/sat/__init__.py +9 -0
- groundrails-1.0.29/src/groundrails/sat/_config.py +43 -0
- groundrails-1.0.29/src/groundrails/sat/_tokenizer.py +80 -0
- groundrails-1.0.29/src/groundrails/sat/_utils.py +77 -0
- groundrails-1.0.29/src/groundrails/sat/extract.py +169 -0
- groundrails-1.0.29/src/groundrails/sat/ov_backend.py +87 -0
- groundrails-1.0.29/src/groundrails/sat/segmenter.py +67 -0
- groundrails-1.0.29/src/groundrails/semantic.py +337 -0
- groundrails-1.0.29/src/groundrails/settings.py +150 -0
- groundrails-1.0.29/src/groundrails.egg-info/PKG-INFO +186 -0
- groundrails-1.0.29/src/groundrails.egg-info/SOURCES.txt +40 -0
- groundrails-1.0.29/src/groundrails.egg-info/dependency_links.txt +1 -0
- groundrails-1.0.29/src/groundrails.egg-info/entry_points.txt +2 -0
- groundrails-1.0.29/src/groundrails.egg-info/requires.txt +50 -0
- groundrails-1.0.29/src/groundrails.egg-info/top_level.txt +1 -0
- groundrails-1.0.29/tests/test_calibration.py +463 -0
- groundrails-1.0.29/tests/test_cli.py +89 -0
- groundrails-1.0.29/tests/test_config.py +206 -0
- groundrails-1.0.29/tests/test_document_processing.py +1013 -0
- groundrails-1.0.29/tests/test_equivalence_golden.py +77 -0
- groundrails-1.0.29/tests/test_lexical_grounding.py +343 -0
- groundrails-1.0.29/tests/test_mt_bridge.py +48 -0
- groundrails-1.0.29/tests/test_sat_segmenter.py +72 -0
- groundrails-1.0.29/tests/test_unsupported_language.py +73 -0
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
|
|
2
|
+
The MIT License (MIT)
|
|
3
|
+
Copyright (c) 2026, Konrad Jelen
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
|
6
|
+
|
|
7
|
+
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
|
8
|
+
|
|
9
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
10
|
+
|
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: groundrails
|
|
3
|
+
Version: 1.0.29
|
|
4
|
+
Summary: Grounding guardrails for agentic RAG - deterministic claim verification
|
|
5
|
+
Author: Konrad Jelen
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Classifier: Programming Language :: Python :: 3
|
|
8
|
+
Requires-Python: ~=3.12.0
|
|
9
|
+
Description-Content-Type: text/markdown
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Requires-Dist: loguru
|
|
12
|
+
Requires-Dist: tqdm
|
|
13
|
+
Requires-Dist: typer
|
|
14
|
+
Requires-Dist: python-dotenv
|
|
15
|
+
Requires-Dist: botocore
|
|
16
|
+
Requires-Dist: numpy
|
|
17
|
+
Requires-Dist: pandas>=2.0
|
|
18
|
+
Requires-Dist: scipy
|
|
19
|
+
Requires-Dist: scikit-learn>=1.3
|
|
20
|
+
Requires-Dist: pyarrow>=14.0
|
|
21
|
+
Requires-Dist: pyyaml
|
|
22
|
+
Requires-Dist: rank-bm25>=0.2.2
|
|
23
|
+
Requires-Dist: rapidfuzz>=3.14.5
|
|
24
|
+
Requires-Dist: wordfreq>=3.0
|
|
25
|
+
Requires-Dist: nltk>=3.8
|
|
26
|
+
Requires-Dist: lingua-language-detector>=2.0
|
|
27
|
+
Requires-Dist: huggingface_hub>=0.23
|
|
28
|
+
Requires-Dist: openvino>=2025.0
|
|
29
|
+
Requires-Dist: tokenizers>=0.20
|
|
30
|
+
Requires-Dist: argostranslate>=1.9
|
|
31
|
+
Requires-Dist: ctranslate2>=4.0
|
|
32
|
+
Requires-Dist: sacremoses>=0.1
|
|
33
|
+
Requires-Dist: sentencepiece>=0.2
|
|
34
|
+
Requires-Dist: subword-nmt>=0.3
|
|
35
|
+
Requires-Dist: bambi>=0.13
|
|
36
|
+
Requires-Dist: pymc>=5.10
|
|
37
|
+
Requires-Dist: arviz>=0.17
|
|
38
|
+
Provides-Extra: dev
|
|
39
|
+
Requires-Dist: build; extra == "dev"
|
|
40
|
+
Requires-Dist: ipykernel; extra == "dev"
|
|
41
|
+
Requires-Dist: ipython; extra == "dev"
|
|
42
|
+
Requires-Dist: nbdime; extra == "dev"
|
|
43
|
+
Requires-Dist: pip; extra == "dev"
|
|
44
|
+
Requires-Dist: pytest; extra == "dev"
|
|
45
|
+
Requires-Dist: pytest-cov; extra == "dev"
|
|
46
|
+
Requires-Dist: ruff; extra == "dev"
|
|
47
|
+
Requires-Dist: awscli; extra == "dev"
|
|
48
|
+
Requires-Dist: jupyter; extra == "dev"
|
|
49
|
+
Requires-Dist: matplotlib; extra == "dev"
|
|
50
|
+
Requires-Dist: jenkspy; extra == "dev"
|
|
51
|
+
Requires-Dist: nncf; extra == "dev"
|
|
52
|
+
Requires-Dist: onnxruntime>=1.17; extra == "dev"
|
|
53
|
+
Requires-Dist: wtpsplit-lite; extra == "dev"
|
|
54
|
+
Requires-Dist: rich; extra == "dev"
|
|
55
|
+
Provides-Extra: semantic
|
|
56
|
+
Requires-Dist: onnxruntime>=1.17; extra == "semantic"
|
|
57
|
+
Requires-Dist: transformers>=4.40; extra == "semantic"
|
|
58
|
+
Requires-Dist: faiss-cpu>=1.7; extra == "semantic"
|
|
59
|
+
Dynamic: license-file
|
|
60
|
+
|
|
61
|
+
# groundrails
|
|
62
|
+
|
|
63
|
+
[CI](https://github.com/stellarshenson/groundrails/actions/workflows/ci.yml)
|
|
64
|
+
[PyPI version](https://pypi.org/project/groundrails/)
|
|
65
|
+
[Downloads](https://pepy.tech/project/groundrails)
|
|
66
|
+
[Python 3.12](https://www.python.org/downloads/)
|
|
67
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
68
|
+
[Kolomolo](https://kolomolo.com)
|
|
69
|
+
[Donate](https://www.paypal.com/donate/?hosted_button_id=B4KPBJDLLXTSA)
|
|
70
|
+
|
|
71
|
+
Grounding guardrails for agentic RAG - deterministic, torch-free claim verification.
|
|
72
|
+
|
|
73
|
+
groundrails checks whether a claim is supported by source text and flags hallucinations and contradictions, with no LLM in the loop. It runs on CPU, returns a structured verdict per claim, and is the library extracted from the lexical-grounding research line (Rounds 1-12).
|
|
74
|
+
|
|
75
|
+
## Why
|
|
76
|
+
|
|
77
|
+
Agentic RAG can assert things its sources never said. The usual fix - a second LLM grading each answer - is non-deterministic, costs a model call per claim, and gives no auditable reason for its verdict. groundrails is the deterministic gate that runs before output reaches the user.
|
|
78
|
+
|
|
79
|
+
- **No LLM in the loop** - frozen logistic weights over lexical features → same input gives the same verdict on every run
|
|
80
|
+
- **Cheap** - CPU-only, torch-free core; milliseconds per claim, no GPU, no API call
|
|
81
|
+
- **Auditable** - every verdict carries per-layer scores and the exact numeric or entity mismatch that triggered a flag
|
|
82
|
+
- **Cross-lingual offline** - the claim-vs-evidence language gap is closed by an on-device MT bridge, with no translation API
|
|
83
|
+
- **Research-backed** - distilled from the lexical-grounding experiments (Rounds 1-12); see [Documentation](#documentation)
|
|
84
|
+
|
|
85
|
+
## What it does
|
|
86
|
+
|
|
87
|
+
- **Claim grounding** - locate a claim in source text across three lexical layers (regex exact, Levenshtein fuzzy, BM25 token-recall) and return a verdict with per-layer scores
|
|
88
|
+
- **Hallucination and contradiction detection** - numeric mismatch (`512` vs `1000`), named-entity mismatch (`H100` vs `A100`), and unsupported-claim flags
|
|
89
|
+
- **Cross-lingual grounding** - a claim in one language against evidence in another, via a torch-free MT bridge (argos / CTranslate2) and a SaT sentence segmenter (OpenVINO INT8)
|
|
90
|
+
- **Self-consistency** - intra-document divergence check (same entity or number category, different values)
|
|
91
|
+
- **Frozen-weight verdict** - a logistic manifold over 18 features at the `high` tier; deterministic, no per-call sampling
|
|
92
|
+
- **Optional semantic layer** - embedding retrieval + NLI entailment behind the `[semantic]` extra; off by default, keeps the core torch-free
|
|
93
|
+
|
|
94
|
+
## Install
|
|
95
|
+
|
|
96
|
+
```bash
|
|
97
|
+
pip install groundrails # core grounder (torch-free)
|
|
98
|
+
pip install "groundrails[semantic]" # add the optional embedding + NLI layer
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
## CLI
|
|
102
|
+
|
|
103
|
+
The `groundrails` command verifies claims against source text read as plain UTF-8.
|
|
104
|
+
|
|
105
|
+
```bash
|
|
106
|
+
# put the evidence in a file, then ground a claim against it
|
|
107
|
+
echo "The Eiffel Tower is located in Paris, France." > doc.txt
|
|
108
|
+
groundrails ground --claim "The Eiffel Tower is in Paris." --source doc.txt
|
|
109
|
+
# → exit 0 (grounded); prints the match type, per-layer scores, and matched text
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
- `groundrails ground --claim "<claim>" --source doc.txt` - ground one claim; exit 0 if grounded, 1 if not
|
|
113
|
+
- `groundrails ground --manifest claims.json --source doc.txt [--json]` - batch over many claims
|
|
114
|
+
- `groundrails extract-claims --document doc.md` - heuristic sentence-to-claim extractor
|
|
115
|
+
- `groundrails check-consistency --document doc.md` - intra-document divergence report
|
|
116
|
+
- `groundrails config` - print the resolved config + calibration block
|
|
117
|
+
- `groundrails setup` - first-run semantic model/cache config
|
|
118
|
+
|
|
119
|
+
`--semantic` adds the optional embedding + NLI bundle to `ground`.
|
|
120
|
+
|
|
121
|
+
## Python API
|
|
122
|
+
|
|
123
|
+
```python
|
|
124
|
+
from groundrails import ground, ground_batch
|
|
125
|
+
|
|
126
|
+
m = ground(
|
|
127
|
+
"The Eiffel Tower is in Paris.",
|
|
128
|
+
["The Eiffel Tower is located in Paris, France."],
|
|
129
|
+
)
|
|
130
|
+
print(m.match_type, m.combined_score, m.verdict_probability)
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
`ground_batch(claims, sources, ...)` runs many claims against the same sources and returns a list of verdicts.
|
|
134
|
+
|
|
135
|
+
## Language support
|
|
136
|
+
|
|
137
|
+
Cross-lingual grounding needs an argos `<lang>→en` model for the claim's language. English is native and needs no bridge. Nine non-English languages have models installed.
|
|
138
|
+
|
|
139
|
+
| Language | Code | Grounding |
|
|
140
|
+
|---|:---:|:---:|
|
|
141
|
+
| English | `en` | ✓ |
|
|
142
|
+
| Danish | `da` | ✓ |
|
|
143
|
+
| German | `de` | ✓ |
|
|
144
|
+
| Spanish | `es` | ✓ |
|
|
145
|
+
| French | `fr` | ✓ |
|
|
146
|
+
| Italian | `it` | ✓ |
|
|
147
|
+
| Norwegian Bokmål | `nb` | ✓ |
|
|
148
|
+
| Dutch | `nl` | ✓ |
|
|
149
|
+
| Portuguese | `pt` | ✓ |
|
|
150
|
+
| Swedish | `sv` | ✓ |
|
|
151
|
+
| Norwegian Nynorsk | `nn` | ✗ |
|
|
152
|
+
| Latin | `la` | ✗ |
|
|
153
|
+
| Yoruba | `yo` | ✗ |
|
|
154
|
+
| Estonian | `et` | ✗ |
|
|
155
|
+
| Esperanto | `eo` | ✗ |
|
|
156
|
+
| Tsonga | `ts` | ✗ |
|
|
157
|
+
| Tagalog | `tl` | ✗ |
|
|
158
|
+
| Catalan | `ca` | ✗ |
|
|
159
|
+
| Czech | `cs` | ✗ |
|
|
160
|
+
| Hungarian | `hu` | ✗ |
|
|
161
|
+
| Tswana | `tn` | ✗ |
|
|
162
|
+
|
|
163
|
+
`✓` grounded - English native, others via the argos MT bridge · `✗` no installed argos model → `UnsupportedLanguageError` (any language not listed defaults to `✗`)
|
|
164
|
+
|
|
165
|
+
- **Supported** - full cross-lingual grounding: the claim is translated to English, then recall-matched against the evidence
|
|
166
|
+
- **Unsupported** - a non-English claim with no installed model → `ground()` raises `UnsupportedLanguageError`; the claim is hard-blocked, not scored, so unsupported languages cannot pollute metrics (batch callers wrap per claim)
|
|
167
|
+
- **Add a language** - `argospm install translate-<code>_en` installs the model; the bridge picks it up automatically
|
|
168
|
+
- **Region tags** - the detector strips the region before lookup (`it-IT` → `it`, `nb-NO` → `nb`)
|
|
169
|
+
|
|
170
|
+
## Documentation
|
|
171
|
+
|
|
172
|
+
The `docs/` tree carries the concept, the calibration method, and the full research history behind the shipped weights.
|
|
173
|
+
|
|
174
|
+
- **Concept** - [`docs/grounding_concept.md`](docs/grounding_concept.md) - what grounding means here and how a verdict is assembled
|
|
175
|
+
- **Calibration** - [`docs/grounding_calibration.md`](docs/grounding_calibration.md) - how the frozen weights and thresholds were fit
|
|
176
|
+
- **Experiments log** - [`docs/experiments/lexical-grounding-experiments.md`](docs/experiments/lexical-grounding-experiments.md) - Rounds 1-12, what moved the metrics and what did not
|
|
177
|
+
- **State of the art** - [`docs/experiments/lexical-grounding-sota.md`](docs/experiments/lexical-grounding-sota.md) - how the deterministic cascade compares to published grounding methods
|
|
178
|
+
- **Positional analysis** - [`docs/lost_in_the_middle_grounding_analysis.md`](docs/lost_in_the_middle_grounding_analysis.md) - lost-in-the-middle behaviour over long evidence
|
|
179
|
+
|
|
180
|
+
## Project layout
|
|
181
|
+
|
|
182
|
+
- `src/groundrails/` - the grounder (`grounding`, `lexical`, `lexical_mt`, `entity_check`, `consistency`, `calibration`, `chunking`, `extract`, `sat/`, `config` + the shipped `config_document_processing.yaml`), plus the `cli` entry point, `settings`, and the optional `semantic` / `nli` layer
|
|
183
|
+
- `experiments/grounding/` - research harness (Rounds 1-12)
|
|
184
|
+
- `notebooks/` - calibration, SaT / OpenVINO conversion, manifold retraining
|
|
185
|
+
- `tests/` - grounder tests + the exact-equivalence golden
|
|
186
|
+
- `data/`, `models/`, `references/` - datasets, OpenVINO IR, papers (large/private content gitignored)
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
# groundrails
|
|
2
|
+
|
|
3
|
+
[CI](https://github.com/stellarshenson/groundrails/actions/workflows/ci.yml)
|
|
4
|
+
[PyPI version](https://pypi.org/project/groundrails/)
|
|
5
|
+
[Downloads](https://pepy.tech/project/groundrails)
|
|
6
|
+
[Python 3.12](https://www.python.org/downloads/)
|
|
7
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
8
|
+
[Kolomolo](https://kolomolo.com)
|
|
9
|
+
[Donate](https://www.paypal.com/donate/?hosted_button_id=B4KPBJDLLXTSA)
|
|
10
|
+
|
|
11
|
+
Grounding guardrails for agentic RAG - deterministic, torch-free claim verification.
|
|
12
|
+
|
|
13
|
+
groundrails checks whether a claim is supported by source text and flags hallucinations and contradictions, with no LLM in the loop. It runs on CPU, returns a structured verdict per claim, and is the library extracted from the lexical-grounding research line (Rounds 1-12).
|
|
14
|
+
|
|
15
|
+
## Why
|
|
16
|
+
|
|
17
|
+
Agentic RAG can assert things its sources never said. The usual fix - a second LLM grading each answer - is non-deterministic, costs a model call per claim, and gives no auditable reason for its verdict. groundrails is the deterministic gate that runs before output reaches the user.
|
|
18
|
+
|
|
19
|
+
- **No LLM in the loop** - frozen logistic weights over lexical features → same input gives the same verdict on every run
|
|
20
|
+
- **Cheap** - CPU-only, torch-free core; milliseconds per claim, no GPU, no API call
|
|
21
|
+
- **Auditable** - every verdict carries per-layer scores and the exact numeric or entity mismatch that triggered a flag
|
|
22
|
+
- **Cross-lingual offline** - the claim-vs-evidence language gap is closed by an on-device MT bridge, with no translation API
|
|
23
|
+
- **Research-backed** - distilled from the lexical-grounding experiments (Rounds 1-12); see [Documentation](#documentation)
|
|
24
|
+
|
|
25
|
+
## What it does
|
|
26
|
+
|
|
27
|
+
- **Claim grounding** - locate a claim in source text across three lexical layers (regex exact, Levenshtein fuzzy, BM25 token-recall) and return a verdict with per-layer scores
|
|
28
|
+
- **Hallucination and contradiction detection** - numeric mismatch (`512` vs `1000`), named-entity mismatch (`H100` vs `A100`), and unsupported-claim flags
|
|
29
|
+
- **Cross-lingual grounding** - a claim in one language against evidence in another, via a torch-free MT bridge (argos / CTranslate2) and a SaT sentence segmenter (OpenVINO INT8)
|
|
30
|
+
- **Self-consistency** - intra-document divergence check (same entity or number category, different values)
|
|
31
|
+
- **Frozen-weight verdict** - a logistic manifold over 18 features at the `high` tier; deterministic, no per-call sampling
|
|
32
|
+
- **Optional semantic layer** - embedding retrieval + NLI entailment behind the `[semantic]` extra; off by default, keeps the core torch-free
|
|
33
|
+
|
|
34
|
+
## Install
|
|
35
|
+
|
|
36
|
+
```bash
|
|
37
|
+
pip install groundrails # core grounder (torch-free)
|
|
38
|
+
pip install "groundrails[semantic]" # add the optional embedding + NLI layer
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
## CLI
|
|
42
|
+
|
|
43
|
+
The `groundrails` command verifies claims against source text read as plain UTF-8.
|
|
44
|
+
|
|
45
|
+
```bash
|
|
46
|
+
# put the evidence in a file, then ground a claim against it
|
|
47
|
+
echo "The Eiffel Tower is located in Paris, France." > doc.txt
|
|
48
|
+
groundrails ground --claim "The Eiffel Tower is in Paris." --source doc.txt
|
|
49
|
+
# → exit 0 (grounded); prints the match type, per-layer scores, and matched text
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
- `groundrails ground --claim "<claim>" --source doc.txt` - ground one claim; exit 0 if grounded, 1 if not
|
|
53
|
+
- `groundrails ground --manifest claims.json --source doc.txt [--json]` - batch over many claims
|
|
54
|
+
- `groundrails extract-claims --document doc.md` - heuristic sentence-to-claim extractor
|
|
55
|
+
- `groundrails check-consistency --document doc.md` - intra-document divergence report
|
|
56
|
+
- `groundrails config` - print the resolved config + calibration block
|
|
57
|
+
- `groundrails setup` - first-run semantic model/cache config
|
|
58
|
+
|
|
59
|
+
`--semantic` adds the optional embedding + NLI bundle to `ground`.
|
|
60
|
+
|
|
61
|
+
## Python API
|
|
62
|
+
|
|
63
|
+
```python
|
|
64
|
+
from groundrails import ground, ground_batch
|
|
65
|
+
|
|
66
|
+
m = ground(
|
|
67
|
+
"The Eiffel Tower is in Paris.",
|
|
68
|
+
["The Eiffel Tower is located in Paris, France."],
|
|
69
|
+
)
|
|
70
|
+
print(m.match_type, m.combined_score, m.verdict_probability)
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
`ground_batch(claims, sources, ...)` runs many claims against the same sources and returns a list of verdicts.
|
|
74
|
+
|
|
75
|
+
## Language support
|
|
76
|
+
|
|
77
|
+
Cross-lingual grounding needs an argos `<lang>→en` model for the claim's language. English is native and needs no bridge. Nine non-English languages have models installed.
|
|
78
|
+
|
|
79
|
+
| Language | Code | Grounding |
|
|
80
|
+
|---|:---:|:---:|
|
|
81
|
+
| English | `en` | ✓ |
|
|
82
|
+
| Danish | `da` | ✓ |
|
|
83
|
+
| German | `de` | ✓ |
|
|
84
|
+
| Spanish | `es` | ✓ |
|
|
85
|
+
| French | `fr` | ✓ |
|
|
86
|
+
| Italian | `it` | ✓ |
|
|
87
|
+
| Norwegian Bokmål | `nb` | ✓ |
|
|
88
|
+
| Dutch | `nl` | ✓ |
|
|
89
|
+
| Portuguese | `pt` | ✓ |
|
|
90
|
+
| Swedish | `sv` | ✓ |
|
|
91
|
+
| Norwegian Nynorsk | `nn` | ✗ |
|
|
92
|
+
| Latin | `la` | ✗ |
|
|
93
|
+
| Yoruba | `yo` | ✗ |
|
|
94
|
+
| Estonian | `et` | ✗ |
|
|
95
|
+
| Esperanto | `eo` | ✗ |
|
|
96
|
+
| Tsonga | `ts` | ✗ |
|
|
97
|
+
| Tagalog | `tl` | ✗ |
|
|
98
|
+
| Catalan | `ca` | ✗ |
|
|
99
|
+
| Czech | `cs` | ✗ |
|
|
100
|
+
| Hungarian | `hu` | ✗ |
|
|
101
|
+
| Tswana | `tn` | ✗ |
|
|
102
|
+
|
|
103
|
+
`✓` grounded - English native, others via the argos MT bridge · `✗` no installed argos model → `UnsupportedLanguageError` (any language not listed defaults to `✗`)
|
|
104
|
+
|
|
105
|
+
- **Supported** - full cross-lingual grounding: the claim is translated to English, then recall-matched against the evidence
|
|
106
|
+
- **Unsupported** - a non-English claim with no installed model → `ground()` raises `UnsupportedLanguageError`; the claim is hard-blocked, not scored, so unsupported languages cannot pollute metrics (batch callers wrap per claim)
|
|
107
|
+
- **Add a language** - `argospm install translate-<code>_en` installs the model; the bridge picks it up automatically
|
|
108
|
+
- **Region tags** - the detector strips the region before lookup (`it-IT` → `it`, `nb-NO` → `nb`)
|
|
109
|
+
|
|
110
|
+
## Documentation
|
|
111
|
+
|
|
112
|
+
The `docs/` tree carries the concept, the calibration method, and the full research history behind the shipped weights.
|
|
113
|
+
|
|
114
|
+
- **Concept** - [`docs/grounding_concept.md`](docs/grounding_concept.md) - what grounding means here and how a verdict is assembled
|
|
115
|
+
- **Calibration** - [`docs/grounding_calibration.md`](docs/grounding_calibration.md) - how the frozen weights and thresholds were fit
|
|
116
|
+
- **Experiments log** - [`docs/experiments/lexical-grounding-experiments.md`](docs/experiments/lexical-grounding-experiments.md) - Rounds 1-12, what moved the metrics and what did not
|
|
117
|
+
- **State of the art** - [`docs/experiments/lexical-grounding-sota.md`](docs/experiments/lexical-grounding-sota.md) - how the deterministic cascade compares to published grounding methods
|
|
118
|
+
- **Positional analysis** - [`docs/lost_in_the_middle_grounding_analysis.md`](docs/lost_in_the_middle_grounding_analysis.md) - lost-in-the-middle behaviour over long evidence
|
|
119
|
+
|
|
120
|
+
## Project layout
|
|
121
|
+
|
|
122
|
+
- `src/groundrails/` - the grounder (`grounding`, `lexical`, `lexical_mt`, `entity_check`, `consistency`, `calibration`, `chunking`, `extract`, `sat/`, `config` + the shipped `config_document_processing.yaml`), plus the `cli` entry point, `settings`, and the optional `semantic` / `nli` layer
|
|
123
|
+
- `experiments/grounding/` - research harness (Rounds 1-12)
|
|
124
|
+
- `notebooks/` - calibration, SaT / OpenVINO conversion, manifold retraining
|
|
125
|
+
- `tests/` - grounder tests + the exact-equivalence golden
|
|
126
|
+
- `data/`, `models/`, `references/` - datasets, OpenVINO IR, papers (large/private content gitignored)
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "groundrails"
|
|
7
|
+
version = "1.0.29"
|
|
8
|
+
description = "Grounding guardrails for agentic RAG - deterministic claim verification"
|
|
9
|
+
authors = [
|
|
10
|
+
{ name = "Konrad Jelen" },
|
|
11
|
+
]
|
|
12
|
+
license = "MIT"
|
|
13
|
+
readme = "README.md"
|
|
14
|
+
requires-python = "~=3.12.0"
|
|
15
|
+
classifiers = [
|
|
16
|
+
"Programming Language :: Python :: 3",
|
|
17
|
+
]
|
|
18
|
+
|
|
19
|
+
dependencies = [
|
|
20
|
+
"loguru",
|
|
21
|
+
"tqdm",
|
|
22
|
+
"typer",
|
|
23
|
+
"python-dotenv",
|
|
24
|
+
"botocore",
|
|
25
|
+
# --- grounder runtime stack (from parent stellars-claude-code-plugins) ---
|
|
26
|
+
"numpy",
|
|
27
|
+
"pandas>=2.0",
|
|
28
|
+
"scipy",
|
|
29
|
+
"scikit-learn>=1.3", # manifold fit path (LogisticRegression)
|
|
30
|
+
"pyarrow>=14.0", # parquet gold read
|
|
31
|
+
"pyyaml",
|
|
32
|
+
"rank-bm25>=0.2.2",
|
|
33
|
+
"rapidfuzz>=3.14.5",
|
|
34
|
+
"wordfreq>=3.0", # length-robust background token rarity
|
|
35
|
+
"nltk>=3.8", # WordNet antonym-flip (corpus fetched lazily)
|
|
36
|
+
"lingua-language-detector>=2.0", # per-claim language detection
|
|
37
|
+
"huggingface_hub>=0.23", # SaT OpenVINO IR download
|
|
38
|
+
"openvino>=2025.0", # native INT8 SaT sentence splitter
|
|
39
|
+
"tokenizers>=0.20", # xlm-roberta tokenizer for SaT
|
|
40
|
+
"argostranslate>=1.9", # torch-free MT bridge (CTranslate2)
|
|
41
|
+
"ctranslate2>=4.0", # MT inference engine
|
|
42
|
+
"sacremoses>=0.1", # MT tokenisation
|
|
43
|
+
"sentencepiece>=0.2", # MT subword model
|
|
44
|
+
"subword-nmt>=0.3", # MT BPE
|
|
45
|
+
"bambi>=0.13", # Bayesian calibration
|
|
46
|
+
"pymc>=5.10", # calibration sampler
|
|
47
|
+
"arviz>=0.17", # calibration diagnostics
|
|
48
|
+
]
|
|
49
|
+
|
|
50
|
+
[project.optional-dependencies]
|
|
51
|
+
dev = [
|
|
52
|
+
"build",
|
|
53
|
+
"ipykernel",
|
|
54
|
+
"ipython",
|
|
55
|
+
"nbdime",
|
|
56
|
+
"pip",
|
|
57
|
+
"pytest",
|
|
58
|
+
"pytest-cov",
|
|
59
|
+
"ruff",
|
|
60
|
+
"awscli",
|
|
61
|
+
"jupyter",
|
|
62
|
+
"matplotlib", # experiment plots (experiments/grounding/plots.py)
|
|
63
|
+
"jenkspy", # max-gap / natural-breaks batch threshold experiment
|
|
64
|
+
"nncf", # INT8 quantization for SaT conversion (notebook 02)
|
|
65
|
+
"onnxruntime>=1.17", # SaT model conversion (notebooks 02/04)
|
|
66
|
+
"wtpsplit-lite", # SaT lite splitter (notebooks 02/04) - verify exact dist name
|
|
67
|
+
"rich", # notebook rich output
|
|
68
|
+
]
|
|
69
|
+
semantic = [
|
|
70
|
+
"onnxruntime>=1.17", # NLI cross-encoder + embedding ONNX runtime
|
|
71
|
+
"transformers>=4.40", # tokenizer/model loading for the semantic layer
|
|
72
|
+
"faiss-cpu>=1.7", # embedding retrieval index
|
|
73
|
+
]
|
|
74
|
+
|
|
75
|
+
[project.scripts]
|
|
76
|
+
groundrails = "groundrails.cli:main"
|
|
77
|
+
|
|
78
|
+
[tool.setuptools]
|
|
79
|
+
include-package-data = true
|
|
80
|
+
|
|
81
|
+
[tool.setuptools.package-data]
|
|
82
|
+
groundrails = ["config_*.yaml"]
|
|
83
|
+
|
|
84
|
+
[tool.setuptools.packages.find]
|
|
85
|
+
where = ["src"]
|
|
86
|
+
include = ["groundrails*"]
|
|
87
|
+
exclude = ["tests*"]
|
|
88
|
+
|
|
89
|
+
[tool.ruff]
|
|
90
|
+
line-length = 99
|
|
91
|
+
src = ["src/groundrails"]
|
|
92
|
+
include = ["pyproject.toml", "src/groundrails/**/*.py"]
|
|
93
|
+
|
|
94
|
+
[tool.ruff.lint]
|
|
95
|
+
extend-select = ["I"]
|
|
96
|
+
|
|
97
|
+
[tool.ruff.lint.isort]
|
|
98
|
+
known-first-party = ["groundrails"]
|
|
99
|
+
force-sort-within-sections = true
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
"""groundrails - grounding guardrails for agentic RAG.
|
|
2
|
+
|
|
3
|
+
Deterministic, torch-free claim verification. Core exports (zero heavy deps):
|
|
4
|
+
:func:`ground`, :func:`ground_batch`, :class:`GroundingMatch`, :class:`Location`.
|
|
5
|
+
|
|
6
|
+
Optional semantic grounding (NLI / cross-encoder + FAISS) lives in
|
|
7
|
+
:mod:`groundrails.semantic` and requires ``groundrails[semantic]``. It is
|
|
8
|
+
lazy-imported - ``import groundrails`` does NOT load torch, transformers, or faiss.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from groundrails.grounding import (
|
|
12
|
+
GroundingMatch,
|
|
13
|
+
Location,
|
|
14
|
+
UnsupportedLanguageError,
|
|
15
|
+
ground,
|
|
16
|
+
ground_batch,
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
__all__ = ["GroundingMatch", "Location", "UnsupportedLanguageError", "ground", "ground_batch"]
|