dokis 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dokis-0.1.0/LICENSE +21 -0
- dokis-0.1.0/PKG-INFO +342 -0
- dokis-0.1.0/README.md +298 -0
- dokis-0.1.0/dokis/__init__.py +170 -0
- dokis-0.1.0/dokis/adapters/__init__.py +0 -0
- dokis-0.1.0/dokis/adapters/langchain.py +155 -0
- dokis-0.1.0/dokis/adapters/llamaindex.py +93 -0
- dokis-0.1.0/dokis/cli.py +352 -0
- dokis-0.1.0/dokis/config.py +156 -0
- dokis-0.1.0/dokis/core/__init__.py +0 -0
- dokis-0.1.0/dokis/core/enforcer.py +108 -0
- dokis-0.1.0/dokis/core/extractor.py +158 -0
- dokis-0.1.0/dokis/core/matcher.py +298 -0
- dokis-0.1.0/dokis/core/scorer.py +56 -0
- dokis-0.1.0/dokis/exceptions.py +47 -0
- dokis-0.1.0/dokis/middleware.py +186 -0
- dokis-0.1.0/dokis/models.py +96 -0
- dokis-0.1.0/dokis.egg-info/PKG-INFO +342 -0
- dokis-0.1.0/dokis.egg-info/SOURCES.txt +32 -0
- dokis-0.1.0/dokis.egg-info/dependency_links.txt +1 -0
- dokis-0.1.0/dokis.egg-info/entry_points.txt +2 -0
- dokis-0.1.0/dokis.egg-info/requires.txt +25 -0
- dokis-0.1.0/dokis.egg-info/top_level.txt +1 -0
- dokis-0.1.0/pyproject.toml +84 -0
- dokis-0.1.0/setup.cfg +4 -0
- dokis-0.1.0/tests/test_adapters.py +353 -0
- dokis-0.1.0/tests/test_cli.py +412 -0
- dokis-0.1.0/tests/test_config.py +26 -0
- dokis-0.1.0/tests/test_enforcer.py +104 -0
- dokis-0.1.0/tests/test_extractor.py +150 -0
- dokis-0.1.0/tests/test_init.py +122 -0
- dokis-0.1.0/tests/test_matcher.py +408 -0
- dokis-0.1.0/tests/test_middleware.py +216 -0
- dokis-0.1.0/tests/test_scorer.py +70 -0
dokis-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Dokis Contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
dokis-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,342 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: dokis
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Lightweight RAG provenance middleware. Verifies every claim in an LLM response is grounded in a retrieved source - without an LLM call.
|
|
5
|
+
License: MIT
|
|
6
|
+
Project-URL: Homepage, https://github.com/Vbj1808/dokis
|
|
7
|
+
Project-URL: Repository, https://github.com/Vbj1808/dokis
|
|
8
|
+
Project-URL: Issues, https://github.com/Vbj1808/dokis/issues
|
|
9
|
+
Project-URL: Changelog, https://github.com/Vbj1808/dokis/blob/main/CHANGELOG.md
|
|
10
|
+
Keywords: rag,provenance,hallucination,langchain,llm,citation,compliance,retrieval-augmented-generation
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
19
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
20
|
+
Classifier: Topic :: Internet :: WWW/HTTP :: Indexing/Search
|
|
21
|
+
Classifier: Topic :: Text Processing :: Linguistic
|
|
22
|
+
Requires-Python: >=3.10
|
|
23
|
+
Description-Content-Type: text/markdown
|
|
24
|
+
License-File: LICENSE
|
|
25
|
+
Requires-Dist: pydantic>=2.0
|
|
26
|
+
Requires-Dist: numpy>=1.26
|
|
27
|
+
Requires-Dist: bm25s>=0.2
|
|
28
|
+
Provides-Extra: langchain
|
|
29
|
+
Requires-Dist: langchain-core>=0.2; extra == "langchain"
|
|
30
|
+
Provides-Extra: llamaindex
|
|
31
|
+
Requires-Dist: llama-index-core>=0.10; extra == "llamaindex"
|
|
32
|
+
Provides-Extra: nltk
|
|
33
|
+
Requires-Dist: nltk>=3.8; extra == "nltk"
|
|
34
|
+
Provides-Extra: semantic
|
|
35
|
+
Requires-Dist: sentence-transformers>=2.7; extra == "semantic"
|
|
36
|
+
Provides-Extra: dev
|
|
37
|
+
Requires-Dist: pytest>=8.0; extra == "dev"
|
|
38
|
+
Requires-Dist: pytest-asyncio>=0.23; extra == "dev"
|
|
39
|
+
Requires-Dist: ruff>=0.4; extra == "dev"
|
|
40
|
+
Requires-Dist: mypy>=1.10; extra == "dev"
|
|
41
|
+
Requires-Dist: nltk>=3.8; extra == "dev"
|
|
42
|
+
Requires-Dist: tomli>=2.0; python_version < "3.11" and extra == "dev"
|
|
43
|
+
Dynamic: license-file
|
|
44
|
+
|
|
45
|
+
<div align="center">
|
|
46
|
+
|
|
47
|
+

|
|
48
|
+
|
|
49
|
+
<br/>
|
|
50
|
+
|
|
51
|
+
[](https://pypi.org/project/dokis/)
|
|
52
|
+
[](https://pypi.org/project/dokis/)
|
|
53
|
+
[](https://github.com/Vbj1808/dokis/actions)
|
|
54
|
+
[](LICENSE)
|
|
55
|
+
|
|
56
|
+
</div>
|
|
57
|
+
|
|
58
|
+
---
|
|
59
|
+
|
|
60
|
+
## The problem
|
|
61
|
+
|
|
62
|
+
Every RAG pipeline has the same failure mode. The LLM takes five retrieved chunks, ignores three of them, and generates a response that cites facts from nowhere. Your retriever did its job. Your prompt did its job. The output still contains unsourced claims and you have no way to know until a user catches it.
|
|
63
|
+
|
|
64
|
+
Existing tools don't solve this at runtime:
|
|
65
|
+
|
|
66
|
+
- **RAGAS** evaluates offline. It can't catch a hallucination before it reaches a user.
|
|
67
|
+
- **LLM guardrails** handle safety and policy enforcement well - toxicity, jailbreaks, off-topic content. Their provenance validators strip unsupported sentences but don't return a structured claim→URL map, a compliance rate, or a source allowlist.
|
|
68
|
+
- **Prompt engineering** reduces the problem. It doesn't eliminate it.
|
|
69
|
+
|
|
70
|
+
Dokis sits inline - between retrieval and the moment your LLM response goes out - and enforces provenance in real time.
|
|
71
|
+
|
|
72
|
+
---
|
|
73
|
+
|
|
74
|
+
## How it works
|
|
75
|
+
|
|
76
|
+
Dokis does exactly two things:
|
|
77
|
+
|
|
78
|
+
**1. Pre-retrieval enforcement.** Strip chunks whose source URL is not on your allowlist before they enter the prompt.
|
|
79
|
+
|
|
80
|
+
**2. Post-generation auditing.** Split the response into atomic claim sentences. Match each claim to the chunk it came from using BM25 lexical scoring. Build a `claim → chunk → URL` provenance map. Compute a compliance rate. Flag anything below your threshold.
|
|
81
|
+
|
|
82
|
+
No LLM call. No API key. No network request after startup. Deterministic output.
|
|
83
|
+
|
|
84
|
+
<div align="center">
|
|
85
|
+
|
|
86
|
+

|
|
87
|
+
|
|
88
|
+
</div>
|
|
89
|
+
|
|
90
|
+
---
|
|
91
|
+
|
|
92
|
+
## See it in action
|
|
93
|
+
|
|
94
|
+
<div align="center">
|
|
95
|
+
|
|
96
|
+

|
|
97
|
+
|
|
98
|
+
</div>
|
|
99
|
+
|
|
100
|
+
---
|
|
101
|
+
|
|
102
|
+
## Quickstart
|
|
103
|
+
|
|
104
|
+
```bash
|
|
105
|
+
pip install dokis
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
```python
|
|
109
|
+
import dokis
|
|
110
|
+
|
|
111
|
+
result = dokis.audit(query, chunks, response)
|
|
112
|
+
|
|
113
|
+
print(result.compliance_rate) # 0.92
|
|
114
|
+
print(result.provenance_map)   # { "Aspirin inhibits COX...": "https://pubmed.ncbi.nlm.nih.gov/..." }
|
|
115
|
+
print(result.violations) # claims with no source
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
### Zero config
|
|
119
|
+
|
|
120
|
+
```python
|
|
121
|
+
import dokis
|
|
122
|
+
|
|
123
|
+
result = dokis.audit(query, chunks, response)
|
|
124
|
+
|
|
125
|
+
print(result.compliance_rate) # 0.91
|
|
126
|
+
print(result.passed) # True
|
|
127
|
+
print(result.provenance_map)  # {"Aspirin inhibits...": "https://pubmed.ncbi.nlm.nih.gov/1"}
|
|
128
|
+
print(result.violations) # claims with no source
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
### With config
|
|
132
|
+
|
|
133
|
+
```python
|
|
134
|
+
import dokis
|
|
135
|
+
|
|
136
|
+
config = dokis.Config(
|
|
137
|
+
allowed_domains = ["pubmed.ncbi.nlm.nih.gov", "cochrane.org"],
|
|
138
|
+
min_citation_rate = 0.85,
|
|
139
|
+
claim_threshold = 0.3,
|
|
140
|
+
)
|
|
141
|
+
|
|
142
|
+
clean_chunks = dokis.filter(raw_chunks, config)
|
|
143
|
+
response = llm.invoke(build_prompt(query, clean_chunks))
|
|
144
|
+
result = dokis.audit(query, clean_chunks, response, config=config)
|
|
145
|
+
|
|
146
|
+
if not result.passed:
|
|
147
|
+
raise dokis.ComplianceViolation(result)
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
### LangChain - two lines
|
|
151
|
+
|
|
152
|
+
```python
|
|
153
|
+
from dokis.adapters.langchain import ProvenanceRetriever
|
|
154
|
+
|
|
155
|
+
retriever = ProvenanceRetriever(
|
|
156
|
+
base_retriever=your_existing_retriever,
|
|
157
|
+
config=dokis.Config(allowed_domains=["pubmed.ncbi.nlm.nih.gov"]),
|
|
158
|
+
)
|
|
159
|
+
docs = retriever.invoke(query)
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
### LlamaIndex
|
|
163
|
+
|
|
164
|
+
```python
|
|
165
|
+
from dokis.adapters.llamaindex import ProvenanceQueryEngine
|
|
166
|
+
|
|
167
|
+
engine = ProvenanceQueryEngine(
|
|
168
|
+
base_engine=your_existing_engine,
|
|
169
|
+
chunks=source_chunks,
|
|
170
|
+
config=dokis.Config(min_citation_rate=0.80),
|
|
171
|
+
)
|
|
172
|
+
response = engine.query("What reduces fever?")
|
|
173
|
+
result = response.metadata["provenance"]
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
### CLI
|
|
177
|
+
|
|
178
|
+
```bash
|
|
179
|
+
dokis audit input.json
|
|
180
|
+
dokis audit input.json --config provenance.toml
|
|
181
|
+
cat input.json | dokis audit -
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
### Reusable middleware (production pattern)
|
|
185
|
+
|
|
186
|
+
```python
|
|
187
|
+
from dokis import ProvenanceMiddleware, Config
|
|
188
|
+
|
|
189
|
+
mw = ProvenanceMiddleware(Config(
|
|
190
|
+
allowed_domains = ["pubmed.ncbi.nlm.nih.gov", "cochrane.org"],
|
|
191
|
+
min_citation_rate = 0.85,
|
|
192
|
+
matcher = "bm25",
|
|
193
|
+
claim_threshold = 0.3,
|
|
194
|
+
))
|
|
195
|
+
|
|
196
|
+
result = mw.audit(query, chunks, response)
|
|
197
|
+
```
|
|
198
|
+
|
|
199
|
+
### Async
|
|
200
|
+
|
|
201
|
+
```python
|
|
202
|
+
result = await mw.aaudit(query, chunks, response)
|
|
203
|
+
```
|
|
204
|
+
|
|
205
|
+
---
|
|
206
|
+
|
|
207
|
+
## Installation
|
|
208
|
+
|
|
209
|
+
```bash
|
|
210
|
+
pip install dokis # BM25 default, zero cold start
|
|
211
|
+
pip install "dokis[semantic]"    # adds SentenceTransformer matching
|
|
212
|
+
pip install "dokis[nltk]"        # adds NLTK sentence splitting
|
|
213
|
+
pip install "dokis[langchain]"   # adds LangChain ProvenanceRetriever
|
|
214
|
+
pip install "dokis[llamaindex]"  # adds LlamaIndex ProvenanceQueryEngine
|
|
215
|
+
```
|
|
216
|
+
|
|
217
|
+
---
|
|
218
|
+
|
|
219
|
+
## Configuration
|
|
220
|
+
|
|
221
|
+
```python
|
|
222
|
+
dokis.Config(
|
|
223
|
+
allowed_domains = [],
|
|
224
|
+
min_citation_rate = 0.80,
|
|
225
|
+
claim_threshold = 0.35,
|
|
226
|
+
extractor = "regex", # "regex" | "nltk" | "llm"
|
|
227
|
+
matcher = "bm25", # "bm25" | "semantic"
|
|
228
|
+
model = "all-MiniLM-L6-v2",
|
|
229
|
+
fail_on_violation = False,
|
|
230
|
+
domain = None,
|
|
231
|
+
)
|
|
232
|
+
```
|
|
233
|
+
|
|
234
|
+
**`claim_threshold` by matcher:**
|
|
235
|
+
- `matcher="bm25"`: normalised per-query BM25 score. Recommended: `0.3–0.5`.
|
|
236
|
+
- `matcher="semantic"`: cosine similarity. Recommended: `0.65–0.85`.
|
|
237
|
+
|
|
238
|
+
**Load from TOML:**
|
|
239
|
+
|
|
240
|
+
```python
|
|
241
|
+
# method is named from_yaml for backwards compatibility - pass a .toml file
|
|
242
|
+
config = dokis.Config.from_yaml("provenance.toml")
|
|
243
|
+
```
|
|
244
|
+
|
|
245
|
+
---
|
|
246
|
+
|
|
247
|
+
## The result object
|
|
248
|
+
|
|
249
|
+
```python
|
|
250
|
+
result.compliance_rate # float
|
|
251
|
+
result.passed # bool
|
|
252
|
+
result.violations # list[Claim]
|
|
253
|
+
result.provenance_map # dict[claim_text, source_url]
|
|
254
|
+
result.blocked_sources # list[str]
|
|
255
|
+
result.claims # list[Claim]
|
|
256
|
+
|
|
257
|
+
claim.text # str
|
|
258
|
+
claim.supported # bool
|
|
259
|
+
claim.confidence # float - always set, even when False
|
|
260
|
+
claim.source_url # str | None
|
|
261
|
+
claim.source_chunk # Chunk | None
|
|
262
|
+
|
|
263
|
+
record = result.model_dump_json() # fully JSON-serialisable
|
|
264
|
+
```
|
|
265
|
+
|
|
266
|
+
---
|
|
267
|
+
|
|
268
|
+
## Benchmarks
|
|
269
|
+
|
|
270
|
+
Measured on Python 3.12. Medians over 10 warm runs.
|
|
271
|
+
|
|
272
|
+
### Cold start
|
|
273
|
+
|
|
274
|
+
| Matcher | Cold start | What loads |
|
|
275
|
+
|---|---|---|
|
|
276
|
+
| `bm25` (default) | **~0 ms** | Nothing - pure Python |
|
|
277
|
+
| `semantic` | **~1,666 ms** | `all-MiniLM-L6-v2` (~80 MB) |
|
|
278
|
+
|
|
279
|
+
### Per-call audit latency (5 chunks, 3 claims)
|
|
280
|
+
|
|
281
|
+
| Matcher | Median | p95 |
|
|
282
|
+
|---|---|---|
|
|
283
|
+
| `bm25` (default) | **0.96 ms** | 1.29 ms |
|
|
284
|
+
| `semantic` | **21.99 ms** | 31.45 ms |
|
|
285
|
+
|
|
286
|
+
BM25 is **23× faster** per audit call. The BM25 index is cached per chunk set - repeated calls against the same chunks stay sub-millisecond.
|
|
287
|
+
|
|
288
|
+
### Install footprint
|
|
289
|
+
|
|
290
|
+
| `pip install dokis` | `pip install dokis[semantic]` |
|
|
291
|
+
|---|---|
|
|
292
|
+
| ~42 MB (pydantic + numpy + bm25s) | ~135 MB (+ model weights) |
|
|
293
|
+
|
|
294
|
+
### Accuracy (5 grounded + 5 ungrounded claims)
|
|
295
|
+
|
|
296
|
+
| Matcher | Grounded detected | Ungrounded rejected |
|
|
297
|
+
|---|---|---|
|
|
298
|
+
| `bm25` (default) | 5/5 | 4/4 ✦ |
|
|
299
|
+
| `semantic` | 5/5 | 4/4 ✦ |
|
|
300
|
+
|
|
301
|
+
✦ One claim was 7 words - below the 8-word minimum - and filtered before matching. Effective ungrounded rejection rate is 100% for both matchers.
|
|
302
|
+
|
|
303
|
+
---
|
|
304
|
+
|
|
305
|
+
## Comparison
|
|
306
|
+
|
|
307
|
+
| | Dokis | RAGAS | LLM guardrails |
|
|
308
|
+
|---|---|---|---|
|
|
309
|
+
| Runtime enforcement | ✅ | ❌ offline only | ✅ |
|
|
310
|
+
| No LLM call needed | ✅ | ❌ | partial ✦ |
|
|
311
|
+
| Per-claim provenance map | ✅ | partial | partial ✧ |
|
|
312
|
+
| Source allowlisting | ✅ | ❌ | ❌ |
|
|
313
|
+
| Compliance rate per response | ✅ | ❌ | ❌ |
|
|
314
|
+
| LangChain integration | ✅ drop-in retriever | ✅ evaluation wrapper | varies |
|
|
315
|
+
| JSON-serialisable audit log | ✅ per-response | ❌ | ❌ |
|
|
316
|
+
| Cold start | ~0 ms | - | varies |
|
|
317
|
+
| Core install size | ~42 MB | - | - |
|
|
318
|
+
|
|
319
|
+
✦ ProvenanceEmbeddings uses no LLM call. ProvenanceLLM requires one.
|
|
320
|
+
✧ Guardrails strips unsupported sentences from the response. Dokis returns a structured claim→URL map you can store and query.
|
|
321
|
+
|
|
322
|
+
---
|
|
323
|
+
|
|
324
|
+
## Examples
|
|
325
|
+
|
|
326
|
+
Three working demos in [dokis-examples](https://github.com/Vbj1808/dokis-examples):
|
|
327
|
+
|
|
328
|
+
- **01 - Local files** - txt files + BM25 + Ollama
|
|
329
|
+
- **02 - Chroma vector store** - Chroma + nomic-embed-text + Ollama
|
|
330
|
+
- **03 - Live web search** - Serper API + domain allowlisting + Ollama
|
|
331
|
+
|
|
332
|
+
---
|
|
333
|
+
|
|
334
|
+
## Core dependencies
|
|
335
|
+
|
|
336
|
+
`pip install dokis` installs exactly three packages: `pydantic>=2.0`, `numpy>=1.26`, `bm25s>=0.2`.
|
|
337
|
+
|
|
338
|
+
---
|
|
339
|
+
|
|
340
|
+
## License
|
|
341
|
+
|
|
342
|
+
MIT
|
dokis-0.1.0/README.md
ADDED
|
@@ -0,0 +1,298 @@
|
|
|
1
|
+
<div align="center">
|
|
2
|
+
|
|
3
|
+

|
|
4
|
+
|
|
5
|
+
<br/>
|
|
6
|
+
|
|
7
|
+
[](https://pypi.org/project/dokis/)
|
|
8
|
+
[](https://pypi.org/project/dokis/)
|
|
9
|
+
[](https://github.com/Vbj1808/dokis/actions)
|
|
10
|
+
[](LICENSE)
|
|
11
|
+
|
|
12
|
+
</div>
|
|
13
|
+
|
|
14
|
+
---
|
|
15
|
+
|
|
16
|
+
## The problem
|
|
17
|
+
|
|
18
|
+
Every RAG pipeline has the same failure mode. The LLM takes five retrieved chunks, ignores three of them, and generates a response that cites facts from nowhere. Your retriever did its job. Your prompt did its job. The output still contains unsourced claims and you have no way to know until a user catches it.
|
|
19
|
+
|
|
20
|
+
Existing tools don't solve this at runtime:
|
|
21
|
+
|
|
22
|
+
- **RAGAS** evaluates offline. It can't catch a hallucination before it reaches a user.
|
|
23
|
+
- **LLM guardrails** handle safety and policy enforcement well - toxicity, jailbreaks, off-topic content. Their provenance validators strip unsupported sentences but don't return a structured claim→URL map, a compliance rate, or a source allowlist.
|
|
24
|
+
- **Prompt engineering** reduces the problem. It doesn't eliminate it.
|
|
25
|
+
|
|
26
|
+
Dokis sits inline - between retrieval and the moment your LLM response goes out - and enforces provenance in real time.
|
|
27
|
+
|
|
28
|
+
---
|
|
29
|
+
|
|
30
|
+
## How it works
|
|
31
|
+
|
|
32
|
+
Dokis does exactly two things:
|
|
33
|
+
|
|
34
|
+
**1. Pre-retrieval enforcement.** Strip chunks whose source URL is not on your allowlist before they enter the prompt.
|
|
35
|
+
|
|
36
|
+
**2. Post-generation auditing.** Split the response into atomic claim sentences. Match each claim to the chunk it came from using BM25 lexical scoring. Build a `claim → chunk → URL` provenance map. Compute a compliance rate. Flag anything below your threshold.
|
|
37
|
+
|
|
38
|
+
No LLM call. No API key. No network request after startup. Deterministic output.
|
|
39
|
+
|
|
40
|
+
<div align="center">
|
|
41
|
+
|
|
42
|
+

|
|
43
|
+
|
|
44
|
+
</div>
|
|
45
|
+
|
|
46
|
+
---
|
|
47
|
+
|
|
48
|
+
## See it in action
|
|
49
|
+
|
|
50
|
+
<div align="center">
|
|
51
|
+
|
|
52
|
+

|
|
53
|
+
|
|
54
|
+
</div>
|
|
55
|
+
|
|
56
|
+
---
|
|
57
|
+
|
|
58
|
+
## Quickstart
|
|
59
|
+
|
|
60
|
+
```bash
|
|
61
|
+
pip install dokis
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
```python
|
|
65
|
+
import dokis
|
|
66
|
+
|
|
67
|
+
result = dokis.audit(query, chunks, response)
|
|
68
|
+
|
|
69
|
+
print(result.compliance_rate) # 0.92
|
|
70
|
+
print(result.provenance_map)   # { "Aspirin inhibits COX...": "https://pubmed.ncbi.nlm.nih.gov/..." }
|
|
71
|
+
print(result.violations) # claims with no source
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
### Zero config
|
|
75
|
+
|
|
76
|
+
```python
|
|
77
|
+
import dokis
|
|
78
|
+
|
|
79
|
+
result = dokis.audit(query, chunks, response)
|
|
80
|
+
|
|
81
|
+
print(result.compliance_rate) # 0.91
|
|
82
|
+
print(result.passed) # True
|
|
83
|
+
print(result.provenance_map)  # {"Aspirin inhibits...": "https://pubmed.ncbi.nlm.nih.gov/1"}
|
|
84
|
+
print(result.violations) # claims with no source
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
### With config
|
|
88
|
+
|
|
89
|
+
```python
|
|
90
|
+
import dokis
|
|
91
|
+
|
|
92
|
+
config = dokis.Config(
|
|
93
|
+
allowed_domains = ["pubmed.ncbi.nlm.nih.gov", "cochrane.org"],
|
|
94
|
+
min_citation_rate = 0.85,
|
|
95
|
+
claim_threshold = 0.3,
|
|
96
|
+
)
|
|
97
|
+
|
|
98
|
+
clean_chunks = dokis.filter(raw_chunks, config)
|
|
99
|
+
response = llm.invoke(build_prompt(query, clean_chunks))
|
|
100
|
+
result = dokis.audit(query, clean_chunks, response, config=config)
|
|
101
|
+
|
|
102
|
+
if not result.passed:
|
|
103
|
+
raise dokis.ComplianceViolation(result)
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
### LangChain - two lines
|
|
107
|
+
|
|
108
|
+
```python
|
|
109
|
+
from dokis.adapters.langchain import ProvenanceRetriever
|
|
110
|
+
|
|
111
|
+
retriever = ProvenanceRetriever(
|
|
112
|
+
base_retriever=your_existing_retriever,
|
|
113
|
+
config=dokis.Config(allowed_domains=["pubmed.ncbi.nlm.nih.gov"]),
|
|
114
|
+
)
|
|
115
|
+
docs = retriever.invoke(query)
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
### LlamaIndex
|
|
119
|
+
|
|
120
|
+
```python
|
|
121
|
+
from dokis.adapters.llamaindex import ProvenanceQueryEngine
|
|
122
|
+
|
|
123
|
+
engine = ProvenanceQueryEngine(
|
|
124
|
+
base_engine=your_existing_engine,
|
|
125
|
+
chunks=source_chunks,
|
|
126
|
+
config=dokis.Config(min_citation_rate=0.80),
|
|
127
|
+
)
|
|
128
|
+
response = engine.query("What reduces fever?")
|
|
129
|
+
result = response.metadata["provenance"]
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
### CLI
|
|
133
|
+
|
|
134
|
+
```bash
|
|
135
|
+
dokis audit input.json
|
|
136
|
+
dokis audit input.json --config provenance.toml
|
|
137
|
+
cat input.json | dokis audit -
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
### Reusable middleware (production pattern)
|
|
141
|
+
|
|
142
|
+
```python
|
|
143
|
+
from dokis import ProvenanceMiddleware, Config
|
|
144
|
+
|
|
145
|
+
mw = ProvenanceMiddleware(Config(
|
|
146
|
+
allowed_domains = ["pubmed.ncbi.nlm.nih.gov", "cochrane.org"],
|
|
147
|
+
min_citation_rate = 0.85,
|
|
148
|
+
matcher = "bm25",
|
|
149
|
+
claim_threshold = 0.3,
|
|
150
|
+
))
|
|
151
|
+
|
|
152
|
+
result = mw.audit(query, chunks, response)
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
### Async
|
|
156
|
+
|
|
157
|
+
```python
|
|
158
|
+
result = await mw.aaudit(query, chunks, response)
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
---
|
|
162
|
+
|
|
163
|
+
## Installation
|
|
164
|
+
|
|
165
|
+
```bash
|
|
166
|
+
pip install dokis # BM25 default, zero cold start
|
|
167
|
+
pip install "dokis[semantic]"    # adds SentenceTransformer matching
|
|
168
|
+
pip install "dokis[nltk]"        # adds NLTK sentence splitting
|
|
169
|
+
pip install "dokis[langchain]"   # adds LangChain ProvenanceRetriever
|
|
170
|
+
pip install "dokis[llamaindex]"  # adds LlamaIndex ProvenanceQueryEngine
|
|
171
|
+
```
|
|
172
|
+
|
|
173
|
+
---
|
|
174
|
+
|
|
175
|
+
## Configuration
|
|
176
|
+
|
|
177
|
+
```python
|
|
178
|
+
dokis.Config(
|
|
179
|
+
allowed_domains = [],
|
|
180
|
+
min_citation_rate = 0.80,
|
|
181
|
+
claim_threshold = 0.35,
|
|
182
|
+
extractor = "regex", # "regex" | "nltk" | "llm"
|
|
183
|
+
matcher = "bm25", # "bm25" | "semantic"
|
|
184
|
+
model = "all-MiniLM-L6-v2",
|
|
185
|
+
fail_on_violation = False,
|
|
186
|
+
domain = None,
|
|
187
|
+
)
|
|
188
|
+
```
|
|
189
|
+
|
|
190
|
+
**`claim_threshold` by matcher:**
|
|
191
|
+
- `matcher="bm25"`: normalised per-query BM25 score. Recommended: `0.3–0.5`.
|
|
192
|
+
- `matcher="semantic"`: cosine similarity. Recommended: `0.65–0.85`.
|
|
193
|
+
|
|
194
|
+
**Load from TOML:**
|
|
195
|
+
|
|
196
|
+
```python
|
|
197
|
+
# method is named from_yaml for backwards compatibility - pass a .toml file
|
|
198
|
+
config = dokis.Config.from_yaml("provenance.toml")
|
|
199
|
+
```
|
|
200
|
+
|
|
201
|
+
---
|
|
202
|
+
|
|
203
|
+
## The result object
|
|
204
|
+
|
|
205
|
+
```python
|
|
206
|
+
result.compliance_rate # float
|
|
207
|
+
result.passed # bool
|
|
208
|
+
result.violations # list[Claim]
|
|
209
|
+
result.provenance_map # dict[claim_text, source_url]
|
|
210
|
+
result.blocked_sources # list[str]
|
|
211
|
+
result.claims # list[Claim]
|
|
212
|
+
|
|
213
|
+
claim.text # str
|
|
214
|
+
claim.supported # bool
|
|
215
|
+
claim.confidence # float - always set, even when False
|
|
216
|
+
claim.source_url # str | None
|
|
217
|
+
claim.source_chunk # Chunk | None
|
|
218
|
+
|
|
219
|
+
record = result.model_dump_json() # fully JSON-serialisable
|
|
220
|
+
```
|
|
221
|
+
|
|
222
|
+
---
|
|
223
|
+
|
|
224
|
+
## Benchmarks
|
|
225
|
+
|
|
226
|
+
Measured on Python 3.12. Medians over 10 warm runs.
|
|
227
|
+
|
|
228
|
+
### Cold start
|
|
229
|
+
|
|
230
|
+
| Matcher | Cold start | What loads |
|
|
231
|
+
|---|---|---|
|
|
232
|
+
| `bm25` (default) | **~0 ms** | Nothing - pure Python |
|
|
233
|
+
| `semantic` | **~1,666 ms** | `all-MiniLM-L6-v2` (~80 MB) |
|
|
234
|
+
|
|
235
|
+
### Per-call audit latency (5 chunks, 3 claims)
|
|
236
|
+
|
|
237
|
+
| Matcher | Median | p95 |
|
|
238
|
+
|---|---|---|
|
|
239
|
+
| `bm25` (default) | **0.96 ms** | 1.29 ms |
|
|
240
|
+
| `semantic` | **21.99 ms** | 31.45 ms |
|
|
241
|
+
|
|
242
|
+
BM25 is **23× faster** per audit call. The BM25 index is cached per chunk set - repeated calls against the same chunks stay sub-millisecond.
|
|
243
|
+
|
|
244
|
+
### Install footprint
|
|
245
|
+
|
|
246
|
+
| `pip install dokis` | `pip install dokis[semantic]` |
|
|
247
|
+
|---|---|
|
|
248
|
+
| ~42 MB (pydantic + numpy + bm25s) | ~135 MB (+ model weights) |
|
|
249
|
+
|
|
250
|
+
### Accuracy (5 grounded + 5 ungrounded claims)
|
|
251
|
+
|
|
252
|
+
| Matcher | Grounded detected | Ungrounded rejected |
|
|
253
|
+
|---|---|---|
|
|
254
|
+
| `bm25` (default) | 5/5 | 4/4 ✦ |
|
|
255
|
+
| `semantic` | 5/5 | 4/4 ✦ |
|
|
256
|
+
|
|
257
|
+
✦ One claim was 7 words - below the 8-word minimum - and filtered before matching. Effective ungrounded rejection rate is 100% for both matchers.
|
|
258
|
+
|
|
259
|
+
---
|
|
260
|
+
|
|
261
|
+
## Comparison
|
|
262
|
+
|
|
263
|
+
| | Dokis | RAGAS | LLM guardrails |
|
|
264
|
+
|---|---|---|---|
|
|
265
|
+
| Runtime enforcement | ✅ | ❌ offline only | ✅ |
|
|
266
|
+
| No LLM call needed | ✅ | ❌ | partial ✦ |
|
|
267
|
+
| Per-claim provenance map | ✅ | partial | partial ✧ |
|
|
268
|
+
| Source allowlisting | ✅ | ❌ | ❌ |
|
|
269
|
+
| Compliance rate per response | ✅ | ❌ | ❌ |
|
|
270
|
+
| LangChain integration | ✅ drop-in retriever | ✅ evaluation wrapper | varies |
|
|
271
|
+
| JSON-serialisable audit log | ✅ per-response | ❌ | ❌ |
|
|
272
|
+
| Cold start | ~0 ms | - | varies |
|
|
273
|
+
| Core install size | ~42 MB | - | - |
|
|
274
|
+
|
|
275
|
+
✦ ProvenanceEmbeddings uses no LLM call. ProvenanceLLM requires one.
|
|
276
|
+
✧ Guardrails strips unsupported sentences from the response. Dokis returns a structured claim→URL map you can store and query.
|
|
277
|
+
|
|
278
|
+
---
|
|
279
|
+
|
|
280
|
+
## Examples
|
|
281
|
+
|
|
282
|
+
Three working demos in [dokis-examples](https://github.com/Vbj1808/dokis-examples):
|
|
283
|
+
|
|
284
|
+
- **01 - Local files** - txt files + BM25 + Ollama
|
|
285
|
+
- **02 - Chroma vector store** - Chroma + nomic-embed-text + Ollama
|
|
286
|
+
- **03 - Live web search** - Serper API + domain allowlisting + Ollama
|
|
287
|
+
|
|
288
|
+
---
|
|
289
|
+
|
|
290
|
+
## Core dependencies
|
|
291
|
+
|
|
292
|
+
`pip install dokis` installs exactly three packages: `pydantic>=2.0`, `numpy>=1.26`, `bm25s>=0.2`.
|
|
293
|
+
|
|
294
|
+
---
|
|
295
|
+
|
|
296
|
+
## License
|
|
297
|
+
|
|
298
|
+
MIT
|