ragfallback 2.2.0__tar.gz → 2.2.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ragfallback-2.2.0 → ragfallback-2.2.2}/MANIFEST.in +0 -9
- {ragfallback-2.2.0/ragfallback.egg-info → ragfallback-2.2.2}/PKG-INFO +168 -10
- {ragfallback-2.2.0 → ragfallback-2.2.2}/README.md +164 -9
- {ragfallback-2.2.0 → ragfallback-2.2.2}/examples/build_golden_dataset.py +6 -2
- {ragfallback-2.2.0 → ragfallback-2.2.2}/examples/chroma_real_kb_demo.py +18 -5
- {ragfallback-2.2.0 → ragfallback-2.2.2}/examples/ci_regression_gate.py +4 -2
- {ragfallback-2.2.0 → ragfallback-2.2.2}/examples/financial_risk_analysis.py +11 -4
- {ragfallback-2.2.0 → ragfallback-2.2.2}/examples/legal_document_analysis.py +21 -8
- {ragfallback-2.2.0 → ragfallback-2.2.2}/examples/medical_research_synthesis.py +22 -9
- {ragfallback-2.2.0 → ragfallback-2.2.2}/examples/real_data_demo.py +2 -5
- {ragfallback-2.2.0 → ragfallback-2.2.2}/examples/uc10_metadata_sanitizer.py +17 -7
- {ragfallback-2.2.0 → ragfallback-2.2.2}/examples/uc1_retrieval_health.py +4 -1
- {ragfallback-2.2.0 → ragfallback-2.2.2}/examples/uc2_embedding_guard.py +1 -0
- {ragfallback-2.2.0 → ragfallback-2.2.2}/examples/uc3_chunk_quality.py +10 -7
- {ragfallback-2.2.0 → ragfallback-2.2.2}/examples/uc4_context_window.py +4 -1
- {ragfallback-2.2.0 → ragfallback-2.2.2}/examples/uc5_hybrid_failover.py +22 -8
- {ragfallback-2.2.0 → ragfallback-2.2.2}/examples/uc6_adaptive_rag.py +9 -5
- {ragfallback-2.2.0 → ragfallback-2.2.2}/examples/uc6_multi_hop_demo.py +10 -6
- {ragfallback-2.2.0 → ragfallback-2.2.2}/examples/uc7_rag_evaluator.py +26 -11
- {ragfallback-2.2.0 → ragfallback-2.2.2}/examples/uc8_context_stitcher.py +11 -5
- {ragfallback-2.2.0 → ragfallback-2.2.2}/examples/uc9_embedding_probe.py +7 -8
- {ragfallback-2.2.0 → ragfallback-2.2.2}/pyproject.toml +4 -1
- {ragfallback-2.2.0 → ragfallback-2.2.2}/pytest.ini +1 -0
- {ragfallback-2.2.0 → ragfallback-2.2.2}/ragfallback/__init__.py +1 -1
- {ragfallback-2.2.0 → ragfallback-2.2.2}/ragfallback/core/__init__.py +0 -9
- {ragfallback-2.2.0 → ragfallback-2.2.2}/ragfallback/core/adaptive_retriever.py +108 -69
- {ragfallback-2.2.0 → ragfallback-2.2.2}/ragfallback/diagnostics/__init__.py +12 -3
- {ragfallback-2.2.0 → ragfallback-2.2.2}/ragfallback/diagnostics/chunking.py +4 -2
- {ragfallback-2.2.0 → ragfallback-2.2.2}/ragfallback/diagnostics/context_window.py +1 -1
- {ragfallback-2.2.0 → ragfallback-2.2.2}/ragfallback/diagnostics/embedding_probe.py +1 -1
- {ragfallback-2.2.0 → ragfallback-2.2.2}/ragfallback/diagnostics/retrieval_health.py +6 -6
- {ragfallback-2.2.0 → ragfallback-2.2.2}/ragfallback/diagnostics/schema_sanitizer.py +3 -1
- {ragfallback-2.2.0 → ragfallback-2.2.2}/ragfallback/diagnostics/stale_index.py +12 -4
- {ragfallback-2.2.0 → ragfallback-2.2.2}/ragfallback/evaluation/rag_evaluator.py +12 -6
- {ragfallback-2.2.0 → ragfallback-2.2.2}/ragfallback/mlops/__init__.py +4 -4
- {ragfallback-2.2.0 → ragfallback-2.2.2}/ragfallback/mlops/baseline_registry.py +11 -14
- {ragfallback-2.2.0 → ragfallback-2.2.2}/ragfallback/mlops/golden_runner.py +3 -1
- {ragfallback-2.2.0 → ragfallback-2.2.2}/ragfallback/mlops/ragas_hook.py +5 -2
- {ragfallback-2.2.0 → ragfallback-2.2.2}/ragfallback/retrieval/failover.py +8 -5
- {ragfallback-2.2.0 → ragfallback-2.2.2}/ragfallback/retrieval/smart_hybrid.py +4 -4
- {ragfallback-2.2.0 → ragfallback-2.2.2}/ragfallback/retrieval/wrappers.py +6 -2
- {ragfallback-2.2.0 → ragfallback-2.2.2}/ragfallback/strategies/__init__.py +5 -10
- {ragfallback-2.2.0 → ragfallback-2.2.2}/ragfallback/strategies/base.py +6 -16
- {ragfallback-2.2.0 → ragfallback-2.2.2}/ragfallback/strategies/multi_hop.py +17 -11
- {ragfallback-2.2.0 → ragfallback-2.2.2}/ragfallback/strategies/query_variations.py +26 -20
- {ragfallback-2.2.0 → ragfallback-2.2.2}/ragfallback/tracking/__init__.py +7 -10
- {ragfallback-2.2.0 → ragfallback-2.2.2}/ragfallback/tracking/cache_monitor.py +4 -10
- {ragfallback-2.2.0 → ragfallback-2.2.2}/ragfallback/tracking/cost_tracker.py +26 -31
- {ragfallback-2.2.0 → ragfallback-2.2.2}/ragfallback/tracking/metrics.py +22 -20
- {ragfallback-2.2.0 → ragfallback-2.2.2}/ragfallback/utils/__init__.py +10 -11
- {ragfallback-2.2.0 → ragfallback-2.2.2}/ragfallback/utils/confidence_scorer.py +1 -1
- {ragfallback-2.2.0 → ragfallback-2.2.2}/ragfallback/utils/embedding_factory.py +20 -40
- {ragfallback-2.2.0 → ragfallback-2.2.2}/ragfallback/utils/env.py +1 -1
- {ragfallback-2.2.0 → ragfallback-2.2.2}/ragfallback/utils/llm_factory.py +40 -51
- {ragfallback-2.2.0 → ragfallback-2.2.2}/ragfallback/utils/vector_store_factory.py +30 -44
- {ragfallback-2.2.0 → ragfallback-2.2.2/ragfallback.egg-info}/PKG-INFO +168 -10
- {ragfallback-2.2.0 → ragfallback-2.2.2}/requirements-dev.txt +0 -9
- {ragfallback-2.2.0 → ragfallback-2.2.2}/setup.py +2 -1
- {ragfallback-2.2.0 → ragfallback-2.2.2}/tests/__init__.py +0 -9
- {ragfallback-2.2.0 → ragfallback-2.2.2}/tests/conftest.py +14 -14
- {ragfallback-2.2.0 → ragfallback-2.2.2}/tests/integration/test_adaptive_workflow.py +26 -26
- {ragfallback-2.2.0 → ragfallback-2.2.2}/tests/integration/test_chroma_pipeline.py +10 -3
- {ragfallback-2.2.0 → ragfallback-2.2.2}/tests/unit/test_adaptive_multi_hop_bridge.py +21 -9
- {ragfallback-2.2.0 → ragfallback-2.2.2}/tests/unit/test_async_retriever.py +7 -17
- {ragfallback-2.2.0 → ragfallback-2.2.2}/tests/unit/test_cache_monitor.py +16 -11
- {ragfallback-2.2.0 → ragfallback-2.2.2}/tests/unit/test_confidence_scorer.py +44 -28
- {ragfallback-2.2.0 → ragfallback-2.2.2}/tests/unit/test_cost_tracker.py +7 -17
- {ragfallback-2.2.0 → ragfallback-2.2.2}/tests/unit/test_diagnostics.py +13 -5
- {ragfallback-2.2.0 → ragfallback-2.2.2}/tests/unit/test_hybrid_retrieval.py +8 -10
- {ragfallback-2.2.0 → ragfallback-2.2.2}/tests/unit/test_metrics.py +12 -23
- {ragfallback-2.2.0 → ragfallback-2.2.2}/tests/unit/test_multi_hop.py +0 -1
- {ragfallback-2.2.0 → ragfallback-2.2.2}/tests/unit/test_query_variations.py +12 -17
- {ragfallback-2.2.0 → ragfallback-2.2.2}/INSTALL_AND_RUN.md +0 -0
- {ragfallback-2.2.0 → ragfallback-2.2.2}/LICENSE +0 -0
- {ragfallback-2.2.0 → ragfallback-2.2.2}/examples/_kb_common.py +0 -0
- {ragfallback-2.2.0 → ragfallback-2.2.2}/examples/mlops_demo.py +0 -0
- {ragfallback-2.2.0 → ragfallback-2.2.2}/examples/production_reliability_example.py +0 -0
- {ragfallback-2.2.0 → ragfallback-2.2.2}/examples/qdrant_local_demo.py +0 -0
- {ragfallback-2.2.0 → ragfallback-2.2.2}/ragfallback/diagnostics/context_stitcher.py +0 -0
- {ragfallback-2.2.0 → ragfallback-2.2.2}/ragfallback/diagnostics/embedding_guard.py +0 -0
- {ragfallback-2.2.0 → ragfallback-2.2.2}/ragfallback/diagnostics/embedding_validator.py +0 -0
- {ragfallback-2.2.0 → ragfallback-2.2.2}/ragfallback/evaluation/__init__.py +2 -2
- {ragfallback-2.2.0 → ragfallback-2.2.2}/ragfallback/exceptions.py +0 -0
- {ragfallback-2.2.0 → ragfallback-2.2.2}/ragfallback/mlops/locust_template.py +0 -0
- {ragfallback-2.2.0 → ragfallback-2.2.2}/ragfallback/mlops/mlflow_logger.py +0 -0
- {ragfallback-2.2.0 → ragfallback-2.2.2}/ragfallback/mlops/query_simulator.py +0 -0
- {ragfallback-2.2.0 → ragfallback-2.2.2}/ragfallback/py.typed +0 -0
- {ragfallback-2.2.0 → ragfallback-2.2.2}/ragfallback/retrieval/__init__.py +0 -0
- {ragfallback-2.2.0 → ragfallback-2.2.2}/ragfallback/retrieval/rerank_guard.py +0 -0
- {ragfallback-2.2.0 → ragfallback-2.2.2}/ragfallback.egg-info/SOURCES.txt +0 -0
- {ragfallback-2.2.0 → ragfallback-2.2.2}/ragfallback.egg-info/dependency_links.txt +0 -0
- {ragfallback-2.2.0 → ragfallback-2.2.2}/ragfallback.egg-info/requires.txt +0 -0
- {ragfallback-2.2.0 → ragfallback-2.2.2}/ragfallback.egg-info/top_level.txt +0 -0
- {ragfallback-2.2.0 → ragfallback-2.2.2}/setup.cfg +0 -0
- {ragfallback-2.2.0 → ragfallback-2.2.2}/tests/integration/__init__.py +0 -0
- {ragfallback-2.2.0 → ragfallback-2.2.2}/tests/unit/__init__.py +0 -0
- {ragfallback-2.2.0 → ragfallback-2.2.2}/tests/unit/test_retrieval.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ragfallback
|
|
3
|
-
Version: 2.2.0
|
|
3
|
+
Version: 2.2.2
|
|
4
4
|
Summary: Prevents silent RAG failures — chunk quality, retrieval fallback, adaptive querying, and answer evaluation in one library.
|
|
5
5
|
Home-page: https://github.com/irfanalidv/ragfallback
|
|
6
6
|
Author: Irfan Ali
|
|
@@ -10,9 +10,11 @@ Project-URL: Homepage, https://github.com/irfanalidv/ragfallback
|
|
|
10
10
|
Project-URL: Documentation, https://github.com/irfanalidv/ragfallback#readme
|
|
11
11
|
Project-URL: Repository, https://github.com/irfanalidv/ragfallback
|
|
12
12
|
Project-URL: Issues, https://github.com/irfanalidv/ragfallback/issues
|
|
13
|
+
Project-URL: Changelog, https://github.com/irfanalidv/ragfallback/blob/main/CHANGELOG.md
|
|
13
14
|
Keywords: rag,retrieval,llm,fallback,query-variations,langchain,bm25,hybrid-search
|
|
14
15
|
Classifier: Development Status :: 4 - Beta
|
|
15
16
|
Classifier: Intended Audience :: Developers
|
|
17
|
+
Classifier: Operating System :: OS Independent
|
|
16
18
|
Classifier: Programming Language :: Python :: 3
|
|
17
19
|
Classifier: Programming Language :: Python :: 3.8
|
|
18
20
|
Classifier: Programming Language :: Python :: 3.9
|
|
@@ -20,6 +22,7 @@ Classifier: Programming Language :: Python :: 3.10
|
|
|
20
22
|
Classifier: Programming Language :: Python :: 3.11
|
|
21
23
|
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
22
24
|
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
25
|
+
Classifier: Typing :: Typed
|
|
23
26
|
Requires-Python: >=3.8
|
|
24
27
|
Description-Content-Type: text/markdown
|
|
25
28
|
License-File: LICENSE
|
|
@@ -102,17 +105,65 @@ Dynamic: home-page
|
|
|
102
105
|
Dynamic: license-file
|
|
103
106
|
Dynamic: requires-python
|
|
104
107
|
|
|
108
|
+
<div align="center">
|
|
109
|
+
|
|
105
110
|
# ragfallback
|
|
106
111
|
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
112
|
+
**The reliability layer for RAG pipelines that already work — until they don't.**
|
|
113
|
+
|
|
114
|
+
Drop into any LangChain-compatible stack. Catches bad chunks before they're embedded, fails over when retrieval goes empty, and scores answer quality on every run — so degradation shows up in CI, not in a user's support ticket.
|
|
115
|
+
|
|
116
|
+
[](https://pypi.org/project/ragfallback/)
|
|
110
117
|
[](https://pepy.tech/project/ragfallback)
|
|
111
118
|
[](https://github.com/irfanalidv/ragfallback/actions/workflows/test.yml)
|
|
119
|
+
[](https://github.com/irfanalidv/ragfallback/actions/workflows/lint.yml)
|
|
120
|
+
[](https://pypi.org/project/ragfallback/)
|
|
121
|
+
[](https://github.com/irfanalidv/ragfallback/blob/main/LICENSE)
|
|
122
|
+
[](https://github.com/irfanalidv/ragfallback/stargazers)
|
|
123
|
+
<br/>
|
|
112
124
|
[](https://colab.research.google.com/github/irfanalidv/ragfallback/blob/main/ragfallback_colab.ipynb)
|
|
113
|
-
[](https://github.com/irfanalidv/ragfallback/tree/main/ragfallback/mlops)
|
|
125
|
+
[](https://github.com/irfanalidv/ragfallback/tree/main/ragfallback/mlops)
|
|
126
|
+
[](#examples--real-public-datasets)
|
|
127
|
+
|
|
128
|
+
</div>
|
|
129
|
+
|
|
130
|
+
<br/>
|
|
114
131
|
|
|
115
|
-
|
|
132
|
+
<p align="center">
|
|
133
|
+
<img src="https://raw.githubusercontent.com/irfanalidv/ragfallback/main/ragfallback_arch.svg" alt="ragfallback architecture — diagnostics, retrieval, core, evaluation and MLOps modules across the ingest-to-operate pipeline" width="100%">
|
|
134
|
+
</p>
|
|
135
|
+
|
|
136
|
+
---
|
|
137
|
+
|
|
138
|
+
## Contents
|
|
139
|
+
|
|
140
|
+
- [Why ragfallback?](#why-ragfallback)
|
|
141
|
+
- [What it prevents](#what-it-prevents)
|
|
142
|
+
- [Quick start](#quick-start)
|
|
143
|
+
- [Configuration](#configuration)
|
|
144
|
+
- [Full pipeline](#full-pipeline)
|
|
145
|
+
- [Module reference](#module-reference)
|
|
146
|
+
- [Examples — real public datasets](#examples--real-public-datasets)
|
|
147
|
+
- [Verified numbers](#verified-numbers--squad-wikipedia-validation-set)
|
|
148
|
+
- [Install](#install)
|
|
149
|
+
- [MLOps — evaluation & regression gate](#mlops--evaluation--regression-gate)
|
|
150
|
+
- [Contributing](#contributing)
|
|
151
|
+
- [FAQ](#faq)
|
|
152
|
+
|
|
153
|
+
---
|
|
154
|
+
|
|
155
|
+
## Why ragfallback?
|
|
156
|
+
|
|
157
|
+
RAG pipelines rarely fail loudly. They fail by quietly returning an empty context, a half-relevant chunk, or a confident-sounding hallucination — and nothing in a typical LangChain + vector-store stack tells you that happened. ragfallback is not another retrieval framework competing with LangChain, LlamaIndex, or your vector DB; it's a thin layer of guards and checks that wraps the stack you already have.
|
|
158
|
+
|
|
159
|
+
| If your stack today is... | ragfallback adds |
|
|
160
|
+
| ---------------------------------------------------- | --------------------------------------------------------------------------------------------------- |
|
|
161
|
+
| Raw LangChain retriever, no fallback | `FailoverRetriever` + `SmartThresholdHybridRetriever` — a second path when the first one goes empty |
|
|
162
|
+
| RAGAS or another eval library, run manually | `GoldenRunner` + `BaselineRegistry` — the same metrics wired into a CI gate that fails the build |
|
|
163
|
+
| Nothing — chunking and indexing "just work" for now | `ChunkQualityChecker` + `EmbeddingGuard` — catches the two most common silent corruption sources |
|
|
164
|
+
| Hand-rolled retry logic around an LLM call | `AdaptiveRAGRetriever` — confidence-scored retries with pluggable strategies, sync and async |
|
|
165
|
+
|
|
166
|
+
If you don't have any of the failure modes in the table below, you don't need this library. If you've shipped a RAG feature past a demo, you've probably hit at least three of them.
|
|
116
167
|
|
|
117
168
|
---
|
|
118
169
|
|
|
@@ -359,6 +410,22 @@ from ragfallback.retrieval import FailoverRetriever
|
|
|
359
410
|
retriever = FailoverRetriever(primary=chroma_retriever, fallback=faiss_retriever, min_results=1)
|
|
360
411
|
```
|
|
361
412
|
|
|
413
|
+
**ReRankerGuard** — pass-through hook for a second-stage reranker. Sits after vector retrieval, before the prompt; does nothing until you wire a `rerank_fn`, so it's safe to add to a pipeline today and fill in a cross-encoder later.
|
|
414
|
+
|
|
415
|
+
```python
|
|
416
|
+
from ragfallback.retrieval import ReRankerGuard
|
|
417
|
+
guard = ReRankerGuard(rerank_fn=my_cross_encoder_rerank, top_n=4)
|
|
418
|
+
docs = guard.apply(query, retrieved_docs)
|
|
419
|
+
```
|
|
420
|
+
|
|
421
|
+
**RetrieverAsVectorStore** — wraps any LangChain `BaseRetriever` (e.g. `SmartThresholdHybridRetriever`) so it exposes the `as_retriever()` surface `AdaptiveRAGRetriever` expects.
|
|
422
|
+
|
|
423
|
+
```python
|
|
424
|
+
from ragfallback.retrieval import RetrieverAsVectorStore
|
|
425
|
+
shim = RetrieverAsVectorStore(hybrid_retriever)
|
|
426
|
+
retriever = AdaptiveRAGRetriever(vector_store=shim, llm=llm)
|
|
427
|
+
```
|
|
428
|
+
|
|
362
429
|
---
|
|
363
430
|
|
|
364
431
|
### `ragfallback.core`
|
|
@@ -382,6 +449,19 @@ print(result.answer, result.confidence, result.attempts_used)
|
|
|
382
449
|
|
|
383
450
|
Requires `MISTRAL_API_KEY` (or any LangChain-compatible LLM passed via `llm=`).
|
|
384
451
|
|
|
452
|
+
**aquery_with_fallback** — native async version of `query_with_fallback()`. Real coroutine using LangChain `ainvoke()` — not a thread-pool wrapper. Falls back to a thread pool automatically if the underlying LLM doesn't implement `ainvoke`.
|
|
453
|
+
|
|
454
|
+
```python
|
|
455
|
+
import asyncio
|
|
456
|
+
|
|
457
|
+
# async-native — LLM API calls overlap instead of serializing
|
|
458
|
+
result = await retriever.aquery_with_fallback("What is the refund policy?")
|
|
459
|
+
print(result.answer, result.confidence, result.attempts)
|
|
460
|
+
|
|
461
|
+
# works in FastAPI, GoldenRunner.run_async(), or any async context
|
|
462
|
+
asyncio.run(retriever.aquery_with_fallback("How do API tokens expire?"))
|
|
463
|
+
```
|
|
464
|
+
|
|
385
465
|
---
|
|
386
466
|
|
|
387
467
|
### `ragfallback.strategies`
|
|
@@ -419,6 +499,42 @@ metrics.record_attempt(success=True, latency_ms=120, confidence=0.85)
|
|
|
419
499
|
print(metrics.get_stats())
|
|
420
500
|
```
|
|
421
501
|
|
|
502
|
+
**CacheMonitor** — wraps any LangChain retriever to track cache hit rate, per-category latency (hit vs miss), TTL-based expiry, and LRU eviction. Zero new dependencies — stdlib only. Supports both sync `invoke()` and async `ainvoke()`.
|
|
503
|
+
|
|
504
|
+
```python
|
|
505
|
+
from ragfallback.tracking import CacheMonitor
|
|
506
|
+
|
|
507
|
+
monitor = CacheMonitor(max_size=512, ttl_seconds=600)
|
|
508
|
+
cached_retriever = monitor.wrap_retriever(store.as_retriever(search_kwargs={"k": 4}))
|
|
509
|
+
|
|
510
|
+
# use cached_retriever exactly like any LangChain retriever
|
|
511
|
+
docs = cached_retriever.invoke("What is the refund policy?")
|
|
512
|
+
|
|
513
|
+
print(monitor.summary())
|
|
514
|
+
# → cache hit_rate=34.7% hits=26 misses=49 entries=49 evictions=0
|
|
515
|
+
|
|
516
|
+
stats = monitor.get_stats()
|
|
517
|
+
print(stats.hit_rate, stats.avg_hit_latency_ms, stats.avg_miss_latency_ms)
|
|
518
|
+
```
|
|
519
|
+
|
|
520
|
+
Pass to `GoldenRunner` to capture cache efficiency alongside RAGAS scores:
|
|
521
|
+
|
|
522
|
+
```python
|
|
523
|
+
from ragfallback.mlops import GoldenRunner, RagasHook
|
|
524
|
+
from ragfallback.tracking import CacheMonitor
|
|
525
|
+
|
|
526
|
+
monitor = CacheMonitor(max_size=256, ttl_seconds=300)
|
|
527
|
+
runner = GoldenRunner(
|
|
528
|
+
retriever=retriever,
|
|
529
|
+
ragas_hook=hook,
|
|
530
|
+
dataset="examples/golden_qa.json",
|
|
531
|
+
cache_monitor=monitor,
|
|
532
|
+
)
|
|
533
|
+
report = asyncio.run(runner.run_async())
|
|
534
|
+
print(report.cache_stats)
|
|
535
|
+
# → {"hit_rate": 0.347, "hits": 26, "misses": 49, "evictions": 0, ...}
|
|
536
|
+
```
|
|
537
|
+
|
|
422
538
|
---
|
|
423
539
|
|
|
424
540
|
### `ragfallback.evaluation`
|
|
@@ -480,7 +596,8 @@ RAGEvaluator (10 real Q&A pairs, heuristic, no LLM judge):
|
|
|
480
596
|
Avg overall : 62.9%
|
|
481
597
|
```
|
|
482
598
|
|
|
483
|
-
Install: `pip install ragfallback[chroma,huggingface,real-data]`
|
|
599
|
+
Install: `pip install ragfallback[chroma,huggingface,real-data]`
|
|
600
|
+
|
|
484
601
|
Dataset: [rajpurkar/squad](https://huggingface.co/datasets/rajpurkar/squad) — CC BY-SA 4.0
|
|
485
602
|
|
|
486
603
|
---
|
|
@@ -511,16 +628,20 @@ pip install ragfallback[mlops] # MLOps eval layer (RAGAS +
|
|
|
511
628
|
## Subpackage import map
|
|
512
629
|
|
|
513
630
|
```python
|
|
514
|
-
from ragfallback import AdaptiveRAGRetriever, QueryResult, CostTracker, MetricsCollector
|
|
631
|
+
from ragfallback import AdaptiveRAGRetriever, QueryResult, CostTracker, MetricsCollector, CacheMonitor
|
|
515
632
|
|
|
516
633
|
from ragfallback.diagnostics import (
|
|
517
634
|
ChunkQualityChecker, EmbeddingGuard, EmbeddingQualityProbe,
|
|
518
635
|
RetrievalHealthCheck, StaleIndexDetector, ContextWindowGuard,
|
|
519
636
|
OverlappingContextStitcher, sanitize_documents, sanitize_metadata,
|
|
520
637
|
)
|
|
521
|
-
from ragfallback.retrieval import SmartThresholdHybridRetriever, FailoverRetriever
|
|
638
|
+
from ragfallback.retrieval import (
|
|
639
|
+
SmartThresholdHybridRetriever, FailoverRetriever,
|
|
640
|
+
ReRankerGuard, RetrieverAsVectorStore,
|
|
641
|
+
)
|
|
522
642
|
from ragfallback.strategies import QueryVariationsStrategy, MultiHopFallbackStrategy
|
|
523
643
|
from ragfallback.evaluation import RAGEvaluator
|
|
644
|
+
from ragfallback.tracking import CacheMonitor, CacheStats
|
|
524
645
|
from ragfallback.mlops import (
|
|
525
646
|
RagasHook, RagasReport,
|
|
526
647
|
BaselineRegistry, RegressionError,
|
|
@@ -616,11 +737,48 @@ python examples/ci_regression_gate.py # exits 0 (pass) or 1 (fail)
|
|
|
616
737
|
|
|
617
738
|
---
|
|
618
739
|
|
|
740
|
+
## FAQ
|
|
741
|
+
|
|
742
|
+
**Does this replace LangChain / LlamaIndex / my vector DB?**
|
|
743
|
+
No. ragfallback wraps whatever retriever and vector store you already use. It adds checks and fallback paths; it doesn't add a new abstraction layer you have to migrate to.
|
|
744
|
+
|
|
745
|
+
**Do I need an LLM API key to use this?**
|
|
746
|
+
Not for most of it. `ChunkQualityChecker`, `EmbeddingGuard`, `RetrievalHealthCheck`, `SmartThresholdHybridRetriever`, `ContextWindowGuard`, and `RAGEvaluator` (heuristic mode) all run locally. Only `AdaptiveRAGRetriever`, `QueryVariationsStrategy`, and `MultiHopFallbackStrategy` need an LLM, and any LangChain-compatible one works — including local Ollama models.
|
|
747
|
+
|
|
748
|
+
**Why are the example numbers different every time I run them?**
|
|
749
|
+
Because they're computed live against real public datasets (SQuAD, PubMedQA, CUAD), not hardcoded. The README's "Verified numbers" section is the literal stdout of `examples/real_data_demo.py` — run it yourself to confirm.
|
|
750
|
+
|
|
751
|
+
**Is this production-ready?**
|
|
752
|
+
It's used in the author's own RAG pipelines and has a CI regression gate that runs on every push (see badge above). It's tagged Beta on PyPI because the public API can still shift between minor versions — pin a version in production and read [CHANGELOG.md](CHANGELOG.md) before upgrading.
|
|
753
|
+
|
|
754
|
+
**How is this different from RAGAS?**
|
|
755
|
+
RAGAS scores answer quality. ragfallback includes a thin RAGAS-compatible hook (`ragfallback.mlops.RagasHook`) for that, but the rest of the library is about *preventing* failures before they reach evaluation — chunk quality, embedding integrity, retrieval fallback, and context-window fit. Use both; they solve different parts of the pipeline.
|
|
756
|
+
|
|
757
|
+
---
|
|
758
|
+
|
|
759
|
+
## Star history
|
|
760
|
+
|
|
761
|
+
<a href="https://star-history.com/#irfanalidv/ragfallback&Date">
|
|
762
|
+
<img src="https://api.star-history.com/svg?repos=irfanalidv/ragfallback&type=Date" alt="Star History Chart" width="100%">
|
|
763
|
+
</a>
|
|
764
|
+
|
|
765
|
+
---
|
|
766
|
+
|
|
619
767
|
## Contributing
|
|
620
768
|
|
|
621
769
|
See [CONTRIBUTING.md](CONTRIBUTING.md). The quick version: run `pytest tests/unit/ -v` before any PR, follow Google-style docstrings, use `logging` not `print`, and update `__all__` in the subpackage `__init__.py`.
|
|
622
770
|
|
|
623
771
|
## License · Changelog
|
|
624
772
|
|
|
625
|
-
MIT License — see [LICENSE](LICENSE).
|
|
773
|
+
MIT License — see [LICENSE](LICENSE).
|
|
774
|
+
|
|
626
775
|
Full version history in [CHANGELOG.md](CHANGELOG.md).
|
|
776
|
+
|
|
777
|
+
---
|
|
778
|
+
|
|
779
|
+
<div align="center">
|
|
780
|
+
|
|
781
|
+
Built and maintained by **[Irfan Ali](https://github.com/irfanalidv)** — Senior AI Engineer (LLMs, RAG, agents, voice AI).
|
|
782
|
+
Part of an [11-package open-source toolkit](https://pypi.org/user/irfanalidv/) for production RAG and agent systems.
|
|
783
|
+
|
|
784
|
+
</div>
|
|
@@ -1,14 +1,62 @@
|
|
|
1
|
+
<div align="center">
|
|
2
|
+
|
|
1
3
|
# ragfallback
|
|
2
4
|
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
5
|
+
**The reliability layer for RAG pipelines that already work — until they don't.**
|
|
6
|
+
|
|
7
|
+
Drop into any LangChain-compatible stack. Catches bad chunks before they're embedded, fails over when retrieval goes empty, and scores answer quality on every run — so degradation shows up in CI, not in a user's support ticket.
|
|
8
|
+
|
|
9
|
+
[](https://pypi.org/project/ragfallback/)
|
|
6
10
|
[](https://pepy.tech/project/ragfallback)
|
|
7
11
|
[](https://github.com/irfanalidv/ragfallback/actions/workflows/test.yml)
|
|
12
|
+
[](https://github.com/irfanalidv/ragfallback/actions/workflows/lint.yml)
|
|
13
|
+
[](https://pypi.org/project/ragfallback/)
|
|
14
|
+
[](https://github.com/irfanalidv/ragfallback/blob/main/LICENSE)
|
|
15
|
+
[](https://github.com/irfanalidv/ragfallback/stargazers)
|
|
16
|
+
<br/>
|
|
8
17
|
[](https://colab.research.google.com/github/irfanalidv/ragfallback/blob/main/ragfallback_colab.ipynb)
|
|
9
|
-
[](https://github.com/irfanalidv/ragfallback/tree/main/ragfallback/mlops)
|
|
18
|
+
[](https://github.com/irfanalidv/ragfallback/tree/main/ragfallback/mlops)
|
|
19
|
+
[](#examples--real-public-datasets)
|
|
20
|
+
|
|
21
|
+
</div>
|
|
22
|
+
|
|
23
|
+
<br/>
|
|
10
24
|
|
|
11
|
-
|
|
25
|
+
<p align="center">
|
|
26
|
+
<img src="https://raw.githubusercontent.com/irfanalidv/ragfallback/main/ragfallback_arch.svg" alt="ragfallback architecture — diagnostics, retrieval, core, evaluation and MLOps modules across the ingest-to-operate pipeline" width="100%">
|
|
27
|
+
</p>
|
|
28
|
+
|
|
29
|
+
---
|
|
30
|
+
|
|
31
|
+
## Contents
|
|
32
|
+
|
|
33
|
+
- [Why ragfallback?](#why-ragfallback)
|
|
34
|
+
- [What it prevents](#what-it-prevents)
|
|
35
|
+
- [Quick start](#quick-start)
|
|
36
|
+
- [Configuration](#configuration)
|
|
37
|
+
- [Full pipeline](#full-pipeline)
|
|
38
|
+
- [Module reference](#module-reference)
|
|
39
|
+
- [Examples — real public datasets](#examples--real-public-datasets)
|
|
40
|
+
- [Verified numbers](#verified-numbers--squad-wikipedia-validation-set)
|
|
41
|
+
- [Install](#install)
|
|
42
|
+
- [MLOps — evaluation & regression gate](#mlops--evaluation--regression-gate)
|
|
43
|
+
- [Contributing](#contributing)
|
|
44
|
+
- [FAQ](#faq)
|
|
45
|
+
|
|
46
|
+
---
|
|
47
|
+
|
|
48
|
+
## Why ragfallback?
|
|
49
|
+
|
|
50
|
+
RAG pipelines rarely fail loudly. They fail by quietly returning an empty context, a half-relevant chunk, or a confident-sounding hallucination — and nothing in a typical LangChain + vector-store stack tells you that happened. ragfallback is not another retrieval framework competing with LangChain, LlamaIndex, or your vector DB; it's a thin layer of guards and checks that wraps the stack you already have.
|
|
51
|
+
|
|
52
|
+
| If your stack today is... | ragfallback adds |
|
|
53
|
+
| ---------------------------------------------------- | --------------------------------------------------------------------------------------------------- |
|
|
54
|
+
| Raw LangChain retriever, no fallback | `FailoverRetriever` + `SmartThresholdHybridRetriever` — a second path when the first one goes empty |
|
|
55
|
+
| RAGAS or another eval library, run manually | `GoldenRunner` + `BaselineRegistry` — the same metrics wired into a CI gate that fails the build |
|
|
56
|
+
| Nothing — chunking and indexing "just work" for now | `ChunkQualityChecker` + `EmbeddingGuard` — catches the two most common silent corruption sources |
|
|
57
|
+
| Hand-rolled retry logic around an LLM call | `AdaptiveRAGRetriever` — confidence-scored retries with pluggable strategies, sync and async |
|
|
58
|
+
|
|
59
|
+
If you don't have any of the failure modes in the table below, you don't need this library. If you've shipped a RAG feature past a demo, you've probably hit at least three of them.
|
|
12
60
|
|
|
13
61
|
---
|
|
14
62
|
|
|
@@ -255,6 +303,22 @@ from ragfallback.retrieval import FailoverRetriever
|
|
|
255
303
|
retriever = FailoverRetriever(primary=chroma_retriever, fallback=faiss_retriever, min_results=1)
|
|
256
304
|
```
|
|
257
305
|
|
|
306
|
+
**ReRankerGuard** — pass-through hook for a second-stage reranker. Sits after vector retrieval, before the prompt; does nothing until you wire a `rerank_fn`, so it's safe to add to a pipeline today and fill in a cross-encoder later.
|
|
307
|
+
|
|
308
|
+
```python
|
|
309
|
+
from ragfallback.retrieval import ReRankerGuard
|
|
310
|
+
guard = ReRankerGuard(rerank_fn=my_cross_encoder_rerank, top_n=4)
|
|
311
|
+
docs = guard.apply(query, retrieved_docs)
|
|
312
|
+
```
|
|
313
|
+
|
|
314
|
+
**RetrieverAsVectorStore** — wraps any LangChain `BaseRetriever` (e.g. `SmartThresholdHybridRetriever`) so it exposes the `as_retriever()` surface `AdaptiveRAGRetriever` expects.
|
|
315
|
+
|
|
316
|
+
```python
|
|
317
|
+
from ragfallback.retrieval import RetrieverAsVectorStore
|
|
318
|
+
shim = RetrieverAsVectorStore(hybrid_retriever)
|
|
319
|
+
retriever = AdaptiveRAGRetriever(vector_store=shim, llm=llm)
|
|
320
|
+
```
|
|
321
|
+
|
|
258
322
|
---
|
|
259
323
|
|
|
260
324
|
### `ragfallback.core`
|
|
@@ -278,6 +342,19 @@ print(result.answer, result.confidence, result.attempts_used)
|
|
|
278
342
|
|
|
279
343
|
Requires `MISTRAL_API_KEY` (or any LangChain-compatible LLM passed via `llm=`).
|
|
280
344
|
|
|
345
|
+
**aquery_with_fallback** — native async version of `query_with_fallback()`. Real coroutine using LangChain `ainvoke()` — not a thread-pool wrapper. Falls back to a thread pool automatically if the underlying LLM doesn't implement `ainvoke`.
|
|
346
|
+
|
|
347
|
+
```python
|
|
348
|
+
import asyncio
|
|
349
|
+
|
|
350
|
+
# async-native — LLM API calls overlap instead of serializing
|
|
351
|
+
result = await retriever.aquery_with_fallback("What is the refund policy?")
|
|
352
|
+
print(result.answer, result.confidence, result.attempts)
|
|
353
|
+
|
|
354
|
+
# works in FastAPI, GoldenRunner.run_async(), or any async context
|
|
355
|
+
asyncio.run(retriever.aquery_with_fallback("How do API tokens expire?"))
|
|
356
|
+
```
|
|
357
|
+
|
|
281
358
|
---
|
|
282
359
|
|
|
283
360
|
### `ragfallback.strategies`
|
|
@@ -315,6 +392,42 @@ metrics.record_attempt(success=True, latency_ms=120, confidence=0.85)
|
|
|
315
392
|
print(metrics.get_stats())
|
|
316
393
|
```
|
|
317
394
|
|
|
395
|
+
**CacheMonitor** — wraps any LangChain retriever to track cache hit rate, per-category latency (hit vs miss), TTL-based expiry, and LRU eviction. Zero new dependencies — stdlib only. Supports both sync `invoke()` and async `ainvoke()`.
|
|
396
|
+
|
|
397
|
+
```python
|
|
398
|
+
from ragfallback.tracking import CacheMonitor
|
|
399
|
+
|
|
400
|
+
monitor = CacheMonitor(max_size=512, ttl_seconds=600)
|
|
401
|
+
cached_retriever = monitor.wrap_retriever(store.as_retriever(search_kwargs={"k": 4}))
|
|
402
|
+
|
|
403
|
+
# use cached_retriever exactly like any LangChain retriever
|
|
404
|
+
docs = cached_retriever.invoke("What is the refund policy?")
|
|
405
|
+
|
|
406
|
+
print(monitor.summary())
|
|
407
|
+
# → cache hit_rate=34.7% hits=26 misses=49 entries=49 evictions=0
|
|
408
|
+
|
|
409
|
+
stats = monitor.get_stats()
|
|
410
|
+
print(stats.hit_rate, stats.avg_hit_latency_ms, stats.avg_miss_latency_ms)
|
|
411
|
+
```
|
|
412
|
+
|
|
413
|
+
Pass to `GoldenRunner` to capture cache efficiency alongside RAGAS scores:
|
|
414
|
+
|
|
415
|
+
```python
|
|
416
|
+
from ragfallback.mlops import GoldenRunner, RagasHook
|
|
417
|
+
from ragfallback.tracking import CacheMonitor
|
|
418
|
+
|
|
419
|
+
monitor = CacheMonitor(max_size=256, ttl_seconds=300)
|
|
420
|
+
runner = GoldenRunner(
|
|
421
|
+
retriever=retriever,
|
|
422
|
+
ragas_hook=hook,
|
|
423
|
+
dataset="examples/golden_qa.json",
|
|
424
|
+
cache_monitor=monitor,
|
|
425
|
+
)
|
|
426
|
+
report = asyncio.run(runner.run_async())
|
|
427
|
+
print(report.cache_stats)
|
|
428
|
+
# → {"hit_rate": 0.347, "hits": 26, "misses": 49, "evictions": 0, ...}
|
|
429
|
+
```
|
|
430
|
+
|
|
318
431
|
---
|
|
319
432
|
|
|
320
433
|
### `ragfallback.evaluation`
|
|
@@ -376,7 +489,8 @@ RAGEvaluator (10 real Q&A pairs, heuristic, no LLM judge):
|
|
|
376
489
|
Avg overall : 62.9%
|
|
377
490
|
```
|
|
378
491
|
|
|
379
|
-
Install: `pip install ragfallback[chroma,huggingface,real-data]`
|
|
492
|
+
Install: `pip install ragfallback[chroma,huggingface,real-data]`
|
|
493
|
+
|
|
380
494
|
Dataset: [rajpurkar/squad](https://huggingface.co/datasets/rajpurkar/squad) — CC BY-SA 4.0
|
|
381
495
|
|
|
382
496
|
---
|
|
@@ -407,16 +521,20 @@ pip install ragfallback[mlops] # MLOps eval layer (RAGAS +
|
|
|
407
521
|
## Subpackage import map
|
|
408
522
|
|
|
409
523
|
```python
|
|
410
|
-
from ragfallback import AdaptiveRAGRetriever, QueryResult, CostTracker, MetricsCollector
|
|
524
|
+
from ragfallback import AdaptiveRAGRetriever, QueryResult, CostTracker, MetricsCollector, CacheMonitor
|
|
411
525
|
|
|
412
526
|
from ragfallback.diagnostics import (
|
|
413
527
|
ChunkQualityChecker, EmbeddingGuard, EmbeddingQualityProbe,
|
|
414
528
|
RetrievalHealthCheck, StaleIndexDetector, ContextWindowGuard,
|
|
415
529
|
OverlappingContextStitcher, sanitize_documents, sanitize_metadata,
|
|
416
530
|
)
|
|
417
|
-
from ragfallback.retrieval import
|
|
531
|
+
from ragfallback.retrieval import (
|
|
532
|
+
SmartThresholdHybridRetriever, FailoverRetriever,
|
|
533
|
+
ReRankerGuard, RetrieverAsVectorStore,
|
|
534
|
+
)
|
|
418
535
|
from ragfallback.strategies import QueryVariationsStrategy, MultiHopFallbackStrategy
|
|
419
536
|
from ragfallback.evaluation import RAGEvaluator
|
|
537
|
+
from ragfallback.tracking import CacheMonitor, CacheStats
|
|
420
538
|
from ragfallback.mlops import (
|
|
421
539
|
RagasHook, RagasReport,
|
|
422
540
|
BaselineRegistry, RegressionError,
|
|
@@ -512,11 +630,48 @@ python examples/ci_regression_gate.py # exits 0 (pass) or 1 (fail)
|
|
|
512
630
|
|
|
513
631
|
---
|
|
514
632
|
|
|
633
|
+
## FAQ
|
|
634
|
+
|
|
635
|
+
**Does this replace LangChain / LlamaIndex / my vector DB?**
|
|
636
|
+
No. ragfallback wraps whatever retriever and vector store you already use. It adds checks and fallback paths; it doesn't add a new abstraction layer you have to migrate to.
|
|
637
|
+
|
|
638
|
+
**Do I need an LLM API key to use this?**
|
|
639
|
+
Not for most of it. `ChunkQualityChecker`, `EmbeddingGuard`, `RetrievalHealthCheck`, `SmartThresholdHybridRetriever`, `ContextWindowGuard`, and `RAGEvaluator` (heuristic mode) all run locally. Only `AdaptiveRAGRetriever`, `QueryVariationsStrategy`, and `MultiHopFallbackStrategy` need an LLM, and any LangChain-compatible one works — including local Ollama models.
|
|
640
|
+
|
|
641
|
+
**Why are the example numbers different every time I run them?**
|
|
642
|
+
Because they're computed live against real public datasets (SQuAD, PubMedQA, CUAD), not hardcoded. The README's "Verified numbers" section is the literal stdout of one run of `examples/real_data_demo.py`, so your figures may differ slightly — run it yourself to compare.
|
|
643
|
+
|
|
644
|
+
**Is this production-ready?**
|
|
645
|
+
It's used in the author's own RAG pipelines and has a CI regression gate that runs on every push (see badge above). It's tagged Beta on PyPI because the public API can still shift between minor versions — pin a version in production and read [CHANGELOG.md](CHANGELOG.md) before upgrading.
|
|
646
|
+
|
|
647
|
+
**How is this different from RAGAS?**
|
|
648
|
+
RAGAS scores answer quality. ragfallback includes a thin RAGAS-compatible hook (`ragfallback.mlops.RagasHook`) for that, but the rest of the library is about *preventing* failures before they reach evaluation — chunk quality, embedding integrity, retrieval fallback, and context-window fit. Use both; they solve different parts of the pipeline.
|
|
649
|
+
|
|
650
|
+
---
|
|
651
|
+
|
|
652
|
+
## Star history
|
|
653
|
+
|
|
654
|
+
<a href="https://star-history.com/#irfanalidv/ragfallback&Date">
|
|
655
|
+
<img src="https://api.star-history.com/svg?repos=irfanalidv/ragfallback&type=Date" alt="Star History Chart" width="100%">
|
|
656
|
+
</a>
|
|
657
|
+
|
|
658
|
+
---
|
|
659
|
+
|
|
515
660
|
## Contributing
|
|
516
661
|
|
|
517
662
|
See [CONTRIBUTING.md](CONTRIBUTING.md). The quick version: run `pytest tests/unit/ -v` before any PR, follow Google-style docstrings, use `logging` not `print`, and update `__all__` in the subpackage `__init__.py`.
|
|
518
663
|
|
|
519
664
|
## License · Changelog
|
|
520
665
|
|
|
521
|
-
MIT License — see [LICENSE](LICENSE).
|
|
666
|
+
MIT License — see [LICENSE](LICENSE).
|
|
667
|
+
|
|
522
668
|
Full version history in [CHANGELOG.md](CHANGELOG.md).
|
|
669
|
+
|
|
670
|
+
---
|
|
671
|
+
|
|
672
|
+
<div align="center">
|
|
673
|
+
|
|
674
|
+
Built and maintained by **[Irfan Ali](https://github.com/irfanalidv)** — Senior AI Engineer (LLMs, RAG, agents, voice AI).
|
|
675
|
+
Part of an [11-package open-source toolkit](https://pypi.org/user/irfanalidv/) for production RAG and agent systems.
|
|
676
|
+
|
|
677
|
+
</div>
|
|
@@ -32,7 +32,9 @@ def _doc_id(text: str, prefix: str = "doc") -> str:
|
|
|
32
32
|
return f"{prefix}_{h}"
|
|
33
33
|
|
|
34
34
|
|
|
35
|
-
def build_squad_samples(
|
|
35
|
+
def build_squad_samples(
|
|
36
|
+
n: int = 75,
|
|
37
|
+
) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]:
|
|
36
38
|
"""
|
|
37
39
|
Load SQuAD validation split.
|
|
38
40
|
|
|
@@ -104,7 +106,9 @@ def build_squad_samples(n: int = 75) -> Tuple[List[Dict[str, Any]], List[Dict[st
|
|
|
104
106
|
return samples, docs_meta
|
|
105
107
|
|
|
106
108
|
|
|
107
|
-
def build_sciq_samples(
|
|
109
|
+
def build_sciq_samples(
|
|
110
|
+
n: int = 25,
|
|
111
|
+
) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]:
|
|
108
112
|
"""
|
|
109
113
|
Load SciQ test split — science domain, harder than SQuAD.
|
|
110
114
|
|
|
@@ -33,6 +33,7 @@ warnings.filterwarnings(
|
|
|
33
33
|
)
|
|
34
34
|
|
|
35
35
|
import _kb_common
|
|
36
|
+
|
|
36
37
|
from ragfallback import AdaptiveRAGRetriever, CostTracker, MetricsCollector
|
|
37
38
|
from ragfallback.utils.llm_factory import create_open_source_llm
|
|
38
39
|
|
|
@@ -66,7 +67,9 @@ def _run_demo() -> int:
|
|
|
66
67
|
collection_name="ragfallback_kb_demo",
|
|
67
68
|
)
|
|
68
69
|
except ImportError as e:
|
|
69
|
-
print(
|
|
70
|
+
print(
|
|
71
|
+
f"{e}\nInstall: pip install chromadb sentence-transformers", file=sys.stderr
|
|
72
|
+
)
|
|
70
73
|
return 1
|
|
71
74
|
|
|
72
75
|
print("Chroma collection ready (embeddings computed from real file contents).\n")
|
|
@@ -118,7 +121,11 @@ def _run_demo() -> int:
|
|
|
118
121
|
)
|
|
119
122
|
except Exception as e:
|
|
120
123
|
err = str(e).lower()
|
|
121
|
-
if
|
|
124
|
+
if (
|
|
125
|
+
"connection refused" in err
|
|
126
|
+
or "11434" in err
|
|
127
|
+
or "failed to establish" in err
|
|
128
|
+
):
|
|
122
129
|
print(
|
|
123
130
|
"\n(Ollama not reachable — retrieval-only preview, no paid API keys.)\n"
|
|
124
131
|
"For full adaptive RAG: install https://ollama.ai and run: ollama pull llama3\n"
|
|
@@ -127,7 +134,9 @@ def _run_demo() -> int:
|
|
|
127
134
|
for i, doc in enumerate(hits, 1):
|
|
128
135
|
src = (doc.metadata or {}).get("source", "?")
|
|
129
136
|
body = (doc.page_content or "")[:400].replace("\n", " ")
|
|
130
|
-
print(
|
|
137
|
+
print(
|
|
138
|
+
f" [{i}] source={src}\n {body}{'…' if len(doc.page_content or '') > 400 else ''}\n"
|
|
139
|
+
)
|
|
131
140
|
return 0
|
|
132
141
|
print(
|
|
133
142
|
"Adaptive RAG failed. Is Ollama running?\n"
|
|
@@ -138,7 +147,9 @@ def _run_demo() -> int:
|
|
|
138
147
|
return 1
|
|
139
148
|
|
|
140
149
|
print(f"\nAnswer:\n {result.answer}\n")
|
|
141
|
-
print(
|
|
150
|
+
print(
|
|
151
|
+
f"Confidence: {result.confidence:.2%} | attempts: {result.attempts} | cost: ${result.cost:.4f}"
|
|
152
|
+
)
|
|
142
153
|
|
|
143
154
|
if result.intermediate_steps:
|
|
144
155
|
print("\nIntermediate steps (queries tried):")
|
|
@@ -153,7 +164,9 @@ def _run_demo() -> int:
|
|
|
153
164
|
def main() -> int:
|
|
154
165
|
import logging
|
|
155
166
|
|
|
156
|
-
logging.getLogger("ragfallback.strategies.query_variations").setLevel(
|
|
167
|
+
logging.getLogger("ragfallback.strategies.query_variations").setLevel(
|
|
168
|
+
logging.CRITICAL
|
|
169
|
+
)
|
|
157
170
|
with warnings.catch_warnings():
|
|
158
171
|
warnings.simplefilter("ignore")
|
|
159
172
|
return _run_demo()
|
|
@@ -158,14 +158,16 @@ async def run_gate() -> int:
|
|
|
158
158
|
print(
|
|
159
159
|
f" Comparing against baseline (recorded: {baseline.get('recorded_at', 'unknown')})"
|
|
160
160
|
)
|
|
161
|
-
print(
|
|
161
|
+
print(
|
|
162
|
+
" Threshold: 5% quality metrics; latency not gated (CI runners too noisy) → FAIL"
|
|
163
|
+
)
|
|
162
164
|
|
|
163
165
|
try:
|
|
164
166
|
registry.compare_or_fail(
|
|
165
167
|
report,
|
|
166
168
|
dataset=dataset_name,
|
|
167
169
|
threshold=0.05,
|
|
168
|
-
latency_threshold=0
|
|
170
|
+
latency_threshold=5.0, # 500% — P95 latency varies wildly on GH Actions shared runners
|
|
169
171
|
)
|
|
170
172
|
registry.update(report, dataset=dataset_name)
|
|
171
173
|
print("\n RESULT: PASS ✓ — No regression detected")
|
|
@@ -13,11 +13,14 @@ Env vars : NONE required for retrieval demo; HF_TOKEN optional for LLM
|
|
|
13
13
|
|
|
14
14
|
from __future__ import annotations
|
|
15
15
|
|
|
16
|
-
import sys
|
|
17
16
|
import os
|
|
17
|
+
import sys
|
|
18
18
|
|
|
19
19
|
_repo_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
|
20
|
-
if
|
|
20
|
+
if (
|
|
21
|
+
os.path.isdir(os.path.join(_repo_root, "ragfallback"))
|
|
22
|
+
and _repo_root not in sys.path
|
|
23
|
+
):
|
|
21
24
|
sys.path.insert(0, _repo_root)
|
|
22
25
|
|
|
23
26
|
_examples_dir = os.path.dirname(os.path.abspath(__file__))
|
|
@@ -66,7 +69,9 @@ def main() -> None:
|
|
|
66
69
|
|
|
67
70
|
checker = ChunkQualityChecker(min_chars=40)
|
|
68
71
|
report = checker.check(documents)
|
|
69
|
-
print(
|
|
72
|
+
print(
|
|
73
|
+
f"\nChunkQualityChecker: {report.n_chunks} sentences Violations: {len(report.violations)}"
|
|
74
|
+
)
|
|
70
75
|
|
|
71
76
|
import _kb_common
|
|
72
77
|
|
|
@@ -96,7 +101,9 @@ def main() -> None:
|
|
|
96
101
|
print(f" → {best}...")
|
|
97
102
|
|
|
98
103
|
print("\n✅ Financial RAG demo complete (no paid API keys used).")
|
|
99
|
-
print(
|
|
104
|
+
print(
|
|
105
|
+
" To add LLM generation: set HF_TOKEN env var and pass an LLM to AdaptiveRAGRetriever."
|
|
106
|
+
)
|
|
100
107
|
|
|
101
108
|
|
|
102
109
|
if __name__ == "__main__":
|