ragcheck-cli 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. ragcheck_cli-0.2.0/.github/workflows/ragcheck.yml +25 -0
  2. ragcheck_cli-0.2.0/.github/workflows/tests.yml +22 -0
  3. ragcheck_cli-0.2.0/.gitignore +12 -0
  4. ragcheck_cli-0.2.0/CHANGELOG.md +36 -0
  5. ragcheck_cli-0.2.0/CONTRIBUTING.md +31 -0
  6. ragcheck_cli-0.2.0/LICENSE +21 -0
  7. ragcheck_cli-0.2.0/MANIFEST.in +5 -0
  8. ragcheck_cli-0.2.0/PKG-INFO +193 -0
  9. ragcheck_cli-0.2.0/README.md +154 -0
  10. ragcheck_cli-0.2.0/docs/ARCHITECTURE.md +40 -0
  11. ragcheck_cli-0.2.0/examples/chunk_demo.py +47 -0
  12. ragcheck_cli-0.2.0/examples/classifier_demo.py +94 -0
  13. ragcheck_cli-0.2.0/examples/demo.py +91 -0
  14. ragcheck_cli-0.2.0/examples/embed_demo.py +50 -0
  15. ragcheck_cli-0.2.0/examples/full_pipeline_demo.py +36 -0
  16. ragcheck_cli-0.2.0/examples/qa_demo.py +73 -0
  17. ragcheck_cli-0.2.0/examples/report_demo.py +65 -0
  18. ragcheck_cli-0.2.0/legal_data/BNSS 2023.pdf +0 -0
  19. ragcheck_cli-0.2.0/legal_data/BNS_2023.pdf +0 -0
  20. ragcheck_cli-0.2.0/legal_data/BSA_2023.pdf +0 -0
  21. ragcheck_cli-0.2.0/pyproject.toml +83 -0
  22. ragcheck_cli-0.2.0/ragcheck/__init__.py +3 -0
  23. ragcheck_cli-0.2.0/ragcheck/__main__.py +6 -0
  24. ragcheck_cli-0.2.0/ragcheck/analyzers/__init__.py +1 -0
  25. ragcheck_cli-0.2.0/ragcheck/analyzers/chunkers.py +289 -0
  26. ragcheck_cli-0.2.0/ragcheck/analyzers/failure_classifier.py +174 -0
  27. ragcheck_cli-0.2.0/ragcheck/analyzers/recommender.py +176 -0
  28. ragcheck_cli-0.2.0/ragcheck/cli.py +211 -0
  29. ragcheck_cli-0.2.0/ragcheck/core/__init__.py +1 -0
  30. ragcheck_cli-0.2.0/ragcheck/core/config.py +75 -0
  31. ragcheck_cli-0.2.0/ragcheck/core/config_loader.py +55 -0
  32. ragcheck_cli-0.2.0/ragcheck/core/document_loader.py +99 -0
  33. ragcheck_cli-0.2.0/ragcheck/core/embeddings.py +38 -0
  34. ragcheck_cli-0.2.0/ragcheck/core/progress.py +41 -0
  35. ragcheck_cli-0.2.0/ragcheck/core/vector_store.py +81 -0
  36. ragcheck_cli-0.2.0/ragcheck/reports/__init__.py +1 -0
  37. ragcheck_cli-0.2.0/ragcheck/reports/chunk_visualizer.py +132 -0
  38. ragcheck_cli-0.2.0/ragcheck/reports/export.py +52 -0
  39. ragcheck_cli-0.2.0/ragcheck/reports/generator.py +235 -0
  40. ragcheck_cli-0.2.0/ragcheck/reports/html_report.py +460 -0
  41. ragcheck_cli-0.2.0/ragcheck/testers/__init__.py +1 -0
  42. ragcheck_cli-0.2.0/ragcheck/testers/auto_qa.py +221 -0
  43. ragcheck_cli-0.2.0/ragcheck/testers/retrieval_tester.py +185 -0
  44. ragcheck_cli-0.2.0/ragcheck.yaml +27 -0
  45. ragcheck_cli-0.2.0/sample_data/components.txt +1 -0
  46. ragcheck_cli-0.2.0/sample_data/intro.txt +1 -0
  47. ragcheck_cli-0.2.0/tests/__init__.py +1 -0
  48. ragcheck_cli-0.2.0/tests/integration/test_end_to_end.py +74 -0
  49. ragcheck_cli-0.2.0/tests/integration/test_pip_install.py +26 -0
  50. ragcheck_cli-0.2.0/tests/unit/__init__.py +1 -0
  51. ragcheck_cli-0.2.0/tests/unit/test_auto_qa.py +36 -0
  52. ragcheck_cli-0.2.0/tests/unit/test_chunkers.py +150 -0
  53. ragcheck_cli-0.2.0/tests/unit/test_cli.py +44 -0
  54. ragcheck_cli-0.2.0/tests/unit/test_config.py +33 -0
  55. ragcheck_cli-0.2.0/tests/unit/test_embeddings.py +30 -0
  56. ragcheck_cli-0.2.0/tests/unit/test_failure_classifier.py +58 -0
  57. ragcheck_cli-0.2.0/tests/unit/test_html_report.py +73 -0
  58. ragcheck_cli-0.2.0/tests/unit/test_recommender.py +70 -0
  59. ragcheck_cli-0.2.0/tests/unit/test_retrieval_tester.py +97 -0
  60. ragcheck_cli-0.2.0/tests/unit/test_vector_store.py +61 -0
@@ -0,0 +1,25 @@
1
+ name: RAG Quality Check
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ jobs:
10
+ ragcheck:
11
+ runs-on: ubuntu-latest
12
+ steps:
13
+ - uses: actions/checkout@v4
14
+ - uses: actions/setup-python@v5
15
+ with:
16
+ python-version: "3.11"
17
+ - run: curl -LsSf https://astral.sh/uv/install.sh | sh
18
+ - run: uv sync
19
+ - run: uv run ragcheck run --docs ./data --ci --min-score 0.80
20
+ env:
21
+ OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
22
+ - uses: actions/upload-artifact@v4
23
+ with:
24
+ name: ragcheck-report
25
+ path: ragcheck_report.html
@@ -0,0 +1,22 @@
1
+ name: Tests
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ jobs:
10
+ test:
11
+ runs-on: ubuntu-latest
12
+ strategy:
13
+ matrix:
14
+ python-version: ["3.11", "3.12"]
15
+ steps:
16
+ - uses: actions/checkout@v4
17
+ - uses: actions/setup-python@v5
18
+ with:
19
+ python-version: ${{ matrix.python-version }}
20
+ - run: curl -LsSf https://astral.sh/uv/install.sh | sh
21
+ - run: uv sync
22
+ - run: uv run pytest tests/ -v --cov=ragcheck --cov-report=term-missing
@@ -0,0 +1,12 @@
1
+ __pycache__/
2
+ *.pyc
3
+ .env
4
+ .venv/
5
+ dist/
6
+ build/
7
+ *.egg-info/
8
+ .pytest_cache/
9
+ .mypy_cache/
10
+ .ruff_cache/
11
+ *.html
12
+ *.png
@@ -0,0 +1,36 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project will be documented in this file.
4
+
5
+ ## [0.2.0] - 2026-06-04
6
+
7
+ ### Added
8
+ - **Offline HTML reports** — Replaced Plotly CDN with pure CSS/HTML charts. Reports work without internet.
9
+ - **Real faithfulness scoring** — NLI model support (`--nli-model`) for verifying generated answers against retrieved chunks. Falls back to heuristic overlap check.
10
+ - **Answer generation** — `--generate-answers` flag wires LiteLLM to populate `RetrievalResult.generated_answer` for faithfulness evaluation.
11
+ - **Scaled auto-QA** — Increased from 3 to 50 synthetic test questions with perplexity-based filtering to remove trivial questions.
12
+ - **Chunk visualizer integration** — Merged standalone `chunk_visualizer.py` into main report as dedicated "Chunk Analysis" section with histogram and expandable previews.
13
+ - **RAGAS re-added** — Optional extra `pip install ragcheck-cli[ragas]` with proper version pin (`>=0.4.0,<0.5.0`).
14
+ - **Windows compatibility** — UTF-8 encoding fixes in `config_loader.py`, removed Unicode checkmark causing `cp1252` encoding errors.
15
+ - **Local model support** — Zero-cost operation via Ollama (`--answer-model ollama/phi3:mini`).
16
+
17
+ ### Fixed
18
+ - Histogram bin calculation for tiny datasets (no more backwards ranges like `276–275`)
19
+ - Faithfulness showing `0%` instead of `N/A` when `--generate-answers` is not used
20
+ - `FutureWarning` from `sentence-transformers` embedding dimension method
21
+
22
+ ## [0.1.0] - 2026-06-02
23
+
24
+ ### Added
25
+ - Typer CLI with `init`, `run`, `report` commands
26
+ - 6 chunking strategies: fixed, semantic, recursive, markdown, agentic, late
27
+ - Chunk visualization with Plotly histograms
28
+ - SentenceTransformer embeddings (all-MiniLM-L6-v2)
29
+ - ChromaDB vector store
30
+ - Auto-QA generation via LiteLLM
31
+ - Dense retriever with latency/cost tracking
32
+ - Failure classification: 4 modes (retrieval miss, hallucination, overload, boundary error)
33
+ - Recommendation engine with decision tree
34
+ - Beautiful HTML reports (single file, no server)
35
+ - CI/CD mode with GitHub Actions
36
+ - PDF/PNG export via Playwright
@@ -0,0 +1,31 @@
1
+ # Contributing to ragcheck
2
+
3
+ ## Development Setup
4
+
5
+ ```bash
6
+ git clone https://github.com/pranay7863/ragcheck.git
7
+ cd ragcheck
8
+ uv sync
9
+ uv run pytest
10
+ ```
11
+
12
+ ## Code Style
13
+
14
+ - `ruff` for linting and formatting
15
+ - `mypy` for type checking
16
+ - All code must pass `ruff check .` and `mypy ragcheck/`
17
+
18
+ ## Testing
19
+
20
+ ```bash
21
+ uv run pytest
22
+ ```
23
+
24
+ ## Pull Request Process
25
+
26
+ 1. Fork the repository
27
+ 2. Create a feature branch
28
+ 3. Make your changes
29
+ 4. Run tests and linting
30
+ 5. Commit with clear messages
31
+ 6. Open a Pull Request
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Pranay Mane
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,5 @@
1
+ include README.md
2
+ include LICENSE
3
+ include CHANGELOG.md
4
+ recursive-include docs *.md
5
+ recursive-include examples *.py
@@ -0,0 +1,193 @@
1
+ Metadata-Version: 2.4
2
+ Name: ragcheck-cli
3
+ Version: 0.2.0
4
+ Summary: Lighthouse for RAG systems — diagnose and fix your retrieval pipeline
5
+ Project-URL: Homepage, https://github.com/pranay7863/ragcheck
6
+ Project-URL: Documentation, https://github.com/pranay7863/ragcheck/blob/main/README.md
7
+ Project-URL: Repository, https://github.com/pranay7863/ragcheck
8
+ Project-URL: Issues, https://github.com/pranay7863/ragcheck/issues
9
+ Author-email: Pranay Mane <pranaymane78@gmail.com>
10
+ License: MIT
11
+ License-File: LICENSE
12
+ Keywords: ai,chunking,diagnostics,evaluation,llm,rag,retrieval
13
+ Classifier: Development Status :: 3 - Alpha
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: License :: OSI Approved :: MIT License
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
20
+ Classifier: Topic :: Software Development :: Quality Assurance
21
+ Requires-Python: >=3.10
22
+ Requires-Dist: chromadb>=0.4.0
23
+ Requires-Dist: jinja2>=3.1.0
24
+ Requires-Dist: litellm>=1.0.0
25
+ Requires-Dist: nltk>=3.9.0
26
+ Requires-Dist: pydantic>=2.5.0
27
+ Requires-Dist: pyyaml>=6.0
28
+ Requires-Dist: rich>=13.0.0
29
+ Requires-Dist: sentence-transformers>=2.2.0
30
+ Requires-Dist: transformers>=4.30.0
31
+ Requires-Dist: typer>=0.12.0
32
+ Provides-Extra: export
33
+ Requires-Dist: playwright>=1.40.0; extra == 'export'
34
+ Provides-Extra: pdf
35
+ Requires-Dist: pypdf2>=3.0.0; extra == 'pdf'
36
+ Provides-Extra: ragas
37
+ Requires-Dist: ragas<0.5.0,>=0.4.0; extra == 'ragas'
38
+ Description-Content-Type: text/markdown
39
+
40
+ # ragcheck - Lighthouse for RAG Systems
41
+
42
+ [![PyPI version](https://badge.fury.io/py/ragcheck-cli.svg)](https://badge.fury.io/py/ragcheck-cli)
43
+ [![Python](https://img.shields.io/badge/python-3.11%2B-blue)](https://www.python.org/)
44
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
45
+
46
+ > One command to diagnose your RAG pipeline and get actionable fixes.
47
+
48
+ ```bash
49
+ pip install ragcheck-cli
50
+ ragcheck init
51
+ ragcheck run --docs ./data --query "What is Article 370?"
52
+ ```
53
+
54
+ ## What is ragcheck?
55
+
56
+ **ragcheck** is a lightweight, one-command diagnostic CLI that generates a beautiful, shareable HTML report analyzing why your RAG system fails and how to fix it.
57
+
58
+ Think of it as **Lighthouse for RAG systems** — just like Lighthouse audits web pages, ragcheck audits your retrieval pipeline.
59
+
60
+ ## Features
61
+
62
+ - **Auto-Generated Test Suite** - 50 synthetic questions from your documents
63
+ - **Chunk Visualizer** - See exactly where your chunking breaks
64
+ - **Retrieval Heatmap** - Identify dead chunks and dominant chunks
65
+ - **Failure Classification** - Know WHY your RAG fails, not just THAT it fails
66
+ - **Actionable Recommendations** - Specific fixes with predicted impact
67
+ - **CI/CD Integration** - Fail builds when RAG quality regresses
68
+
69
+ ## Quick Start
70
+
71
+ ### Installation
72
+
73
+ ```bash
74
+ pip install ragcheck-cli
75
+ ```
76
+
77
+ Or with [uv](https://github.com/astral-sh/uv):
78
+
79
+ ```bash
80
+ uv tool install ragcheck-cli
81
+ ```
82
+
83
+ ### Initialize
84
+
85
+ ```bash
86
+ ragcheck init
87
+ ```
88
+
89
+ Creates a `ragcheck.yaml` config file in your project.
90
+
91
+ ### Run Analysis
92
+
93
+ ```bash
94
+ ragcheck run --docs ./data --query "Your test query"
95
+ ```
96
+
97
+ Generates `ragcheck_report.html` with:
98
+ - Scorecards (retrieval accuracy, faithfulness)
99
+ - Chunk boundary visualization
100
+ - Retrieval heatmap
101
+ - Failure mode classification
102
+ - Before/after score predictions
103
+
104
+ ### CI Mode
105
+
106
+ ```bash
107
+ ragcheck run --docs ./data --ci --min-score 0.80
108
+ ```
109
+
110
+ Returns exit code 0/1. Use in GitHub Actions to fail builds on quality regression.
111
+
112
+ ## Example Report
113
+
114
+ ![ragcheck report](https://raw.githubusercontent.com/pranay7863/ragcheck/main/docs/report-screenshot.png)
115
+
116
+ ## Architecture
117
+
118
+ ```
119
+ ragcheck CLI
120
+ ├── Chunk Analyzer (6 strategies + benchmark)
121
+ ├── Retriever Tester (auto-QA + dense retrieval)
122
+ ├── Failure Classifier (4 failure modes)
123
+ ├── Recommendation Engine (decision tree)
124
+ └── Report Engine (Jinja2 + CSS/HTML)
125
+ ```
126
+
127
+ ## Tech Stack
128
+
129
+ | Component | Tool |
130
+ |-----------|------|
131
+ | CLI | Typer + Rich |
132
+ | Config | Pydantic |
133
+ | Embeddings | sentence-transformers |
134
+ | Vector DB | ChromaDB |
135
+ | LLM Interface | LiteLLM |
136
+ | Reports | Jinja2 + CSS/HTML |
137
+
138
+ ## Configuration
139
+
140
+ `ragcheck.yaml`:
141
+
142
+ ```yaml
143
+ project_name: ragcheck
144
+ docs_path: ./data
145
+ chunking:
146
+ strategy: recursive
147
+ chunk_size: 512
148
+ chunk_overlap: 128
149
+ llm:
150
+ provider: openai
151
+ model: gpt-3.5-turbo
152
+ retrieval:
153
+ top_k: 5
154
+ similarity_threshold: 0.7
155
+ report:
156
+ format: html
157
+ include_heatmap: true
158
+ ```
159
+
160
+ ## Development
161
+
162
+ ```bash
163
+ git clone https://github.com/pranay7863/ragcheck.git
164
+ cd ragcheck
165
+ uv sync
166
+ uv run pytest
167
+ uv run ruff check .
168
+ uv run mypy ragcheck/
169
+ ```
170
+
171
+ ## Contributing
172
+
173
+ See [CONTRIBUTING.md](CONTRIBUTING.md)
174
+
175
+ ## License
176
+
177
+ MIT — see [LICENSE](LICENSE)
178
+
179
+ ## Roadmap
180
+
181
+ - [x] v0.2.0 — Offline reports, NLI faithfulness, scaled auto-QA, chunk viz
182
+ - [ ] v0.3.0 — More vector DBs (Pinecone, Weaviate)
183
+ - [ ] v0.3.0 — SaaS API for teams
184
+ - [ ] v0.4.0 — Enterprise features (SSO, audit logs)
185
+
186
+ ## Support
187
+
188
+ - [GitHub Issues](https://github.com/pranay7863/ragcheck/issues)
189
+ - Twitter: [@ypranay53](https://twitter.com/pranay53)
190
+
191
+ ---
192
+
193
+ **Built with discipline.** Read the [blueprint](docs/ARCHITECTURE.md) that started it all.
@@ -0,0 +1,154 @@
1
+ # ragcheck - Lighthouse for RAG Systems
2
+
3
+ [![PyPI version](https://badge.fury.io/py/ragcheck-cli.svg)](https://badge.fury.io/py/ragcheck-cli)
4
+ [![Python](https://img.shields.io/badge/python-3.11%2B-blue)](https://www.python.org/)
5
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
6
+
7
+ > One command to diagnose your RAG pipeline and get actionable fixes.
8
+
9
+ ```bash
10
+ pip install ragcheck-cli
11
+ ragcheck init
12
+ ragcheck run --docs ./data --query "What is Article 370?"
13
+ ```
14
+
15
+ ## What is ragcheck?
16
+
17
+ **ragcheck** is a lightweight, one-command diagnostic CLI that generates a beautiful, shareable HTML report analyzing why your RAG system fails and how to fix it.
18
+
19
+ Think of it as **Lighthouse for RAG systems** — just like Lighthouse audits web pages, ragcheck audits your retrieval pipeline.
20
+
21
+ ## Features
22
+
23
+ - **Auto-Generated Test Suite** - 50 synthetic questions from your documents
24
+ - **Chunk Visualizer** - See exactly where your chunking breaks
25
+ - **Retrieval Heatmap** - Identify dead chunks and dominant chunks
26
+ - **Failure Classification** - Know WHY your RAG fails, not just THAT it fails
27
+ - **Actionable Recommendations** - Specific fixes with predicted impact
28
+ - **CI/CD Integration** - Fail builds when RAG quality regresses
29
+
30
+ ## Quick Start
31
+
32
+ ### Installation
33
+
34
+ ```bash
35
+ pip install ragcheck-cli
36
+ ```
37
+
38
+ Or with [uv](https://github.com/astral-sh/uv):
39
+
40
+ ```bash
41
+ uv tool install ragcheck-cli
42
+ ```
43
+
44
+ ### Initialize
45
+
46
+ ```bash
47
+ ragcheck init
48
+ ```
49
+
50
+ Creates a `ragcheck.yaml` config file in your project.
51
+
52
+ ### Run Analysis
53
+
54
+ ```bash
55
+ ragcheck run --docs ./data --query "Your test query"
56
+ ```
57
+
58
+ Generates `ragcheck_report.html` with:
59
+ - Scorecards (retrieval accuracy, faithfulness)
60
+ - Chunk boundary visualization
61
+ - Retrieval heatmap
62
+ - Failure mode classification
63
+ - Before/after score predictions
64
+
65
+ ### CI Mode
66
+
67
+ ```bash
68
+ ragcheck run --docs ./data --ci --min-score 0.80
69
+ ```
70
+
71
+ Returns exit code 0/1. Use in GitHub Actions to fail builds on quality regression.
72
+
73
+ ## Example Report
74
+
75
+ ![ragcheck report](https://raw.githubusercontent.com/pranay7863/ragcheck/main/docs/report-screenshot.png)
76
+
77
+ ## Architecture
78
+
79
+ ```
80
+ ragcheck CLI
81
+ ├── Chunk Analyzer (6 strategies + benchmark)
82
+ ├── Retriever Tester (auto-QA + dense retrieval)
83
+ ├── Failure Classifier (4 failure modes)
84
+ ├── Recommendation Engine (decision tree)
85
+ └── Report Engine (Jinja2 + CSS/HTML)
86
+ ```
87
+
88
+ ## Tech Stack
89
+
90
+ | Component | Tool |
91
+ |-----------|------|
92
+ | CLI | Typer + Rich |
93
+ | Config | Pydantic |
94
+ | Embeddings | sentence-transformers |
95
+ | Vector DB | ChromaDB |
96
+ | LLM Interface | LiteLLM |
97
+ | Reports | Jinja2 + CSS/HTML |
98
+
99
+ ## Configuration
100
+
101
+ `ragcheck.yaml`:
102
+
103
+ ```yaml
104
+ project_name: ragcheck
105
+ docs_path: ./data
106
+ chunking:
107
+ strategy: recursive
108
+ chunk_size: 512
109
+ chunk_overlap: 128
110
+ llm:
111
+ provider: openai
112
+ model: gpt-3.5-turbo
113
+ retrieval:
114
+ top_k: 5
115
+ similarity_threshold: 0.7
116
+ report:
117
+ format: html
118
+ include_heatmap: true
119
+ ```
120
+
121
+ ## Development
122
+
123
+ ```bash
124
+ git clone https://github.com/pranay7863/ragcheck.git
125
+ cd ragcheck
126
+ uv sync
127
+ uv run pytest
128
+ uv run ruff check .
129
+ uv run mypy ragcheck/
130
+ ```
131
+
132
+ ## Contributing
133
+
134
+ See [CONTRIBUTING.md](CONTRIBUTING.md)
135
+
136
+ ## License
137
+
138
+ MIT — see [LICENSE](LICENSE)
139
+
140
+ ## Roadmap
141
+
142
+ - [x] v0.2.0 — Offline reports, NLI faithfulness, scaled auto-QA, chunk viz
143
+ - [ ] v0.3.0 — More vector DBs (Pinecone, Weaviate)
144
+ - [ ] v0.3.0 — SaaS API for teams
145
+ - [ ] v0.4.0 — Enterprise features (SSO, audit logs)
146
+
147
+ ## Support
148
+
149
+ - [GitHub Issues](https://github.com/pranay7863/ragcheck/issues)
150
+ - Twitter: [@ypranay53](https://twitter.com/pranay53)
151
+
152
+ ---
153
+
154
+ **Built with discipline.** Read the [blueprint](docs/ARCHITECTURE.md) that started it all.
@@ -0,0 +1,40 @@
1
+ # ragcheck Architecture
2
+
3
+ ## Overview
4
+
5
+ ```
6
+ ragcheck CLI (Typer + Rich)
7
+ |
8
+ +-- Document Loader (Text, Markdown)
9
+ |
10
+ +-- Chunk Analyzer (6 strategies)
11
+ | +-- Fixed-size, Semantic, Recursive
12
+ | +-- Markdown-aware, Agentic, Late
13
+ |
14
+ +-- Embedding Manager (sentence-transformers)
15
+ |
16
+ +-- Vector Store (ChromaDB)
17
+ |
18
+ +-- Retriever Tester (DenseRetriever)
19
+ | +-- Auto-QA Generation (LiteLLM)
20
+ | +-- Latency/Cost Tracking
21
+ |
22
+ +-- Failure Classifier (4 modes)
23
+ | +-- Retrieval Miss
24
+ | +-- Context Overload
25
+ | +-- Hallucination
26
+ | +-- Chunk Boundary Error
27
+ |
28
+ +-- Recommendation Engine (Decision Tree)
29
+ |
30
+ +-- Report Engine (Jinja2 + CSS/HTML)
31
+ +-- HTML Report (single file)
32
+ +-- PDF/PNG Export (Playwright)
33
+ ```
34
+
35
+ ## Design Principles
36
+
37
+ 1. **Zero-infrastructure**: `pip install ragcheck-cli` works out of the box
38
+ 2. **Single-file output**: HTML report is one file, no server needed
39
+ 3. **Framework agnostic**: No LangChain or LlamaIndex dependency in core
40
+ 4. **Offline-first**: Core metrics use local models; LLM calls are optional
@@ -0,0 +1,47 @@
1
+ """Demo script for all 6 chunking strategies."""
2
+
3
+ from pathlib import Path
4
+
5
+ from ragcheck.analyzers.chunkers import ChunkerFactory, benchmark_chunking
6
+ from ragcheck.reports.chunk_visualizer import generate_chunk_viz
7
+
8
+
9
+ def main():
10
+ sample_text = """# Introduction to RAG
11
+
12
+ RAG (Retrieval-Augmented Generation) is a technique that combines
13
+ retrieval systems with generative AI. It works by retrieving relevant documents
14
+ from a knowledge base and then using a large language model to generate answers.
15
+
16
+ ## Key Components
17
+
18
+ The key components are: a document store, an embedding model,
19
+ a vector database, and a language model. Chunking strategy is critical because
20
+ poor chunking can split important context across boundaries.
21
+
22
+ ## Chunking Strategies
23
+
24
+ Common strategies include fixed-size, semantic, recursive, markdown-aware,
25
+ agentic (LLM-based), and late chunking (contextual embeddings)."""
26
+
27
+ # Benchmark all 6 strategies
28
+ strategies = ["fixed", "semantic", "recursive", "markdown", "agentic", "late"]
29
+ results = benchmark_chunking(sample_text, "sample.md", strategies)
30
+
31
+ print("Chunking Benchmark Results — All 6 Strategies")
32
+ print("=" * 60)
33
+ for strategy, metrics in results.items():
34
+ print(f"\n{strategy.upper():12} | Chunks: {metrics['num_chunks']:3} | "
35
+ f"Avg: {metrics['avg_length']:6.1f} | Loss: {metrics['context_loss_score']:.2%}")
36
+
37
+ # Generate HTML visualization for markdown chunker (most interesting for this doc)
38
+ md_chunks = results["markdown"]["chunks"]
39
+ html = generate_chunk_viz(md_chunks, "sample.md", "markdown", sample_text)
40
+
41
+ output_path = Path("chunk_visualization.html")
42
+ output_path.write_text(html, encoding="utf-8")
43
+ print(f"\nVisualization saved to: {output_path.absolute()}")
44
+
45
+
46
+ if __name__ == "__main__":
47
+ main()
@@ -0,0 +1,94 @@
1
+ """Demo: Failure classification + recommendations."""
2
+
3
+ from ragcheck.analyzers.chunkers import Chunk
4
+ from ragcheck.analyzers.failure_classifier import FailureClassifier
5
+ from ragcheck.analyzers.recommender import RecommendationEngine, predict_scores
6
+
7
+
8
+ def main():
9
+ classifier = FailureClassifier()
10
+ engine = RecommendationEngine()
11
+
12
+ # Simulate 4 different failure scenarios
13
+ scenarios = [
14
+ {
15
+ "name": "Retrieval Miss",
16
+ "question": "What is quantum computing?",
17
+ "expected": "Quantum computing uses qubits.",
18
+ "generated": "",
19
+ "retrieved": [],
20
+ "source": ["Quantum computing uses qubits for computation."],
21
+ },
22
+ {
23
+ "name": "Hallucination",
24
+ "question": "What is RAG?",
25
+ "expected": "RAG is Retrieval-Augmented Generation.",
26
+ "generated": "RAG is a type of database invented in 2015 by Google.",
27
+ "retrieved": [Chunk("RAG is Retrieval-Augmented Generation.", 0, 40, "doc.txt", "fixed")],
28
+ "source": ["RAG is Retrieval-Augmented Generation."],
29
+ },
30
+ {
31
+ "name": "Context Overload",
32
+ "question": "How does RAG work?",
33
+ "expected": "RAG retrieves documents then generates answers.",
34
+ "generated": "RAG retrieves documents then generates answers.",
35
+ "retrieved": [Chunk(f"chunk{i}", i*10, i*10+10, "doc.txt", "fixed") for i in range(6)],
36
+ "source": ["RAG retrieves documents then generates answers."],
37
+ },
38
+ {
39
+ "name": "Chunk Boundary Error",
40
+ "question": "Explain the full RAG pipeline.",
41
+ "expected": "RAG has retrieval and generation components working together.",
42
+ "generated": "RAG has retrieval and generation components.",
43
+ "retrieved": [
44
+ Chunk("RAG has retrieval components", 0, 28, "doc.txt", "fixed"),
45
+ Chunk("and generation components working", 29, 60, "doc.txt", "fixed"),
46
+ ],
47
+ "source": ["RAG has retrieval and generation components working together."],
48
+ },
49
+ ]
50
+
51
+ print("Failure Classification Demo")
52
+ print("=" * 60)
53
+
54
+ all_failures = []
55
+ for s in scenarios:
56
+ analysis = classifier.classify(
57
+ question=s["question"],
58
+ expected_answer=s["expected"],
59
+ generated_answer=s["generated"],
60
+ retrieved_chunks=s["retrieved"],
61
+ source_chunks=s["source"],
62
+ )
63
+ all_failures.append(analysis)
64
+
65
+ print(f"\n{s['name']}:")
66
+ print(f" Mode: {analysis.failure_mode.value}")
67
+ print(f" Confidence: {analysis.confidence}")
68
+ print(f" Explanation: {analysis.explanation}")
69
+ print(f" Fix: {analysis.recommendation}")
70
+
71
+ # Generate recommendations from all failures
72
+ print("\n" + "=" * 60)
73
+ print("Prioritized Recommendations")
74
+ print("=" * 60)
75
+
76
+ recommendations = engine.generate_recommendations(all_failures)
77
+ for i, rec in enumerate(recommendations[:5], 1):
78
+ print(f"\n{i}. {rec.title} [{rec.implementation_difficulty}]")
79
+ print(f" {rec.description}")
80
+ print(f" Expected improvement: +{rec.expected_improvement:.1%}")
81
+ print(f" Tradeoffs: {rec.tradeoffs}")
82
+ if rec.code_example:
83
+ print(f" Code: {rec.code_example}")
84
+
85
+ # Score prediction
86
+ print("\n" + "=" * 60)
87
+ current = 0.55
88
+ prediction = predict_scores(current, recommendations)
89
+ print(f"Score Prediction: {prediction['current_score']:.0%} -> {prediction['predicted_score']:.0%}")
90
+ print(f" (+{prediction['improvement']:.1%} from top {prediction['recommendations_applied']} recommendations)")
91
+
92
+
93
+ if __name__ == "__main__":
94
+ main()