PyPI - ragcheck-cli - Versions diffs - 0.2.0__tar.gz - Mend

ragcheck-cli 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (60) hide show

ragcheck_cli-0.2.0/.github/workflows/ragcheck.yml +25 -0
ragcheck_cli-0.2.0/.github/workflows/tests.yml +22 -0
ragcheck_cli-0.2.0/.gitignore +12 -0
ragcheck_cli-0.2.0/CHANGELOG.md +36 -0
ragcheck_cli-0.2.0/CONTRIBUTING.md +31 -0
ragcheck_cli-0.2.0/LICENSE +21 -0
ragcheck_cli-0.2.0/MANIFEST.in +5 -0
ragcheck_cli-0.2.0/PKG-INFO +193 -0
ragcheck_cli-0.2.0/README.md +154 -0
ragcheck_cli-0.2.0/docs/ARCHITECTURE.md +40 -0
ragcheck_cli-0.2.0/examples/chunk_demo.py +47 -0
ragcheck_cli-0.2.0/examples/classifier_demo.py +94 -0
ragcheck_cli-0.2.0/examples/demo.py +91 -0
ragcheck_cli-0.2.0/examples/embed_demo.py +50 -0
ragcheck_cli-0.2.0/examples/full_pipeline_demo.py +36 -0
ragcheck_cli-0.2.0/examples/qa_demo.py +73 -0
ragcheck_cli-0.2.0/examples/report_demo.py +65 -0
ragcheck_cli-0.2.0/legal_data/BNSS 2023.pdf +0 -0
ragcheck_cli-0.2.0/legal_data/BNS_2023.pdf +0 -0
ragcheck_cli-0.2.0/legal_data/BSA_2023.pdf +0 -0
ragcheck_cli-0.2.0/pyproject.toml +83 -0
ragcheck_cli-0.2.0/ragcheck/__init__.py +3 -0
ragcheck_cli-0.2.0/ragcheck/__main__.py +6 -0
ragcheck_cli-0.2.0/ragcheck/analyzers/__init__.py +1 -0
ragcheck_cli-0.2.0/ragcheck/analyzers/chunkers.py +289 -0
ragcheck_cli-0.2.0/ragcheck/analyzers/failure_classifier.py +174 -0
ragcheck_cli-0.2.0/ragcheck/analyzers/recommender.py +176 -0
ragcheck_cli-0.2.0/ragcheck/cli.py +211 -0
ragcheck_cli-0.2.0/ragcheck/core/__init__.py +1 -0
ragcheck_cli-0.2.0/ragcheck/core/config.py +75 -0
ragcheck_cli-0.2.0/ragcheck/core/config_loader.py +55 -0
ragcheck_cli-0.2.0/ragcheck/core/document_loader.py +99 -0
ragcheck_cli-0.2.0/ragcheck/core/embeddings.py +38 -0
ragcheck_cli-0.2.0/ragcheck/core/progress.py +41 -0
ragcheck_cli-0.2.0/ragcheck/core/vector_store.py +81 -0
ragcheck_cli-0.2.0/ragcheck/reports/__init__.py +1 -0
ragcheck_cli-0.2.0/ragcheck/reports/chunk_visualizer.py +132 -0
ragcheck_cli-0.2.0/ragcheck/reports/export.py +52 -0
ragcheck_cli-0.2.0/ragcheck/reports/generator.py +235 -0
ragcheck_cli-0.2.0/ragcheck/reports/html_report.py +460 -0
ragcheck_cli-0.2.0/ragcheck/testers/__init__.py +1 -0
ragcheck_cli-0.2.0/ragcheck/testers/auto_qa.py +221 -0
ragcheck_cli-0.2.0/ragcheck/testers/retrieval_tester.py +185 -0
ragcheck_cli-0.2.0/ragcheck.yaml +27 -0
ragcheck_cli-0.2.0/sample_data/components.txt +1 -0
ragcheck_cli-0.2.0/sample_data/intro.txt +1 -0
ragcheck_cli-0.2.0/tests/__init__.py +1 -0
ragcheck_cli-0.2.0/tests/integration/test_end_to_end.py +74 -0
ragcheck_cli-0.2.0/tests/integration/test_pip_install.py +26 -0
ragcheck_cli-0.2.0/tests/unit/__init__.py +1 -0
ragcheck_cli-0.2.0/tests/unit/test_auto_qa.py +36 -0
ragcheck_cli-0.2.0/tests/unit/test_chunkers.py +150 -0
ragcheck_cli-0.2.0/tests/unit/test_cli.py +44 -0
ragcheck_cli-0.2.0/tests/unit/test_config.py +33 -0
ragcheck_cli-0.2.0/tests/unit/test_embeddings.py +30 -0
ragcheck_cli-0.2.0/tests/unit/test_failure_classifier.py +58 -0
ragcheck_cli-0.2.0/tests/unit/test_html_report.py +73 -0
ragcheck_cli-0.2.0/tests/unit/test_recommender.py +70 -0
ragcheck_cli-0.2.0/tests/unit/test_retrieval_tester.py +97 -0
ragcheck_cli-0.2.0/tests/unit/test_vector_store.py +61 -0

ragcheck_cli-0.2.0/.github/workflows/ragcheck.yml ADDED Viewed

@@ -0,0 +1,25 @@
+name: RAG Quality Check
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+jobs:
+  ragcheck:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+      - run: curl -LsSf https://astral.sh/uv/install.sh | sh
+      - run: uv sync
+      - run: uv run ragcheck run --docs ./data --ci --min-score 0.80
+        env:
+          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+      - uses: actions/upload-artifact@v4
+        with:
+          name: ragcheck-report
+          path: ragcheck_report.html

ragcheck_cli-0.2.0/.github/workflows/tests.yml ADDED Viewed

@@ -0,0 +1,22 @@
+name: Tests
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: ["3.11", "3.12"]
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+      - run: curl -LsSf https://astral.sh/uv/install.sh | sh
+      - run: uv sync
+      - run: uv run pytest tests/ -v --cov=ragcheck --cov-report=term-missing

ragcheck_cli-0.2.0/.gitignore ADDED Viewed

@@ -0,0 +1,12 @@
+__pycache__/
+*.pyc
+.env
+.venv/
+dist/
+build/
+*.egg-info/
+.pytest_cache/
+.mypy_cache/
+.ruff_cache/
+*.html
+*.png

ragcheck_cli-0.2.0/CHANGELOG.md ADDED Viewed

@@ -0,0 +1,36 @@
+# Changelog
+All notable changes to this project will be documented in this file.
+## [0.2.0] - 2026-06-04
+### Added
+- **Offline HTML reports** — Replaced Plotly CDN with pure CSS/HTML charts. Reports work without internet.
+- **Real faithfulness scoring** — NLI model support (`--nli-model`) for verifying generated answers against retrieved chunks. Falls back to heuristic overlap check.
+- **Answer generation** — `--generate-answers` flag wires LiteLLM to populate `RetrievalResult.generated_answer` for faithfulness evaluation.
+- **Scaled auto-QA** — Increased from 3 to 50 synthetic test questions with perplexity-based filtering to remove trivial questions.
+- **Chunk visualizer integration** — Merged standalone `chunk_visualizer.py` into main report as dedicated "Chunk Analysis" section with histogram and expandable previews.
+- **RAGAS re-added** — Optional extra `pip install ragcheck-cli[ragas]` with proper version pin (`>=0.4.0,<0.5.0`).
+- **Windows compatibility** — UTF-8 encoding fixes in `config_loader.py`, removed Unicode checkmark causing `cp1252` encoding errors.
+- **Local model support** — Zero-cost operation via Ollama (`--answer-model ollama/phi3:mini`).
+### Fixed
+- Histogram bin calculation for tiny datasets (no more backwards ranges like `276–275`)
+- Faithfulness showing `0%` instead of `N/A` when `--generate-answers` is not used
+- `FutureWarning` from `sentence-transformers` embedding dimension method
+## [0.1.0] - 2026-06-02
+### Added
+- Typer CLI with `init`, `run`, `report` commands
+- 6 chunking strategies: fixed, semantic, recursive, markdown, agentic, late
+- Chunk visualization with Plotly histograms
+- SentenceTransformer embeddings (all-MiniLM-L6-v2)
+- ChromaDB vector store
+- Auto-QA generation via LiteLLM
+- Dense retriever with latency/cost tracking
+- Failure classification: 4 modes (retrieval miss, hallucination, overload, boundary error)
+- Recommendation engine with decision tree
+- Beautiful HTML reports (single file, no server)
+- CI/CD mode with GitHub Actions
+- PDF/PNG export via Playwright

ragcheck_cli-0.2.0/CONTRIBUTING.md ADDED Viewed

@@ -0,0 +1,31 @@
+# Contributing to ragcheck
+## Development Setup
+```bash
+git clone https://github.com/pranay7863/ragcheck.git
+cd ragcheck
+uv sync
+uv run pytest
+```
+## Code Style
+- `ruff` for linting and formatting
+- `mypy` for type checking
+- All code must pass `ruff check .` and `mypy ragcheck/`
+## Testing
+```bash
+uv run pytest
+```
+## Pull Request Process
+1. Fork the repository
+2. Create a feature branch
+3. Make your changes
+4. Run tests and linting
+5. Commit with clear messages
+6. Open a Pull Request

ragcheck_cli-0.2.0/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2026 Pranay Mane
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

ragcheck_cli-0.2.0/MANIFEST.in ADDED Viewed

@@ -0,0 +1,5 @@
+include README.md
+include LICENSE
+include CHANGELOG.md
+recursive-include docs *.md
+recursive-include examples *.py

ragcheck_cli-0.2.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,193 @@
+Metadata-Version: 2.4
+Name: ragcheck-cli
+Version: 0.2.0
+Summary: Lighthouse for RAG systems — diagnose and fix your retrieval pipeline
+Project-URL: Homepage, https://github.com/pranay7863/ragcheck
+Project-URL: Documentation, https://github.com/pranay7863/ragcheck/blob/main/README.md
+Project-URL: Repository, https://github.com/pranay7863/ragcheck
+Project-URL: Issues, https://github.com/pranay7863/ragcheck/issues
+Author-email: Pranay Mane <pranaymane78@gmail.com>
+License: MIT
+License-File: LICENSE
+Keywords: ai,chunking,diagnostics,evaluation,llm,rag,retrieval
+Classifier: Development Status :: 3 - Alpha
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
+Classifier: Topic :: Software Development :: Quality Assurance
+Requires-Python: >=3.10
+Requires-Dist: chromadb>=0.4.0
+Requires-Dist: jinja2>=3.1.0
+Requires-Dist: litellm>=1.0.0
+Requires-Dist: nltk>=3.9.0
+Requires-Dist: pydantic>=2.5.0
+Requires-Dist: pyyaml>=6.0
+Requires-Dist: rich>=13.0.0
+Requires-Dist: sentence-transformers>=2.2.0
+Requires-Dist: transformers>=4.30.0
+Requires-Dist: typer>=0.12.0
+Provides-Extra: export
+Requires-Dist: playwright>=1.40.0; extra == 'export'
+Provides-Extra: pdf
+Requires-Dist: pypdf2>=3.0.0; extra == 'pdf'
+Provides-Extra: ragas
+Requires-Dist: ragas<0.5.0,>=0.4.0; extra == 'ragas'
+Description-Content-Type: text/markdown
+# ragcheck - Lighthouse for RAG Systems
+[![PyPI version](https://badge.fury.io/py/ragcheck.svg)](https://badge.fury.io/py/ragcheck)
+[![Python](https://img.shields.io/badge/python-3.11%2B-blue)](https://www.python.org/)
+[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
+> One command to diagnose your RAG pipeline and get actionable fixes.
+```bash
+pip install ragcheck-cli
+ragcheck init
+ragcheck run --docs ./data --query "What is Article 370?"
+```
+## What is ragcheck?
+**ragcheck** is a lightweight, one-command diagnostic CLI that generates a beautiful, shareable HTML report analyzing why your RAG system fails and how to fix it.
+Think of it as **Lighthouse for RAG systems** — just like Lighthouse audits web pages, ragcheck audits your retrieval pipeline.
+## Features
+- **Auto-Generated Test Suite** - 50 synthetic questions from your documents
+- **Chunk Visualizer** - See exactly where your chunking breaks
+- **Retrieval Heatmap** - Identify dead chunks and dominant chunks
+- **Failure Classification** - Know WHY your RAG fails, not just THAT it fails
+- **Actionable Recommendations** - Specific fixes with predicted impact
+- **CI/CD Integration** - Fail builds when RAG quality regresses
+## Quick Start
+### Installation
+```bash
+pip install ragcheck-cli
+```
+Or with [uv](https://github.com/astral-sh/uv):
+```bash
+uv tool install ragcheck-cli
+```
+### Initialize
+```bash
+ragcheck init
+```
+Creates a `ragcheck.yaml` config file in your project.
+### Run Analysis
+```bash
+ragcheck run --docs ./data --query "Your test query"
+```
+Generates `ragcheck_report.html` with:
+- Scorecards (retrieval accuracy, faithfulness)
+- Chunk boundary visualization
+- Retrieval heatmap
+- Failure mode classification
+- Before/after score predictions
+### CI Mode
+```bash
+ragcheck run --docs ./data --ci --min-score 0.80
+```
+Returns exit code 0/1. Use in GitHub Actions to fail builds on quality regression.
+## Example Report
+![ragcheck report](https://raw.githubusercontent.com/yourusername/ragcheck/main/docs/report-screenshot.png)
+## Architecture
+```
+ragcheck CLI
+    ├── Chunk Analyzer (6 strategies + benchmark)
+    ├── Retriever Tester (auto-QA + dense retrieval)
+    ├── Failure Classifier (4 failure modes)
+    ├── Recommendation Engine (decision tree)
+    └── Report Engine (Jinja2 + CSS/HTML)
+```
+## Tech Stack
+| Component | Tool |
+|-----------|------|
+| CLI | Typer + Rich |
+| Config | Pydantic |
+| Embeddings | sentence-transformers |
+| Vector DB | ChromaDB |
+| LLM Interface | LiteLLM |
+| Reports | Jinja2 + CSS/HTML |
+## Configuration
+`ragcheck.yaml`:
+```yaml
+project_name: ragcheck
+docs_path: ./data
+chunking:
+  strategy: recursive
+  chunk_size: 512
+  chunk_overlap: 128
+llm:
+  provider: openai
+  model: gpt-3.5-turbo
+retrieval:
+  top_k: 5
+  similarity_threshold: 0.7
+report:
+  format: html
+  include_heatmap: true
+```
+## Development
+```bash
+git clone https://github.com/pranay7863/ragcheck.git
+cd ragcheck
+uv sync
+uv run pytest
+uv run ruff check .
+uv run mypy ragcheck/
+```
+## Contributing
+See [CONTRIBUTING.md](CONTRIBUTING.md)
+## License
+MIT — see [LICENSE](LICENSE)
+## Roadmap
+- [x] v0.2.0 — Offline reports, NLI faithfulness, scaled auto-QA, chunk viz
+- [ ] v0.3.0 — More vector DBs (Pinecone, Weaviate)
+- [ ] v0.3.0 — SaaS API for teams
+- [ ] v0.4.0 — Enterprise features (SSO, audit logs)
+## Support
+- [GitHub Issues](https://github.com/pranay7863/ragcheck/issues)
+- Twitter: [@ypranay53](https://twitter.com/pranay53)
+---
+**Built with discipline.** Read the [blueprint](docs/ARCHITECTURE.md) that started it all.

ragcheck_cli-0.2.0/README.md ADDED Viewed

@@ -0,0 +1,154 @@
+# ragcheck - Lighthouse for RAG Systems
+[![PyPI version](https://badge.fury.io/py/ragcheck.svg)](https://badge.fury.io/py/ragcheck)
+[![Python](https://img.shields.io/badge/python-3.11%2B-blue)](https://www.python.org/)
+[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
+> One command to diagnose your RAG pipeline and get actionable fixes.
+```bash
+pip install ragcheck-cli
+ragcheck init
+ragcheck run --docs ./data --query "What is Article 370?"
+```
+## What is ragcheck?
+**ragcheck** is a lightweight, one-command diagnostic CLI that generates a beautiful, shareable HTML report analyzing why your RAG system fails and how to fix it.
+Think of it as **Lighthouse for RAG systems** — just like Lighthouse audits web pages, ragcheck audits your retrieval pipeline.
+## Features
+- **Auto-Generated Test Suite** - 50 synthetic questions from your documents
+- **Chunk Visualizer** - See exactly where your chunking breaks
+- **Retrieval Heatmap** - Identify dead chunks and dominant chunks
+- **Failure Classification** - Know WHY your RAG fails, not just THAT it fails
+- **Actionable Recommendations** - Specific fixes with predicted impact
+- **CI/CD Integration** - Fail builds when RAG quality regresses
+## Quick Start
+### Installation
+```bash
+pip install ragcheck-cli
+```
+Or with [uv](https://github.com/astral-sh/uv):
+```bash
+uv tool install ragcheck-cli
+```
+### Initialize
+```bash
+ragcheck init
+```
+Creates a `ragcheck.yaml` config file in your project.
+### Run Analysis
+```bash
+ragcheck run --docs ./data --query "Your test query"
+```
+Generates `ragcheck_report.html` with:
+- Scorecards (retrieval accuracy, faithfulness)
+- Chunk boundary visualization
+- Retrieval heatmap
+- Failure mode classification
+- Before/after score predictions
+### CI Mode
+```bash
+ragcheck run --docs ./data --ci --min-score 0.80
+```
+Returns exit code 0/1. Use in GitHub Actions to fail builds on quality regression.
+## Example Report
+![ragcheck report](https://raw.githubusercontent.com/yourusername/ragcheck/main/docs/report-screenshot.png)
+## Architecture
+```
+ragcheck CLI
+    ├── Chunk Analyzer (6 strategies + benchmark)
+    ├── Retriever Tester (auto-QA + dense retrieval)
+    ├── Failure Classifier (4 failure modes)
+    ├── Recommendation Engine (decision tree)
+    └── Report Engine (Jinja2 + CSS/HTML)
+```
+## Tech Stack
+| Component | Tool |
+|-----------|------|
+| CLI | Typer + Rich |
+| Config | Pydantic |
+| Embeddings | sentence-transformers |
+| Vector DB | ChromaDB |
+| LLM Interface | LiteLLM |
+| Reports | Jinja2 + CSS/HTML |
+## Configuration
+`ragcheck.yaml`:
+```yaml
+project_name: ragcheck
+docs_path: ./data
+chunking:
+  strategy: recursive
+  chunk_size: 512
+  chunk_overlap: 128
+llm:
+  provider: openai
+  model: gpt-3.5-turbo
+retrieval:
+  top_k: 5
+  similarity_threshold: 0.7
+report:
+  format: html
+  include_heatmap: true
+```
+## Development
+```bash
+git clone https://github.com/pranay7863/ragcheck.git
+cd ragcheck
+uv sync
+uv run pytest
+uv run ruff check .
+uv run mypy ragcheck/
+```
+## Contributing
+See [CONTRIBUTING.md](CONTRIBUTING.md)
+## License
+MIT — see [LICENSE](LICENSE)
+## Roadmap
+- [x] v0.2.0 — Offline reports, NLI faithfulness, scaled auto-QA, chunk viz
+- [ ] v0.3.0 — More vector DBs (Pinecone, Weaviate)
+- [ ] v0.3.0 — SaaS API for teams
+- [ ] v0.4.0 — Enterprise features (SSO, audit logs)
+## Support
+- [GitHub Issues](https://github.com/pranay7863/ragcheck/issues)
+- Twitter: [@ypranay53](https://twitter.com/pranay53)
+---
+**Built with discipline.** Read the [blueprint](docs/ARCHITECTURE.md) that started it all.

ragcheck_cli-0.2.0/docs/ARCHITECTURE.md ADDED Viewed

@@ -0,0 +1,40 @@
+# ragcheck Architecture
+## Overview
+```
+ragcheck CLI (Typer + Rich)
+    |
+    +-- Document Loader (Text, Markdown)
+    |
+    +-- Chunk Analyzer (6 strategies)
+    |       +-- Fixed-size, Semantic, Recursive
+    |       +-- Markdown-aware, Agentic, Late
+    |
+    +-- Embedding Manager (sentence-transformers)
+    |
+    +-- Vector Store (ChromaDB)
+    |
+    +-- Retriever Tester (DenseRetriever)
+    |       +-- Auto-QA Generation (LiteLLM)
+    |       +-- Latency/Cost Tracking
+    |
+    +-- Failure Classifier (4 modes)
+    |       +-- Retrieval Miss
+    |       +-- Context Overload
+    |       +-- Hallucination
+    |       +-- Chunk Boundary Error
+    |
+    +-- Recommendation Engine (Decision Tree)
+    |
+    +-- Report Engine (Jinja2 + CSS/HTML)
+            +-- HTML Report (single file)
+            +-- PDF/PNG Export (Playwright)
+```
+## Design Principles
+1. **Zero-infrastructure**: `pip install ragcheck-cli` works out of the box
+2. **Single-file output**: HTML report is one file, no server needed
+3. **Framework agnostic**: No LangChain or LlamaIndex dependency in core
+4. **Offline-first**: Core metrics use local models; LLM calls are optional

ragcheck_cli-0.2.0/examples/chunk_demo.py ADDED Viewed

@@ -0,0 +1,47 @@
+"""Demo script for all 6 chunking strategies."""
+from pathlib import Path
+from ragcheck.analyzers.chunkers import ChunkerFactory, benchmark_chunking
+from ragcheck.reports.chunk_visualizer import generate_chunk_viz
+def main():
+    sample_text = """# Introduction to RAG
+RAG (Retrieval-Augmented Generation) is a technique that combines
+retrieval systems with generative AI. It works by retrieving relevant documents
+from a knowledge base and then using a large language model to generate answers.
+## Key Components
+The key components are: a document store, an embedding model,
+a vector database, and a language model. Chunking strategy is critical because
+poor chunking can split important context across boundaries.
+## Chunking Strategies
+Common strategies include fixed-size, semantic, recursive, markdown-aware,
+agentic (LLM-based), and late chunking (contextual embeddings)."""
+    # Benchmark all 6 strategies
+    strategies = ["fixed", "semantic", "recursive", "markdown", "agentic", "late"]
+    results = benchmark_chunking(sample_text, "sample.md", strategies)
+    print("Chunking Benchmark Results — All 6 Strategies")
+    print("=" * 60)
+    for strategy, metrics in results.items():
+        print(f"\n{strategy.upper():12} | Chunks: {metrics['num_chunks']:3} | "
+              f"Avg: {metrics['avg_length']:6.1f} | Loss: {metrics['context_loss_score']:.2%}")
+    # Generate HTML visualization for markdown chunker (most interesting for this doc)
+    md_chunks = results["markdown"]["chunks"]
+    html = generate_chunk_viz(md_chunks, "sample.md", "markdown", sample_text)
+    output_path = Path("chunk_visualization.html")
+    output_path.write_text(html, encoding="utf-8")
+    print(f"\nVisualization saved to: {output_path.absolute()}")
+if __name__ == "__main__":
+    main()

ragcheck_cli-0.2.0/examples/classifier_demo.py ADDED Viewed

@@ -0,0 +1,94 @@
+"""Demo: Failure classification + recommendations."""
+from ragcheck.analyzers.chunkers import Chunk
+from ragcheck.analyzers.failure_classifier import FailureClassifier
+from ragcheck.analyzers.recommender import RecommendationEngine, predict_scores
+def main():
+    classifier = FailureClassifier()
+    engine = RecommendationEngine()
+    # Simulate 4 different failure scenarios
+    scenarios = [
+        {
+            "name": "Retrieval Miss",
+            "question": "What is quantum computing?",
+            "expected": "Quantum computing uses qubits.",
+            "generated": "",
+            "retrieved": [],
+            "source": ["Quantum computing uses qubits for computation."],
+        },
+        {
+            "name": "Hallucination",
+            "question": "What is RAG?",
+            "expected": "RAG is Retrieval-Augmented Generation.",
+            "generated": "RAG is a type of database invented in 2015 by Google.",
+            "retrieved": [Chunk("RAG is Retrieval-Augmented Generation.", 0, 40, "doc.txt", "fixed")],
+            "source": ["RAG is Retrieval-Augmented Generation."],
+        },
+        {
+            "name": "Context Overload",
+            "question": "How does RAG work?",
+            "expected": "RAG retrieves documents then generates answers.",
+            "generated": "RAG retrieves documents then generates answers.",
+            "retrieved": [Chunk(f"chunk{i}", i*10, i*10+10, "doc.txt", "fixed") for i in range(6)],
+            "source": ["RAG retrieves documents then generates answers."],
+        },
+        {
+            "name": "Chunk Boundary Error",
+            "question": "Explain the full RAG pipeline.",
+            "expected": "RAG has retrieval and generation components working together.",
+            "generated": "RAG has retrieval and generation components.",
+            "retrieved": [
+                Chunk("RAG has retrieval components", 0, 28, "doc.txt", "fixed"),
+                Chunk("and generation components working", 29, 60, "doc.txt", "fixed"),
+            ],
+            "source": ["RAG has retrieval and generation components working together."],
+        },
+    ]
+    print("Failure Classification Demo")
+    print("=" * 60)
+    all_failures = []
+    for s in scenarios:
+        analysis = classifier.classify(
+            question=s["question"],
+            expected_answer=s["expected"],
+            generated_answer=s["generated"],
+            retrieved_chunks=s["retrieved"],
+            source_chunks=s["source"],
+        )
+        all_failures.append(analysis)
+        print(f"\n{s['name']}:")
+        print(f"  Mode: {analysis.failure_mode.value}")
+        print(f"  Confidence: {analysis.confidence}")
+        print(f"  Explanation: {analysis.explanation}")
+        print(f"  Fix: {analysis.recommendation}")
+    # Generate recommendations from all failures
+    print("\n" + "=" * 60)
+    print("Prioritized Recommendations")
+    print("=" * 60)
+    recommendations = engine.generate_recommendations(all_failures)
+    for i, rec in enumerate(recommendations[:5], 1):
+        print(f"\n{i}. {rec.title} [{rec.implementation_difficulty}]")
+        print(f"   {rec.description}")
+        print(f"   Expected improvement: +{rec.expected_improvement:.1%}")
+        print(f"   Tradeoffs: {rec.tradeoffs}")
+        if rec.code_example:
+            print(f"   Code: {rec.code_example}")
+    # Score prediction
+    print("\n" + "=" * 60)
+    current = 0.55
+    prediction = predict_scores(current, recommendations)
+    print(f"Score Prediction: {prediction['current_score']:.0%} -> {prediction['predicted_score']:.0%}")
+    print(f"  (+{prediction['improvement']:.1%} from top {prediction['recommendations_applied']} recommendations)")
+if __name__ == "__main__":
+    main()