PyPI - rag-audit - Versions diffs - 0.1.0__py3-none-any.whl - Mend

rag-audit 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26)

rag_audit/__init__.py +3 -0
rag_audit/adapters/__init__.py +12 -0
rag_audit/adapters/base.py +11 -0
rag_audit/adapters/chroma.py +42 -0
rag_audit/adapters/stubs.py +30 -0
rag_audit/chunker/__init__.py +20 -0
rag_audit/chunker/base.py +16 -0
rag_audit/chunker/evaluator.py +51 -0
rag_audit/chunker/models.py +15 -0
rag_audit/chunker/strategies.py +96 -0
rag_audit/cli/__init__.py +0 -0
rag_audit/cli/main.py +68 -0
rag_audit/core/__init__.py +1 -0
rag_audit/core/config.py +25 -0
rag_audit/core/runner.py +41 -0
rag_audit/metrics/__init__.py +11 -0
rag_audit/metrics/hallucination.py +60 -0
rag_audit/metrics/retrieval.py +42 -0
rag_audit/report/__init__.py +6 -0
rag_audit/report/models.py +18 -0
rag_audit/report/renderer.py +58 -0
rag_audit-0.1.0.dist-info/METADATA +225 -0
rag_audit-0.1.0.dist-info/RECORD +26 -0
rag_audit-0.1.0.dist-info/WHEEL +4 -0
rag_audit-0.1.0.dist-info/entry_points.txt +2 -0
rag_audit-0.1.0.dist-info/licenses/LICENSE +21 -0

rag_audit/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+"""rag-audit — CLI + library to audit and benchmark RAG pipelines."""
+__version__ = "0.1.0"

rag_audit/adapters/__init__.py ADDED Viewed

@@ -0,0 +1,12 @@
+"""Adapters for vector store backends."""
+from rag_audit.adapters.base import VectorStoreAdapter
+from rag_audit.adapters.chroma import ChromaDBAdapter
+from rag_audit.adapters.stubs import PineconeAdapter, QdrantAdapter
+__all__ = [
+    "ChromaDBAdapter",
+    "PineconeAdapter",
+    "QdrantAdapter",
+    "VectorStoreAdapter",
+]

rag_audit/adapters/base.py ADDED Viewed

@@ -0,0 +1,11 @@
+from typing import Protocol, runtime_checkable
+@runtime_checkable
+class VectorStoreAdapter(Protocol):
+    """Structural interface for vector store backends.
+
+    Any class defining these four methods conforms. Because the protocol is
+    ``runtime_checkable``, ``isinstance`` checks only that the methods exist,
+    not that their signatures match.
+    """
+    # ids / texts / embeddings are presumably parallel lists (one entry per document) -- confirm with callers.
+    def add(
+        self, ids: list[str], texts: list[str], embeddings: list[list[float]]
+    ) -> None: ...  # store texts under the given ids with their embeddings
+    def query(self, embedding: list[float], k: int) -> list[str]: ...  # texts of up to k matches for a query embedding
+    def delete(self, ids: list[str]) -> None: ...  # remove the entries with the given ids
+    def count(self) -> int: ...  # number of stored entries

rag_audit/adapters/chroma.py ADDED Viewed

@@ -0,0 +1,42 @@
+import chromadb
+from chromadb.api import ClientAPI
+class ChromaDBAdapter:
+    def __init__(
+        self,
+        collection_name: str,
+        client: ClientAPI | None = None,
+        path: str | None = None,
+    ) -> None:
+        if client is not None:
+            self._client: ClientAPI = client
+        elif path is not None:
+            self._client = chromadb.PersistentClient(path=path)
+        else:
+            self._client = chromadb.EphemeralClient()
+        self._collection = self._client.get_or_create_collection(collection_name)
+    def add(
+        self, ids: list[str], texts: list[str], embeddings: list[list[float]]
+    ) -> None:
+        self._collection.add(ids=ids, documents=texts, embeddings=embeddings)  # type: ignore[arg-type]
+    def query(self, embedding: list[float], k: int) -> list[str]:
+        actual_k = min(k, self._collection.count())
+        if actual_k == 0:
+            return []
+        results = self._collection.query(
+            query_embeddings=[embedding],  # type: ignore[arg-type]
+            n_results=actual_k,
+        )
+        docs = results["documents"]
+        if not docs or docs[0] is None:
+            return []
+        return list(docs[0])
+    def delete(self, ids: list[str]) -> None:
+        self._collection.delete(ids=ids)
+    def count(self) -> int:
+        return int(self._collection.count())

rag_audit/adapters/stubs.py ADDED Viewed

@@ -0,0 +1,30 @@
+class PineconeAdapter:
+    """Placeholder for a Pinecone backend; every method raises NotImplementedError."""
+    # The method names and signatures mirror VectorStoreAdapter, so a runtime
+    # isinstance() check against that protocol passes even though nothing is
+    # implemented yet.
+    def add(
+        self, ids: list[str], texts: list[str], embeddings: list[list[float]]
+    ) -> None:
+        raise NotImplementedError
+    def query(self, embedding: list[float], k: int) -> list[str]:
+        raise NotImplementedError
+    def delete(self, ids: list[str]) -> None:
+        raise NotImplementedError
+    def count(self) -> int:
+        raise NotImplementedError
+class QdrantAdapter:
+    """Placeholder for a Qdrant backend; every method raises NotImplementedError."""
+    # The method names and signatures mirror VectorStoreAdapter, so a runtime
+    # isinstance() check against that protocol passes even though nothing is
+    # implemented yet.
+    def add(
+        self, ids: list[str], texts: list[str], embeddings: list[list[float]]
+    ) -> None:
+        raise NotImplementedError
+    def query(self, embedding: list[float], k: int) -> list[str]:
+        raise NotImplementedError
+    def delete(self, ids: list[str]) -> None:
+        raise NotImplementedError
+    def count(self) -> int:
+        raise NotImplementedError

rag_audit/chunker/__init__.py ADDED Viewed

@@ -0,0 +1,20 @@
+"""Chunking strategies and cohesion-based evaluation for RAG pipelines."""
+from rag_audit.chunker.base import BaseChunker
+from rag_audit.chunker.evaluator import ChunkingEvaluator
+from rag_audit.chunker.models import ChunkingReport, ChunkingStrategyReport
+from rag_audit.chunker.strategies import (
+    FixedSizeChunker,
+    RecursiveChunker,
+    SemanticChunker,
+)
+__all__ = [
+    "BaseChunker",
+    "ChunkingEvaluator",
+    "ChunkingReport",
+    "ChunkingStrategyReport",
+    "FixedSizeChunker",
+    "RecursiveChunker",
+    "SemanticChunker",
+]

rag_audit/chunker/base.py ADDED Viewed

@@ -0,0 +1,16 @@
+import math
+from typing import Protocol, runtime_checkable
+@runtime_checkable
+class BaseChunker(Protocol):
+    """Structural interface for chunking strategies: anything with ``chunk(text)``."""
+    def chunk(self, text: str) -> list[str]: ...  # split text into a list of chunk strings
+def cosine_similarity(a: list[float], b: list[float]) -> float:
+    dot = sum(x * y for x, y in zip(a, b, strict=False))
+    mag_a = math.sqrt(sum(x * x for x in a))
+    mag_b = math.sqrt(sum(x * x for x in b))
+    if mag_a == 0.0 or mag_b == 0.0:
+        return 0.0
+    return dot / (mag_a * mag_b)

rag_audit/chunker/evaluator.py ADDED Viewed

@@ -0,0 +1,51 @@
+from langchain_core.embeddings import Embeddings
+from rag_audit.chunker.base import BaseChunker, cosine_similarity
+from rag_audit.chunker.models import ChunkingReport, ChunkingStrategyReport
+class ChunkingEvaluator:
+    def __init__(self, embeddings: Embeddings) -> None:
+        self._embeddings = embeddings
+    def evaluate(
+        self,
+        text: str,
+        chunkers: dict[str, BaseChunker],
+    ) -> ChunkingReport:
+        reports = [
+            self._evaluate_strategy(name, text, chunker)
+            for name, chunker in chunkers.items()
+        ]
+        best = max(reports, key=lambda r: r.avg_cohesion).strategy if reports else ""
+        return ChunkingReport(strategies=reports, best_strategy=best)
+    def _evaluate_strategy(
+        self,
+        name: str,
+        text: str,
+        chunker: BaseChunker,
+    ) -> ChunkingStrategyReport:
+        chunks = chunker.chunk(text)
+        if not chunks:
+            return ChunkingStrategyReport(
+                strategy=name,
+                chunk_count=0,
+                avg_cohesion=0.0,
+                min_cohesion=0.0,
+                max_cohesion=0.0,
+                avg_chunk_length=0.0,
+            )
+        doc_vec = self._embeddings.embed_query(text)
+        chunk_vecs = self._embeddings.embed_documents(chunks)
+        cohesions = [cosine_similarity(cv, doc_vec) for cv in chunk_vecs]
+        return ChunkingStrategyReport(
+            strategy=name,
+            chunk_count=len(chunks),
+            avg_cohesion=sum(cohesions) / len(cohesions),
+            min_cohesion=min(cohesions),
+            max_cohesion=max(cohesions),
+            avg_chunk_length=sum(len(c) for c in chunks) / len(chunks),
+        )

rag_audit/chunker/models.py ADDED Viewed

@@ -0,0 +1,15 @@
+from pydantic import BaseModel, Field
+class ChunkingStrategyReport(BaseModel):
+    # Cohesion statistics for one chunking strategy applied to one text.
+    strategy: str  # label of the strategy (the key in the evaluator's chunkers mapping)
+    chunk_count: int = Field(..., ge=0)  # number of chunks the strategy produced
+    avg_cohesion: float = Field(..., ge=0.0, le=1.0)  # mean chunk-to-document cosine similarity
+    min_cohesion: float = Field(..., ge=0.0, le=1.0)  # lowest chunk-to-document similarity
+    max_cohesion: float = Field(..., ge=0.0, le=1.0)  # highest chunk-to-document similarity
+    avg_chunk_length: float = Field(..., ge=0.0)  # mean chunk length in characters
+class ChunkingReport(BaseModel):
+    # Result of comparing several chunking strategies on the same text.
+    strategies: list[ChunkingStrategyReport]  # one entry per evaluated strategy
+    best_strategy: str  # label with the highest avg_cohesion; "" when nothing was evaluated

rag_audit/chunker/strategies.py ADDED Viewed

@@ -0,0 +1,96 @@
+import re
+from langchain_core.embeddings import Embeddings
+from rag_audit.chunker.base import cosine_similarity
+_RECURSIVE_SEPARATORS = ["\n\n", "\n", ". ", " ", ""]
+class FixedSizeChunker:
+    def __init__(self, chunk_size: int = 500, overlap: int = 50) -> None:
+        self._chunk_size = chunk_size
+        self._overlap = overlap
+    def chunk(self, text: str) -> list[str]:
+        if len(text) <= self._chunk_size:
+            return [text] if text.strip() else []
+        step = max(1, self._chunk_size - self._overlap)
+        return [
+            text[i : i + self._chunk_size]
+            for i in range(0, len(text), step)
+            if text[i : i + self._chunk_size].strip()
+        ]
+class RecursiveChunker:
+    def __init__(self, chunk_size: int = 500, overlap: int = 0) -> None:
+        self._chunk_size = chunk_size
+        self._overlap = overlap
+    def chunk(self, text: str) -> list[str]:
+        return self._split(text, _RECURSIVE_SEPARATORS)
+    def _split(self, text: str, separators: list[str]) -> list[str]:
+        if len(text) <= self._chunk_size:
+            return [text] if text.strip() else []
+        sep, *rest = separators
+        if not sep:
+            step = max(1, self._chunk_size - self._overlap)
+            return [
+                text[i : i + self._chunk_size]
+                for i in range(0, len(text), step)
+                if text[i : i + self._chunk_size].strip()
+            ]
+        if sep not in text:
+            return self._split(text, rest) if rest else [text]
+        parts = [p for p in text.split(sep) if p.strip()]
+        chunks: list[str] = []
+        current = ""
+        for part in parts:
+            joined = f"{current}{sep}{part}" if current else part
+            if len(joined) <= self._chunk_size:
+                current = joined
+            else:
+                if current:
+                    chunks.append(current)
+                if len(part) > self._chunk_size:
+                    chunks.extend(self._split(part, rest if rest else [""]))
+                    current = ""
+                else:
+                    current = part
+        if current:
+            chunks.append(current)
+        return chunks
+class SemanticChunker:
+    def __init__(
+        self, embeddings: Embeddings, similarity_threshold: float = 0.8
+    ) -> None:
+        self._embeddings = embeddings
+        self._threshold = similarity_threshold
+    def chunk(self, text: str) -> list[str]:
+        sentences = [s for s in re.split(r"(?<=[.!?])\s+", text) if s.strip()]
+        if len(sentences) <= 1:
+            return [text] if text.strip() else []
+        vecs = self._embeddings.embed_documents(sentences)
+        groups: list[list[str]] = [[sentences[0]]]
+        for i in range(1, len(sentences)):
+            sim = cosine_similarity(vecs[i - 1], vecs[i])
+            if sim >= self._threshold:
+                groups[-1].append(sentences[i])
+            else:
+                groups.append([sentences[i]])
+        return [" ".join(g) for g in groups if any(s.strip() for s in g)]

rag_audit/cli/__init__.py ADDED Viewed

File without changes

rag_audit/cli/main.py ADDED Viewed

@@ -0,0 +1,68 @@
+from pathlib import Path
+import typer
+from rag_audit import __version__
+from rag_audit.core.config import load_config
+from rag_audit.core.runner import AuditRunner
+from rag_audit.report.models import AuditReport
+from rag_audit.report.renderer import ReportRenderer
+app = typer.Typer(
+    name="rag-audit",
+    help="Audit and benchmark your RAG pipelines.",
+    add_completion=False,
+)
+@app.command()
+def version() -> None:
+    # Print the installed rag-audit version. A comment (not a docstring) is
+    # used on purpose: Typer would surface a docstring as the command's help text.
+    typer.echo(f"rag-audit v{__version__}")
+@app.command()
+def run(
+    pipeline: str = typer.Argument(
+        ..., help="Path to the pipeline config file (JSON)."
+    ),
+    output: str = typer.Option(
+        "audit_result.json", "--output", "-o", help="Output file path."
+    ),
+) -> None:
+    try:
+        config = load_config(pipeline)
+    except FileNotFoundError:
+        typer.secho(f"Config file not found: {pipeline}", fg=typer.colors.RED, err=True)
+        raise typer.Exit(code=1) from None
+    except Exception as exc:
+        typer.secho(f"Invalid config: {exc}", fg=typer.colors.RED, err=True)
+        raise typer.Exit(code=1) from exc
+    report = AuditRunner(config).run()
+    Path(output).write_text(ReportRenderer().to_json(report))
+    typer.echo(f"Audit complete. Report saved to {output}")
+@app.command()
+def report(
+    result: str = typer.Argument(..., help="Path to an audit result JSON file."),
+    fmt: str = typer.Option(
+        "markdown", "--format", "-f", help="Output format: markdown or json."
+    ),
+) -> None:
+    try:
+        audit_report = AuditReport.model_validate_json(Path(result).read_text())
+    except FileNotFoundError:
+        typer.secho(f"Result file not found: {result}", fg=typer.colors.RED, err=True)
+        raise typer.Exit(code=1) from None
+    except Exception as exc:
+        typer.secho(f"Invalid result file: {exc}", fg=typer.colors.RED, err=True)
+        raise typer.Exit(code=1) from exc
+    renderer = ReportRenderer()
+    output = (
+        renderer.to_json(audit_report)
+        if fmt == "json"
+        else renderer.to_markdown(audit_report)
+    )
+    typer.echo(output)

rag_audit/core/__init__.py ADDED Viewed

@@ -0,0 +1 @@
+"""Core orchestration logic for rag-audit."""

rag_audit/core/config.py ADDED Viewed

@@ -0,0 +1,25 @@
+from __future__ import annotations
+from pathlib import Path
+from typing import Literal
+from pydantic import BaseModel
+class LLMConfig(BaseModel):
+    # Selects the chat model used as the faithfulness judge.
+    provider: Literal["openai", "anthropic"] = "openai"  # which LLM provider to use
+    model: str = "gpt-4o-mini"  # provider-specific model name
+class PipelineConfig(BaseModel):
+    # One audited question/answer pair together with its retrieval evidence.
+    pipeline_id: str  # identifier for the pipeline being audited
+    question: str  # the question posed to the RAG pipeline
+    answer: str  # the answer the pipeline generated
+    contexts: list[str]  # retrieved chunks, in rank order
+    relevant: list[str]  # ground-truth relevant chunks (for retrieval metrics)
+    k: int = 5  # number of top chunks to evaluate; not range-checked here
+    llm: LLMConfig = LLMConfig()  # judge model settings
+def load_config(path: str) -> PipelineConfig:
+    return PipelineConfig.model_validate_json(Path(path).read_text())

rag_audit/core/runner.py ADDED Viewed

@@ -0,0 +1,41 @@
+from langchain_core.language_models import BaseChatModel
+from rag_audit.core.config import LLMConfig, PipelineConfig
+from rag_audit.metrics.hallucination import HallucinationDetector
+from rag_audit.metrics.retrieval import evaluate_retrieval
+from rag_audit.report.models import AuditReport
+def _build_llm(cfg: LLMConfig) -> BaseChatModel:
+    if cfg.provider == "openai":
+        from langchain_openai import ChatOpenAI
+        return ChatOpenAI(model=cfg.model)
+    from langchain_anthropic import ChatAnthropic
+    return ChatAnthropic(model_name=cfg.model)  # type: ignore[call-arg]
+class AuditRunner:
+    def __init__(self, config: PipelineConfig) -> None:
+        self._config = config
+        self._detector = HallucinationDetector(llm=_build_llm(config.llm))
+    def run(self) -> AuditReport:
+        faithfulness = self._detector.detect(
+            answer=self._config.answer,
+            contexts=self._config.contexts,
+        )
+        retrieval = evaluate_retrieval(
+            retrieved=self._config.contexts,
+            relevant=self._config.relevant,
+            k=self._config.k,
+        )
+        return AuditReport(
+            pipeline_id=self._config.pipeline_id,
+            question=self._config.question,
+            answer=self._config.answer,
+            contexts=self._config.contexts,
+            retrieval=retrieval,
+            faithfulness=faithfulness,
+        )

rag_audit/metrics/__init__.py ADDED Viewed

@@ -0,0 +1,11 @@
+"""Metrics for evaluating RAG pipeline quality."""
+from rag_audit.metrics.hallucination import FaithfulnessResult, HallucinationDetector
+from rag_audit.metrics.retrieval import RetrievalResult, evaluate_retrieval
+__all__ = [
+    "FaithfulnessResult",
+    "HallucinationDetector",
+    "RetrievalResult",
+    "evaluate_retrieval",
+]

rag_audit/metrics/hallucination.py ADDED Viewed

@@ -0,0 +1,60 @@
+from langchain_core.language_models import BaseChatModel
+from langchain_core.messages import HumanMessage
+from pydantic import BaseModel, Field
+_FAITHFULNESS_PROMPT = """\
+You are a faithfulness evaluator. Determine whether the ANSWER is supported by the CONTEXT.
+CONTEXT:
+{contexts}
+ANSWER:
+{answer}
+Evaluate whether every claim in the ANSWER is explicitly supported by the CONTEXT.
+Return only a JSON object with exactly two fields:
+- "score": float from 0.0 (completely unfaithful) to 1.0 (completely faithful)
+- "reasoning": concise explanation of your evaluation\
+"""
+class FaithfulnessResult(BaseModel):
+    # Outcome of the faithfulness check for one answer.
+    score: float = Field(..., ge=0.0, le=1.0)  # 0.0 = completely unfaithful, 1.0 = completely faithful
+    reasoning: str  # the judge's explanation
+    is_faithful: bool  # True when score >= the detector's threshold
+class _JudgeOutput(BaseModel):
+    # Schema of the JSON object the judge model is asked to return.
+    score: float = Field(..., ge=0.0, le=1.0)  # faithfulness score reported by the judge
+    reasoning: str  # the judge's explanation
+def _parse_judge_response(content: str, threshold: float) -> FaithfulnessResult:
+    stripped = content.strip()
+    if stripped.startswith("```"):
+        lines = stripped.splitlines()
+        stripped = "\n".join(lines[1:-1])
+    output = _JudgeOutput.model_validate_json(stripped)
+    return FaithfulnessResult(
+        score=output.score,
+        reasoning=output.reasoning,
+        is_faithful=output.score >= threshold,
+    )
+class HallucinationDetector:
+    def __init__(self, llm: BaseChatModel, threshold: float = 0.5) -> None:
+        self._llm = llm
+        self.threshold = threshold
+    def detect(self, answer: str, contexts: list[str]) -> FaithfulnessResult:
+        if not contexts:
+            return FaithfulnessResult(
+                score=0.0,
+                reasoning="No context provided to evaluate faithfulness against.",
+                is_faithful=False,
+            )
+        context_block = "\n\n".join(f"[{i + 1}] {c}" for i, c in enumerate(contexts))
+        prompt = _FAITHFULNESS_PROMPT.format(answer=answer, contexts=context_block)
+        response = self._llm.invoke([HumanMessage(content=prompt)])
+        return _parse_judge_response(str(response.content), self.threshold)

rag_audit/metrics/retrieval.py ADDED Viewed

@@ -0,0 +1,42 @@
+from pydantic import BaseModel, Field
+class RetrievalResult(BaseModel):
+    # Retrieval quality metrics for one ranked list of retrieved chunks.
+    precision_at_k: float = Field(..., ge=0.0, le=1.0)  # share of the top-k chunks that are relevant
+    recall_at_k: float = Field(..., ge=0.0, le=1.0)  # share of the relevant chunks found in the top-k
+    mrr: float = Field(..., ge=0.0, le=1.0)  # reciprocal rank of the first relevant chunk (0.0 if none)
+    k: int = Field(..., gt=0)  # cutoff actually used for the metrics
+def _precision(retrieved: list[str], relevant: frozenset[str], k: int) -> float:
+    top_k = retrieved[:k]
+    return sum(1 for doc in top_k if doc in relevant) / k
+def _recall(retrieved: list[str], relevant: frozenset[str], k: int) -> float:
+    if not relevant:
+        return 0.0
+    top_k = retrieved[:k]
+    return sum(1 for doc in top_k if doc in relevant) / len(relevant)
+def _mrr(retrieved: list[str], relevant: frozenset[str]) -> float:
+    for rank, doc in enumerate(retrieved, start=1):
+        if doc in relevant:
+            return 1.0 / rank
+    return 0.0
+def evaluate_retrieval(
+    retrieved: list[str],
+    relevant: list[str],
+    k: int,
+) -> RetrievalResult:
+    effective_k = min(k, len(retrieved)) if retrieved else k
+    relevant_set = frozenset(relevant)
+    return RetrievalResult(
+        precision_at_k=_precision(retrieved, relevant_set, effective_k),
+        recall_at_k=_recall(retrieved, relevant_set, effective_k),
+        mrr=_mrr(retrieved, relevant_set),
+        k=effective_k,
+    )

rag_audit/report/__init__.py ADDED Viewed

@@ -0,0 +1,6 @@
+"""Audit report generation for RAG pipelines."""
+from rag_audit.report.models import AuditReport
+from rag_audit.report.renderer import ReportRenderer
+__all__ = ["AuditReport", "ReportRenderer"]

rag_audit/report/models.py ADDED Viewed

@@ -0,0 +1,18 @@
+from __future__ import annotations
+from datetime import datetime
+from pydantic import BaseModel, Field
+from rag_audit.metrics.hallucination import FaithfulnessResult
+from rag_audit.metrics.retrieval import RetrievalResult
+class AuditReport(BaseModel):
+    pipeline_id: str = Field(..., description="Identifier for the audited pipeline.")
+    created_at: datetime = Field(default_factory=datetime.utcnow)
+    question: str
+    answer: str
+    contexts: list[str]
+    retrieval: RetrievalResult
+    faithfulness: FaithfulnessResult

rag_audit/report/renderer.py ADDED Viewed

@@ -0,0 +1,58 @@
+from rag_audit.report.models import AuditReport
+_MARKDOWN_TEMPLATE = """\
+# Audit Report — {pipeline_id}
+**Date:** {created_at}
+## Question
+{question}
+## Answer
+{answer}
+## Retrieval (k={k})
+| Metric | Score |
+|---|---|
+| Precision@k | {precision:.2%} |
+| Recall@k | {recall:.2%} |
+| MRR | {mrr:.4f} |
+## Faithfulness
+**Score:** {faith_score:.2%} — **Verdict:** {verdict}
+**Reasoning:** {reasoning}
+## Contexts
+{contexts}
+"""
+class ReportRenderer:
+    def to_json(self, report: AuditReport) -> str:
+        return report.model_dump_json(indent=2)
+    def to_markdown(self, report: AuditReport) -> str:
+        contexts_block = "\n".join(
+            f"{i + 1}. {ctx}" for i, ctx in enumerate(report.contexts)
+        )
+        verdict = "FAITHFUL" if report.faithfulness.is_faithful else "HALLUCINATION"
+        return _MARKDOWN_TEMPLATE.format(
+            pipeline_id=report.pipeline_id,
+            created_at=report.created_at.strftime("%Y-%m-%d %H:%M UTC"),
+            question=report.question,
+            answer=report.answer,
+            k=report.retrieval.k,
+            precision=report.retrieval.precision_at_k,
+            recall=report.retrieval.recall_at_k,
+            mrr=report.retrieval.mrr,
+            faith_score=report.faithfulness.score,
+            verdict=verdict,
+            reasoning=report.faithfulness.reasoning,
+            contexts=contexts_block,
+        )

rag_audit-0.1.0.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,225 @@
+Metadata-Version: 2.4
+Name: rag-audit
+Version: 0.1.0
+Summary: CLI + library to audit and benchmark RAG pipelines
+Project-URL: Homepage, https://github.com/andrey-pontes/rag-audit
+Project-URL: Documentation, https://andrey-pontes.github.io/rag-audit
+Project-URL: Repository, https://github.com/andrey-pontes/rag-audit
+Project-URL: Bug Tracker, https://github.com/andrey-pontes/rag-audit/issues
+License: MIT License
+        Copyright (c) 2026 Andrey Pontes
+        Permission is hereby granted, free of charge, to any person obtaining a copy
+        of this software and associated documentation files (the "Software"), to deal
+        in the Software without restriction, including without limitation the rights
+        to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+        copies of the Software, and to permit persons to whom the Software is
+        furnished to do so, subject to the following conditions:
+        The above copyright notice and this permission notice shall be included in all
+        copies or substantial portions of the Software.
+        THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+        IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+        FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+        AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+        LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+        OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+        SOFTWARE.
+License-File: LICENSE
+Keywords: audit,evaluation,llm,rag,retrieval
+Classifier: Development Status :: 3 - Alpha
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
+Requires-Python: >=3.11
+Requires-Dist: chromadb>=0.5
+Requires-Dist: httpx>=0.27
+Requires-Dist: langchain-anthropic>=0.1
+Requires-Dist: langchain-openai>=0.1
+Requires-Dist: langchain>=0.2
+Requires-Dist: loguru>=0.7
+Requires-Dist: pydantic>=2.0
+Requires-Dist: rich>=13.0
+Requires-Dist: typer>=0.12
+Description-Content-Type: text/markdown
+# rag-audit
+CLI + library to audit and benchmark RAG pipelines. Detects hallucinations, measures retrieval quality, compares chunking strategies, and generates structured reports.
+**[Documentation](https://andrey-pontes.github.io/rag-audit/)**
+## Installation
+```bash
+pip install rag-audit
+```
+Or with [uv](https://docs.astral.sh/uv/):
+```bash
+uv add rag-audit
+```
+## Quickstart
+**1. Create a pipeline config file (`pipeline.json`):**
+```json
+{
+  "pipeline_id": "my-pipeline",
+  "question": "What is the capital of France?",
+  "answer": "Paris is the capital of France.",
+  "contexts": [
+    "Paris is the capital and largest city of France.",
+    "France is a country in Western Europe."
+  ],
+  "relevant": [
+    "Paris is the capital and largest city of France."
+  ],
+  "k": 2,
+  "llm": {
+    "provider": "openai",
+    "model": "gpt-4o-mini"
+  }
+}
+```
+**2. Run the audit:**
+```bash
+export OPENAI_API_KEY=sk-...
+rag-audit run pipeline.json -o result.json
+```
+**3. Generate a report:**
+```bash
+# Markdown (default)
+rag-audit report result.json
+# JSON
+rag-audit report result.json --format json
+```
+## Config reference
+| Field | Type | Description |
+|---|---|---|
+| `pipeline_id` | `string` | Identifier for the pipeline being audited |
+| `question` | `string` | The question posed to the RAG pipeline |
+| `answer` | `string` | The answer generated by the pipeline |
+| `contexts` | `string[]` | Retrieved chunks, in rank order |
+| `relevant` | `string[]` | Ground-truth relevant chunks (for retrieval metrics) |
+| `k` | `int` | Number of top chunks to evaluate (default: `5`) |
+| `llm.provider` | `"openai"` \| `"anthropic"` | LLM provider for the faithfulness judge |
+| `llm.model` | `string` | Model name (e.g. `"gpt-4o-mini"`, `"claude-3-5-haiku-20241022"`) |
+## Metrics
+### Retrieval
+| Metric | Description |
+|---|---|
+| **Precision@k** | Fraction of the top-k retrieved chunks that are relevant |
+| **Recall@k** | Fraction of all relevant chunks that appear in the top-k |
+| **MRR** | Mean Reciprocal Rank — how high the first relevant chunk ranks |
+### Faithfulness
+| Metric | Description |
+|---|---|
+| **Score** | 0.0–1.0 — how well the answer is grounded in the retrieved contexts |
+| **Verdict** | `FAITHFUL` if score ≥ threshold (default `0.5`), otherwise `HALLUCINATION` |
+## Python API
+### Audit a pipeline
+```python
+from rag_audit.core.config import PipelineConfig, LLMConfig
+from rag_audit.core.runner import AuditRunner
+from rag_audit.report.renderer import ReportRenderer
+config = PipelineConfig(
+    pipeline_id="my-pipeline",
+    question="What is the capital of France?",
+    answer="Paris is the capital of France.",
+    contexts=["Paris is the capital and largest city of France."],
+    relevant=["Paris is the capital and largest city of France."],
+    k=1,
+    llm=LLMConfig(provider="openai", model="gpt-4o-mini"),
+)
+report = AuditRunner(config).run()
+print(ReportRenderer().to_markdown(report))
+```
+### Compare chunking strategies
+```python
+from langchain_openai import OpenAIEmbeddings
+from rag_audit.chunker import ChunkingEvaluator, FixedSizeChunker, RecursiveChunker, SemanticChunker
+embeddings = OpenAIEmbeddings()
+evaluator = ChunkingEvaluator(embeddings)
+report = evaluator.evaluate(
+    "Your long document text here...",
+    {
+        "fixed": FixedSizeChunker(chunk_size=500, overlap=50),
+        "recursive": RecursiveChunker(chunk_size=500),
+        "semantic": SemanticChunker(embeddings, similarity_threshold=0.8),
+    },
+)
+print(f"Best strategy: {report.best_strategy}")
+for s in report.strategies:
+    print(f"  {s.strategy}: avg_cohesion={s.avg_cohesion:.3f}, chunks={s.chunk_count}")
+```
+### Use a vectorstore adapter
+```python
+from rag_audit.adapters import ChromaDBAdapter
+adapter = ChromaDBAdapter("my-collection")
+adapter.add(ids=["doc1"], texts=["Paris is in France."], embeddings=[[...]])
+results = adapter.query(embedding=[...], k=1)
+```
+## Roadmap
+- [x] CLI (`rag-audit run`, `rag-audit report`)
+- [x] Hallucination detection (LLM-as-judge)
+- [x] Retrieval metrics (Precision@k, Recall@k, MRR)
+- [x] Structured audit reports (JSON + Markdown)
+- [x] Chunking strategy benchmark (fixed-size vs recursive vs semantic)
+- [x] Vectorstore adapters (ChromaDB — Pinecone and Qdrant coming soon)
+- [x] Documentation (GitHub Pages)
+- [x] PyPI release
+## Development
+```bash
+# Install dependencies
+uv sync --group dev
+# Run tests
+uv run pytest
+# Lint + format
+uv run ruff check src/ tests/
+uv run ruff format src/ tests/
+# Type check
+uv run mypy src/rag_audit
+# Build docs locally
+uv sync --group docs
+uv run mkdocs serve
+```

rag_audit-0.1.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,26 @@
+rag_audit/__init__.py,sha256=F4I8YR81Fo281iRKpkEGgMIWpfTPQ6LIl19bo62Vzlc,95
+rag_audit/adapters/__init__.py,sha256=UBCsVaBtrarw-wbAxZvtWjOGVF-nHnlstOF2HKXK_5U,328
+rag_audit/adapters/base.py,sha256=RwjabhQNKcMaG9Z3e2MsrLGcE4fc55HukVfDq69UM2M,366
+rag_audit/adapters/chroma.py,sha256=Azi0tn9jpQSjWWwwlXW_ov5YXIVU91TqewpSEi4FfdQ,1367
+rag_audit/adapters/stubs.py,sha256=OEoGKJkY2_GNWIiJKUxfSgnDUsFgmmfKXvvPwT5RRYg,816
+rag_audit/chunker/__init__.py,sha256=wATANNgiuL1YB8_jZFFtou1mRCnPg3-rdC2KkyhNk2s,549
+rag_audit/chunker/base.py,sha256=bw2x1eWqKlZUxy7rWkBfyT0FPZUN6noHeNbwzvkoh6g,460
+rag_audit/chunker/evaluator.py,sha256=BudrgEyIuROxaWymRpQx0OtGOUZWaqmy_S0dhDNS0e0,1704
+rag_audit/chunker/models.py,sha256=g5eAzesvtj5y-_4tsbeUFanqQAkdv7CSFMLQjuJIZCQ,450
+rag_audit/chunker/strategies.py,sha256=XfEhsFtH0zqmgmnV3DTpfq1PVL7wtyR0ooaL_9uyw0A,3065
+rag_audit/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+rag_audit/cli/main.py,sha256=1n4HQlb7MCMJDY4QBIvs3Icyi6CN2ySCqNx5KEbtDls,2073
+rag_audit/core/__init__.py,sha256=uWm8nOHY6dJlDjrAyZk-1_7QrAgCs3QR6QeGPQLPdK0,46
+rag_audit/core/config.py,sha256=sHmrnrm6BmVivZ7pri3ng7jOivueWECWvwdj3oUG5B0,541
+rag_audit/core/runner.py,sha256=uvDasgevFWEdxj_u3JMbBX2sZZQIfkB1a2cM13t-GpQ,1405
+rag_audit/metrics/__init__.py,sha256=KksI0lNVykFhJNILwEvYa3UJYL-tWGRUyb5X-o2LIeM,333
+rag_audit/metrics/hallucination.py,sha256=si_AaHSTavXYIegq3IfDacYV9v2NtWi43yuPmoorlko,2016
+rag_audit/metrics/retrieval.py,sha256=zRyoWHaRfB38lJ_VrwAJuh3ydPh93arwMKLF-0HCqUk,1283
+rag_audit/report/__init__.py,sha256=bDMU_bJazD_h8i0uj-R6Z6UDB0PC09JZ1kMdEcFmvNI,196
+rag_audit/report/models.py,sha256=KM100LyLrvSFFpMYgnadwjzlBH9HiI6tsIoN-lVQEMk,535
+rag_audit/report/renderer.py,sha256=76Fj0LLqxeHV5i776-tBSj1ckaPL8ROTZ-J-e9nTB20,1411
+rag_audit-0.1.0.dist-info/METADATA,sha256=w8ji8AWBSVEc-7DhrYDmDZad-M1ir84KIBDIIyCM5A0,6957
+rag_audit-0.1.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
+rag_audit-0.1.0.dist-info/entry_points.txt,sha256=aJN6kN55_mRkGxKS_l_BrWen0xvhlvVJvo8AlNVJmhU,53
+rag_audit-0.1.0.dist-info/licenses/LICENSE,sha256=1Aj1te3ZIg2_1l0Oa4KZYVIuqkeF8tyFV9BWcDe-JO8,1070
+rag_audit-0.1.0.dist-info/RECORD,,

rag_audit-0.1.0.dist-info/WHEEL ADDED Viewed

@@ -0,0 +1,4 @@
+Wheel-Version: 1.0
+Generator: hatchling 1.30.1
+Root-Is-Purelib: true
+Tag: py3-none-any

rag_audit-0.1.0.dist-info/entry_points.txt ADDED Viewed

@@ -0,0 +1,2 @@
+[console_scripts]
+rag-audit = rag_audit.cli.main:app

rag_audit-0.1.0.dist-info/licenses/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2026 Andrey Pontes
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.