ragcheck-cli 0.2.1__tar.gz → 0.2.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ragcheck_cli-0.2.1 → ragcheck_cli-0.2.3}/CHANGELOG.md +8 -1
- {ragcheck_cli-0.2.1/ragcheck_cli.egg-info → ragcheck_cli-0.2.3}/PKG-INFO +4 -2
- {ragcheck_cli-0.2.1 → ragcheck_cli-0.2.3}/README.md +1 -0
- {ragcheck_cli-0.2.1 → ragcheck_cli-0.2.3}/pyproject.toml +3 -2
- {ragcheck_cli-0.2.1 → ragcheck_cli-0.2.3}/ragcheck/__init__.py +1 -1
- ragcheck_cli-0.2.3/ragcheck/analyzers/chunkers.py +193 -0
- ragcheck_cli-0.2.3/ragcheck/cli.py +231 -0
- ragcheck_cli-0.2.3/ragcheck/core/config.py +47 -0
- ragcheck_cli-0.2.3/ragcheck/core/config_loader.py +24 -0
- ragcheck_cli-0.2.3/ragcheck/core/document_loader.py +275 -0
- ragcheck_cli-0.2.3/ragcheck/core/vector_store.py +172 -0
- ragcheck_cli-0.2.3/ragcheck/reports/html_report.py +513 -0
- ragcheck_cli-0.2.3/ragcheck/testers/auto_qa.py +115 -0
- ragcheck_cli-0.2.3/ragcheck/testers/retrieval_tester.py +113 -0
- {ragcheck_cli-0.2.1 → ragcheck_cli-0.2.3/ragcheck_cli.egg-info}/PKG-INFO +4 -2
- {ragcheck_cli-0.2.1 → ragcheck_cli-0.2.3}/ragcheck_cli.egg-info/requires.txt +1 -0
- ragcheck_cli-0.2.1/ragcheck/analyzers/chunkers.py +0 -289
- ragcheck_cli-0.2.1/ragcheck/cli.py +0 -211
- ragcheck_cli-0.2.1/ragcheck/core/config.py +0 -75
- ragcheck_cli-0.2.1/ragcheck/core/config_loader.py +0 -55
- ragcheck_cli-0.2.1/ragcheck/core/document_loader.py +0 -99
- ragcheck_cli-0.2.1/ragcheck/core/vector_store.py +0 -81
- ragcheck_cli-0.2.1/ragcheck/reports/html_report.py +0 -460
- ragcheck_cli-0.2.1/ragcheck/testers/auto_qa.py +0 -221
- ragcheck_cli-0.2.1/ragcheck/testers/retrieval_tester.py +0 -185
- {ragcheck_cli-0.2.1 → ragcheck_cli-0.2.3}/LICENSE +0 -0
- {ragcheck_cli-0.2.1 → ragcheck_cli-0.2.3}/MANIFEST.in +0 -0
- {ragcheck_cli-0.2.1 → ragcheck_cli-0.2.3}/docs/ARCHITECTURE.md +0 -0
- {ragcheck_cli-0.2.1 → ragcheck_cli-0.2.3}/examples/chunk_demo.py +0 -0
- {ragcheck_cli-0.2.1 → ragcheck_cli-0.2.3}/examples/classifier_demo.py +0 -0
- {ragcheck_cli-0.2.1 → ragcheck_cli-0.2.3}/examples/demo.py +0 -0
- {ragcheck_cli-0.2.1 → ragcheck_cli-0.2.3}/examples/embed_demo.py +0 -0
- {ragcheck_cli-0.2.1 → ragcheck_cli-0.2.3}/examples/full_pipeline_demo.py +0 -0
- {ragcheck_cli-0.2.1 → ragcheck_cli-0.2.3}/examples/qa_demo.py +0 -0
- {ragcheck_cli-0.2.1 → ragcheck_cli-0.2.3}/examples/report_demo.py +0 -0
- {ragcheck_cli-0.2.1 → ragcheck_cli-0.2.3}/ragcheck/__main__.py +0 -0
- {ragcheck_cli-0.2.1 → ragcheck_cli-0.2.3}/ragcheck/analyzers/__init__.py +0 -0
- {ragcheck_cli-0.2.1 → ragcheck_cli-0.2.3}/ragcheck/analyzers/failure_classifier.py +0 -0
- {ragcheck_cli-0.2.1 → ragcheck_cli-0.2.3}/ragcheck/analyzers/recommender.py +0 -0
- {ragcheck_cli-0.2.1 → ragcheck_cli-0.2.3}/ragcheck/core/__init__.py +0 -0
- {ragcheck_cli-0.2.1 → ragcheck_cli-0.2.3}/ragcheck/core/embeddings.py +0 -0
- {ragcheck_cli-0.2.1 → ragcheck_cli-0.2.3}/ragcheck/core/progress.py +0 -0
- {ragcheck_cli-0.2.1 → ragcheck_cli-0.2.3}/ragcheck/reports/__init__.py +0 -0
- {ragcheck_cli-0.2.1 → ragcheck_cli-0.2.3}/ragcheck/reports/chunk_visualizer.py +0 -0
- {ragcheck_cli-0.2.1 → ragcheck_cli-0.2.3}/ragcheck/reports/export.py +0 -0
- {ragcheck_cli-0.2.1 → ragcheck_cli-0.2.3}/ragcheck/reports/generator.py +0 -0
- {ragcheck_cli-0.2.1 → ragcheck_cli-0.2.3}/ragcheck/testers/__init__.py +0 -0
- {ragcheck_cli-0.2.1 → ragcheck_cli-0.2.3}/ragcheck_cli.egg-info/SOURCES.txt +0 -0
- {ragcheck_cli-0.2.1 → ragcheck_cli-0.2.3}/ragcheck_cli.egg-info/dependency_links.txt +0 -0
- {ragcheck_cli-0.2.1 → ragcheck_cli-0.2.3}/ragcheck_cli.egg-info/entry_points.txt +0 -0
- {ragcheck_cli-0.2.1 → ragcheck_cli-0.2.3}/ragcheck_cli.egg-info/top_level.txt +0 -0
- {ragcheck_cli-0.2.1 → ragcheck_cli-0.2.3}/setup.cfg +0 -0
|
@@ -2,6 +2,13 @@
|
|
|
2
2
|
|
|
3
3
|
All notable changes to this project will be documented in this file.
|
|
4
4
|
|
|
5
|
+
## [0.2.2] - 2026-06-06
|
|
6
|
+
|
|
7
|
+
### Fixed
|
|
8
|
+
- **Prompt size reduction** — Shrunk auto-QA prompt from ~1500 to ~800 chars + compact instructions. Stays well under Groq 6000 TPM limit.
|
|
9
|
+
- **Gemini 3+ compatibility** — Skip deprecated `temperature`/`top_p`/`top_k` params for `gemini/gemini-3.*` models to suppress deprecation warnings.
|
|
10
|
+
- **Better Q&A parsing** — Accept both `Q:/A:` and `Question:/Answer:` formats from LLM responses.
|
|
11
|
+
|
|
5
12
|
## [0.2.0] - 2026-06-04
|
|
6
13
|
|
|
7
14
|
### Added
|
|
@@ -33,4 +40,4 @@ All notable changes to this project will be documented in this file.
|
|
|
33
40
|
- Recommendation engine with decision tree
|
|
34
41
|
- Beautiful HTML reports (single file, no server)
|
|
35
42
|
- CI/CD mode with GitHub Actions
|
|
36
|
-
- PDF/PNG export via Playwright
|
|
43
|
+
- PDF/PNG export via Playwright
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ragcheck-cli
|
|
3
|
-
Version: 0.2.1
|
|
4
|
-
Summary: Lighthouse for RAG systems
|
|
3
|
+
Version: 0.2.3
|
|
4
|
+
Summary: Lighthouse for RAG systems - diagnose and fix your retrieval pipeline
|
|
5
5
|
Author-email: Pranay Mane <pranaymane78@gmail.com>
|
|
6
6
|
License: MIT
|
|
7
7
|
Project-URL: Homepage, https://github.com/pranay7863/ragcheck
|
|
@@ -30,6 +30,7 @@ Requires-Dist: sentence-transformers>=2.2.0
|
|
|
30
30
|
Requires-Dist: chromadb>=0.4.0
|
|
31
31
|
Requires-Dist: PyYAML>=6.0
|
|
32
32
|
Requires-Dist: transformers>=4.30.0
|
|
33
|
+
Requires-Dist: pdfplumber>=0.10.0
|
|
33
34
|
Provides-Extra: pdf
|
|
34
35
|
Requires-Dist: PyPDF2>=3.0.0; extra == "pdf"
|
|
35
36
|
Provides-Extra: export
|
|
@@ -180,6 +181,7 @@ MIT — see [LICENSE](LICENSE)
|
|
|
180
181
|
## Roadmap
|
|
181
182
|
|
|
182
183
|
- [x] v0.2.0 — Offline reports, NLI faithfulness, scaled auto-QA, chunk viz
|
|
184
|
+
- [x] v0.2.2 — Prompt size fix, Gemini 3+ support, Groq TPM compliance
|
|
183
185
|
- [ ] v0.3.0 — More vector DBs (Pinecone, Weaviate)
|
|
184
186
|
- [ ] v0.3.0 — SaaS API for teams
|
|
185
187
|
- [ ] v0.4.0 — Enterprise features (SSO, audit logs)
|
|
@@ -140,6 +140,7 @@ MIT — see [LICENSE](LICENSE)
|
|
|
140
140
|
## Roadmap
|
|
141
141
|
|
|
142
142
|
- [x] v0.2.0 — Offline reports, NLI faithfulness, scaled auto-QA, chunk viz
|
|
143
|
+
- [x] v0.2.2 — Prompt size fix, Gemini 3+ support, Groq TPM compliance
|
|
143
144
|
- [ ] v0.3.0 — More vector DBs (Pinecone, Weaviate)
|
|
144
145
|
- [ ] v0.3.0 — SaaS API for teams
|
|
145
146
|
- [ ] v0.4.0 — Enterprise features (SSO, audit logs)
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "ragcheck-cli"
|
|
3
|
-
version = "0.2.1"
|
|
4
|
-
description = "Lighthouse for RAG systems
|
|
3
|
+
version = "0.2.3"
|
|
4
|
+
description = "Lighthouse for RAG systems - diagnose and fix your retrieval pipeline"
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
license = {text = "MIT"}
|
|
7
7
|
requires-python = ">=3.10"
|
|
@@ -31,6 +31,7 @@ dependencies = [
|
|
|
31
31
|
"chromadb>=0.4.0",
|
|
32
32
|
"PyYAML>=6.0",
|
|
33
33
|
"transformers>=4.30.0",
|
|
34
|
+
"pdfplumber>=0.10.0",
|
|
34
35
|
]
|
|
35
36
|
|
|
36
37
|
[project.optional-dependencies]
|
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
"""Document chunking strategies."""
|
|
2
|
+
|
|
3
|
+
from typing import List, Dict, Any
|
|
4
|
+
import re
|
|
5
|
+
|
|
6
|
+
from ragcheck.core.config import Config
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class Chunk:
    """A text chunk with metadata.

    Attributes:
        text: The chunk's text content.
        metadata: Metadata dict for the chunk; an empty dict when none is given.
        start: ``metadata["start"]`` if present, otherwise ``0``.
        end: ``metadata["end"]`` if present, otherwise ``len(text)``.
    """

    def __init__(self, text: str, metadata: Dict[str, Any] = None):
        """Create a chunk.

        Args:
            text: The chunk's text content.
            metadata: Optional metadata; ``None`` (the default) is treated
                as an empty dict.
        """
        self.text = text
        self.metadata = metadata or {}
        # Read from the normalised dict, not the raw argument: the raw
        # argument may be None, which has no .get().
        self.start = self.metadata.get("start", 0)
        self.end = self.metadata.get("end", len(text))
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class Chunker:
    """Chunk documents using configurable strategies.

    The strategy is selected by ``config.chunk_strategy``: ``"recursive"``,
    ``"semantic"``, or anything else for fixed-size windows. All sizes are
    measured in characters and come from ``config.chunk_size`` and
    ``config.chunk_overlap``.
    """

    # A trailing fixed-size window shorter than this is dropped when the
    # previous window already contains all of its text.
    _MIN_TAIL_LENGTH = 100

    # Regex separators for recursive splitting, in order of preference.
    # Text is split *before* each match (lookahead), so the separator stays
    # at the head of the piece that follows it.
    _SEPARATORS = (
        r"\n\nSECTION\s+\d+",  # SECTION headers
        r"\n\nCHAPTER\s+[IVX]+",  # CHAPTER headers
        # NOTE(review): this matches "12 ." rather than "12." -- confirm intent.
        r"\n\n\d+\s+\.",  # Numbered paragraphs
        r"\n\n",  # Double newline (paragraphs)
        r"\n",  # Single newline
        r"\.\s+",  # Sentence end
    )

    def __init__(self, config: "Config"):
        """Store the configuration that drives every chunking method."""
        self.config = config

    def chunk(self, document: Dict[str, Any]) -> List["Chunk"]:
        """Chunk a document based on the configured strategy.

        Args:
            document: Mapping with the document's content under ``"text"``.
                All of its keys are copied into each chunk's metadata.

        Returns:
            The chunks, or an empty list when the document has no text.
        """
        text = document.get("text", "")
        if not text:
            return []

        strategy = self.config.chunk_strategy
        if strategy == "recursive":
            return self._recursive_chunk(text, document)
        if strategy == "semantic":
            return self._semantic_chunk(text, document)
        return self._fixed_chunk(text, document)

    def _recursive_chunk(self, text: str, document: Dict[str, Any]) -> List["Chunk"]:
        """Recursively split by separators, preferring larger chunks.

        For legal documents, we use section/paragraph boundaries first,
        then fall back to sentence boundaries, then fixed size.
        ``chunk_overlap`` is not applied by this strategy. ``start`` and
        ``end`` in the metadata are character offsets of the unstripped
        piece within ``text``.
        """
        chunks = []
        for start, piece in self._split_recursive(text):
            stripped = piece.strip()
            if not stripped:
                # A fixed-size fallback slice can be pure whitespace.
                continue
            chunks.append(Chunk(
                text=stripped,
                metadata={
                    **document,
                    "chunk_index": len(chunks),
                    "start": start,
                    "end": start + len(piece),
                },
            ))
        return chunks

    def _split_recursive(self, text: str, base: int = 0, sep_index: int = 0) -> List[tuple]:
        """Split ``text`` into ``(offset, piece)`` pairs of at most chunk_size.

        Args:
            text: The text to split.
            base: Offset of ``text`` within the full document; added to
                every returned offset.
            sep_index: Index into ``_SEPARATORS`` of the separator to try.

        Returns:
            Pieces in document order. Each piece is a contiguous slice of
            the document beginning at its offset.
        """
        size = self.config.chunk_size
        if sep_index >= len(self._SEPARATORS):
            # Final fallback: fixed size
            return [(base + i, text[i:i + size]) for i in range(0, len(text), size)]

        spans = []
        run_start = None  # local offset where the piece being built begins
        run_end = 0
        pos = 0
        for part in re.split(f"(?={self._SEPARATORS[sep_index]})", text):
            begin, pos = pos, pos + len(part)
            if not part.strip():
                continue

            if run_start is not None and pos - run_start <= size:
                # Still fits: grow the current piece (any skipped
                # whitespace in between is kept so the slice stays contiguous).
                run_end = pos
                continue

            if run_start is not None:
                spans.append((base + run_start, text[run_start:run_end]))
                # Reset so the flushed piece can never be emitted twice.
                run_start = None

            if len(part) > size:
                # If single part is too big, recurse with next separator
                spans.extend(self._split_recursive(part, base + begin, sep_index + 1))
            else:
                run_start, run_end = begin, pos

        if run_start is not None:
            spans.append((base + run_start, text[run_start:run_end]))
        return spans

    def _semantic_chunk(self, text: str, document: Dict[str, Any]) -> List["Chunk"]:
        """Semantic chunking using sentence boundaries.

        Sentences are grouped until adding another would exceed
        ``chunk_size``; the trailing sentences of a finished chunk that fit
        within ``chunk_overlap`` are carried into the next one. A single
        sentence longer than ``chunk_size`` is kept whole.
        """
        size = self.config.chunk_size
        overlap = self.config.chunk_overlap
        # Drop empty fragments (e.g. from trailing whitespace) so they do
        # not add stray spaces when sentences are re-joined.
        sentences = [s for s in re.split(r"(?<=[.!?])\s+", text) if s.strip()]

        chunks = []
        current = []
        current_len = 0
        for sent in sentences:
            if current and current_len + len(sent) > size:
                chunks.append(Chunk(
                    text=" ".join(current),
                    metadata={**document, "chunk_index": len(chunks)},
                ))
                # Keep overlap: walk backwards while the budget allows.
                kept = []
                kept_len = 0
                for prev in reversed(current):
                    if kept_len + len(prev) > overlap:
                        break
                    kept.insert(0, prev)
                    kept_len += len(prev)
                current, current_len = kept, kept_len

            current.append(sent)
            current_len += len(sent)

        if current:
            chunks.append(Chunk(
                text=" ".join(current),
                metadata={**document, "chunk_index": len(chunks)},
            ))
        return chunks

    def _fixed_chunk(self, text: str, document: Dict[str, Any]) -> List["Chunk"]:
        """Fixed-size chunking with overlap.

        Raises:
            ValueError: If ``chunk_overlap`` is not smaller than
                ``chunk_size`` (the window could never advance).
        """
        size = self.config.chunk_size
        step = size - self.config.chunk_overlap
        if step <= 0:
            raise ValueError(
                "chunk_overlap must be smaller than chunk_size "
                f"(got chunk_size={size}, chunk_overlap={self.config.chunk_overlap})"
            )

        chunks = []
        for i in range(0, len(text), step):
            window = text[i:i + size]
            # Skip tiny trailing chunks, but only when the previous window
            # already holds their text; otherwise content would be lost
            # (short documents, or tails longer than the overlap).
            already_covered = i > 0 and len(text) <= i - step + size
            if already_covered and len(window) < self._MIN_TAIL_LENGTH:
                break
            stripped = window.strip()
            if not stripped:
                continue
            chunks.append(Chunk(
                text=stripped,
                metadata={
                    **document,
                    "chunk_index": i // step,
                    "start": i,
                    "end": i + len(window),
                },
            ))
        return chunks

    def _split_fixed(self, text: str) -> List[str]:
        """Split text into consecutive pieces of at most ``chunk_size`` characters."""
        size = self.config.chunk_size
        return [text[i:i + size] for i in range(0, len(text), size)]
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
def get_chunk_stats(chunks: List["Chunk"]) -> Dict[str, Any]:
    """Calculate chunk statistics.

    Args:
        chunks: Objects exposing a ``text`` attribute.

    Returns:
        An empty dict when ``chunks`` is empty; otherwise a dict with
        ``total_chunks``, ``avg_length``, ``min_length``, ``max_length`` and
        ``median_length``, all measured in characters of ``text``.
    """
    # Local import keeps this function self-contained within the module.
    from statistics import median

    if not chunks:
        return {}

    lengths = [len(c.text) for c in chunks]
    return {
        "total_chunks": len(lengths),
        "avg_length": sum(lengths) / len(lengths),
        "min_length": min(lengths),
        "max_length": max(lengths),
        # True median: for an even count this is the mean of the two middle
        # values rather than the upper-middle element.
        "median_length": median(lengths),
    }
|
|
@@ -0,0 +1,231 @@
|
|
|
1
|
+
"""CLI for ragcheck."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
import sys
|
|
5
|
+
import warnings
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Optional
|
|
8
|
+
|
|
9
|
+
import typer
|
|
10
|
+
from rich.console import Console
|
|
11
|
+
from rich.panel import Panel
|
|
12
|
+
from rich.text import Text
|
|
13
|
+
|
|
14
|
+
from ragcheck.core.config import Config
|
|
15
|
+
from ragcheck.core.config_loader import load_config
|
|
16
|
+
from ragcheck.core.document_loader import DocumentLoader
|
|
17
|
+
from ragcheck.core.embeddings import Embedder
|
|
18
|
+
from ragcheck.core.vector_store import ChromaVectorStore, MemoryVectorStore
|
|
19
|
+
from ragcheck.core.progress import Progress
|
|
20
|
+
from ragcheck.analyzers.chunkers import Chunker
|
|
21
|
+
from ragcheck.analyzers.failure_classifier import FailureClassifier
|
|
22
|
+
from ragcheck.analyzers.recommender import Recommender
|
|
23
|
+
from ragcheck.testers.auto_qa import AutoQA, generate_dummy_questions
|
|
24
|
+
from ragcheck.testers.retrieval_tester import RetrievalTester
|
|
25
|
+
from ragcheck.reports.generator import ReportGenerator
|
|
26
|
+
from ragcheck.reports.html_report import HTMLReport
|
|
27
|
+
|
|
28
|
+
# Help text shown for the top-level command.
_APP_HELP = "Lighthouse for RAG systems — diagnose and fix your retrieval pipeline"

# Typer application; sub-commands register themselves via @app.command().
app = typer.Typer(help=_APP_HELP)

# Shared Rich console used by every command for terminal output.
console = Console()
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def get_version() -> str:
    """Return the ragcheck version string.

    Resolution order: ``ragcheck.__version__``, then the installed
    ``ragcheck-cli`` distribution metadata, then a hard-coded fallback
    matching this release.
    """
    try:
        from ragcheck import __version__
    except ImportError:
        pass
    else:
        return __version__

    # Ask the installed distribution so the answer cannot drift from the
    # packaged version.
    from importlib.metadata import PackageNotFoundError
    from importlib.metadata import version as _dist_version

    try:
        return _dist_version("ragcheck-cli")
    except PackageNotFoundError:
        return "0.2.3"
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
@app.command()
def init(
    path: Optional[str] = typer.Argument(None, help="Project path"),
    force: bool = typer.Option(False, "--force", "-f", help="Overwrite existing config"),
):
    """Initialize a new ragcheck project."""
    # The docstring above is left as-is because Typer displays it as the
    # command's help text.
    target = Path(path) if path else Path(".")
    target.mkdir(parents=True, exist_ok=True)

    config_path = target / "ragcheck.yaml"
    if config_path.exists() and not force:
        # Refuse to clobber an existing config unless --force was given.
        console.print(f"[yellow]Config already exists at {config_path}[/yellow]")
        raise typer.Exit(1)

    config = Config()
    # Write default config. The encoding is explicit because load_config
    # reads this file as UTF-8 regardless of the platform default.
    config_path.write_text(
        f"""# ragcheck configuration
embedding_model: {config.embedding_model}
chunk_size: {config.chunk_size}
chunk_overlap: {config.chunk_overlap}
chunk_strategy: {config.chunk_strategy}
vector_store: {config.vector_store}
collection_name: {config.collection_name}
top_k: {config.top_k}
similarity_threshold: {config.similarity_threshold}
answer_model: {config.answer_model}
qa_model: {config.qa_model}
max_qa_questions: {config.max_qa_questions}
""",
        encoding="utf-8",
    )

    console.print(f"[green]OK[/green] Created {config_path}")
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
@app.command()
def run(
    docs: str = typer.Option(..., "--docs", "-d", help="Path to documents directory"),
    query: Optional[str] = typer.Option(None, "--query", "-q", help="Single test query"),
    config_path: Optional[str] = typer.Option(None, "--config", "-c", help="Config file path"),
    output: str = typer.Option("ragcheck_report.html", "--output", "-o", help="Output file"),
    generate_answers: bool = typer.Option(False, "--generate-answers", "-a", help="Generate answers with LLM"),
    answer_model: Optional[str] = typer.Option(None, "--answer-model", help="Override answer model"),
    top_k: Optional[int] = typer.Option(None, "--top-k", "-k", help="Override top_k"),
    verbose: bool = typer.Option(False, "--verbose", "-v", help="Verbose output"),
):
    """Run RAG diagnostics on documents."""
    # The docstring above is left as-is because Typer displays it as the
    # command's help text.
    #
    # Pipeline: load config -> load documents -> chunk -> build vector
    # store -> obtain test questions -> test retrieval -> classify failures
    # and recommend fixes -> write report -> print summary. Exits with
    # code 1 when the docs path is missing or no documents are loaded.

    # Load config; command-line flags override values from the file.
    config = load_config(config_path) if config_path else Config()
    if verbose:
        config.verbose = True
    if answer_model:
        config.answer_model = answer_model
    if top_k:
        config.top_k = top_k

    progress = Progress(verbose=verbose)

    # Load documents
    doc_path = Path(docs)
    if not doc_path.exists():
        console.print(f"[red]Error:[/red] Path not found: {doc_path}")
        raise typer.Exit(1)

    progress.start("Loading documents...")
    loader = DocumentLoader()
    documents = loader.load(doc_path)
    progress.complete(f"Loaded {len(documents)} documents")

    if not documents:
        console.print("[red]No documents found[/red]")
        raise typer.Exit(1)

    # Chunk documents
    progress.start("Chunking documents...")
    chunker = Chunker(config)
    chunks = []
    for doc in documents:
        chunks.extend(chunker.chunk(doc))
    progress.complete(f"Created {len(chunks)} chunks")

    # Build vector store; anything other than "chroma" uses the in-memory store.
    progress.start("Building vector store...")
    if config.vector_store == "chroma":
        store = ChromaVectorStore(config, progress)
    else:
        store = MemoryVectorStore(config, progress)
    store.clear()
    store.add([c.text for c in chunks])
    progress.complete("Vector store ready")

    # Generate or use test questions
    questions = []
    if query:
        # Single query mode — create question from user query
        questions.append(RetrievalTester.TestQuestion(
            question=query,
            expected_answer="",  # Will be filled by retrieval
            source_chunks=[query],  # Use query as proxy for source
            difficulty="user",
        ))
    else:
        # Auto-generate questions from chunks
        progress.start("Generating test questions...")
        qa = AutoQA(config, progress)
        questions = qa.generate([c.text for c in chunks])

        if not questions:
            # FALLBACK: Generate meaningful questions from chunk content
            warnings.warn(
                "LLM question generation failed. Using content-based fallback questions. "
                "To use a real LLM:\n"
                " 1. Get a free Groq key: https://console.groq.com/keys\n"
                " 2. Run: set GROQ_API_KEY=your_key (Windows)\n"
                " 3. Or ensure Ollama is running: ollama run phi3:mini",
                UserWarning,
            )
            questions = generate_dummy_questions([c.text for c in chunks])

        progress.complete(f"Generated {len(questions)} test questions")

    # Test retrieval
    progress.start("Testing retrieval...")
    tester = RetrievalTester(config, store, progress)
    retrieval_results = tester.test(questions)
    progress.complete(
        f"Retrieval: {retrieval_results['passed']}/{retrieval_results['total']} passed "
        f"({retrieval_results['score']}%)"
    )

    # Generate answers if requested
    # NOTE(review): no answers are produced here yet; answer_results stays
    # None even when --generate-answers is passed.
    answer_results = None
    if generate_answers:
        progress.start("Generating answers...")
        # Answer generation logic here
        progress.complete("Answers generated")

    # Analyze failures
    progress.start("Analyzing failures...")
    classifier = FailureClassifier(config)
    failures = classifier.classify(retrieval_results["details"])

    recommender = Recommender(config)
    recommendations = recommender.recommend(failures)
    progress.complete(f"Found {len(failures)} failures, {len(recommendations)} recommendations")

    # Generate report
    progress.start("Generating report...")
    report_data = {
        "project_name": doc_path.name,
        "config": config,
        "retrieval_results": retrieval_results,
        "answer_results": answer_results,
        "failures": failures,
        "recommendations": recommendations,
        "chunks": chunks,
        "documents": documents,
    }

    # NOTE(review): with export_format left at "html" this branch is taken
    # for every output filename, including non-.html ones — confirm intent.
    if config.export_format == "html" or output.endswith(".html"):
        reporter = HTMLReport(config)
        html = reporter.generate(report_data)
        Path(output).write_text(html, encoding="utf-8")
    else:
        generator = ReportGenerator(config)
        generator.generate(report_data, output)

    progress.complete(f"Report saved to {output}")

    # Summary
    score = retrieval_results["score"]
    color = "green" if score >= 80 else "yellow" if score >= 60 else "red"
    # The closing tag must be "[/colour]"; a nested bracket is not valid
    # Rich markup and would be printed literally.
    console.print(
        f"\n[{color}]Tests: {retrieval_results['passed']}/{retrieval_results['total']} passed "
        f"| Score: {score}%[/{color}]"
    )

    if score < 100:
        console.print("\n[bold]Top Recommendations:[/bold]")
        for rec in recommendations[:3]:
            console.print(f" • {rec['title']}: {rec['description'][:60]}...")
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
@app.command()
def version():
    """Show version."""
    # The docstring above doubles as the command's help text in Typer.
    current = get_version()
    console.print(f"ragcheck {current}")


# Run the CLI when this file is executed as a script.
if __name__ == "__main__":
    app()
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
"""Configuration for ragcheck."""
|
|
2
|
+
|
|
3
|
+
from pydantic import BaseModel, Field
|
|
4
|
+
from typing import Literal
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class Config(BaseModel):
    """RAGCheck configuration."""

    # --- Embedding -------------------------------------------------------
    # Name of the embedding model.
    embedding_model: str = "all-MiniLM-L6-v2"

    # --- Chunking --------------------------------------------------------
    # Bounds are enforced by pydantic on construction.
    chunk_size: int = Field(2048, ge=256, le=8192)
    chunk_overlap: int = Field(256, ge=0, le=2048)
    chunk_strategy: Literal["recursive", "semantic", "fixed"] = "recursive"

    # --- Vector store ----------------------------------------------------
    vector_store: Literal["chroma", "faiss", "memory"] = "chroma"
    collection_name: str = "ragcheck-default"

    # --- Retrieval -------------------------------------------------------
    top_k: int = Field(5, ge=1, le=50)
    similarity_threshold: float = Field(0.3, ge=0.0, le=1.0)

    # --- Answer generation -----------------------------------------------
    answer_model: str = "ollama/phi3:mini"
    max_answer_tokens: int = Field(512, ge=64, le=4096)
    temperature: float = Field(0.3, ge=0.0, le=2.0)

    # --- QA generation ---------------------------------------------------
    qa_model: str = "ollama/phi3:mini"
    max_qa_questions: int = Field(50, ge=1, le=200)
    qa_temperature: float = Field(0.7, ge=0.0, le=2.0)

    # --- Evaluation ------------------------------------------------------
    faithfulness_model: str = "microsoft/deberta-v2-xlarge-mnli"
    nli_batch_size: int = Field(8, ge=1, le=64)

    # --- Export ----------------------------------------------------------
    export_format: Literal["html", "json", "markdown"] = "html"
    include_chunk_visualizer: bool = True
    include_recommendations: bool = True

    # --- Display ---------------------------------------------------------
    show_progress: bool = True
    verbose: bool = False
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
"""Configuration loader with encoding fixes."""
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Optional
|
|
5
|
+
import yaml
|
|
6
|
+
|
|
7
|
+
from ragcheck.core.config import Config
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def load_config(path: Optional[str] = None) -> Config:
    """Load configuration from YAML file.

    Args:
        path: Path to the YAML file. When omitted, ``ragcheck.yaml`` in the
            current working directory is used.

    Returns:
        A ``Config`` built from the file's top-level mapping, or a default
        ``Config`` when the file does not exist or is empty.

    Raises:
        TypeError: If the file's top-level YAML value is not a mapping.
    """
    config_path = Path(path) if path else Path("ragcheck.yaml")

    if not config_path.exists():
        return Config()

    # CRITICAL FIX: Explicit UTF-8 encoding for Windows
    with open(config_path, "r", encoding="utf-8", errors="replace") as f:
        data = yaml.safe_load(f) or {}

    if not isinstance(data, dict):
        # Without this check Config(**data) fails with an opaque
        # "argument after ** must be a mapping" error.
        raise TypeError(
            f"{config_path} must contain a YAML mapping of settings, "
            f"got {type(data).__name__}"
        )

    return Config(**data)
|