ragcheck-cli 0.2.5__tar.gz → 0.2.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ragcheck_cli-0.2.5/ragcheck_cli.egg-info → ragcheck_cli-0.2.7}/PKG-INFO +1 -1
- {ragcheck_cli-0.2.5 → ragcheck_cli-0.2.7}/pyproject.toml +1 -1
- {ragcheck_cli-0.2.5 → ragcheck_cli-0.2.7}/ragcheck/__init__.py +3 -3
- ragcheck_cli-0.2.7/ragcheck/analyzers/__init__.py +0 -0
- {ragcheck_cli-0.2.5 → ragcheck_cli-0.2.7}/ragcheck/analyzers/failure_classifier.py +34 -0
- {ragcheck_cli-0.2.5 → ragcheck_cli-0.2.7}/ragcheck/analyzers/recommender.py +47 -0
- {ragcheck_cli-0.2.5 → ragcheck_cli-0.2.7}/ragcheck/cli.py +2 -2
- ragcheck_cli-0.2.7/ragcheck/core/__init__.py +0 -0
- ragcheck_cli-0.2.7/ragcheck/core/progress.py +72 -0
- ragcheck_cli-0.2.7/ragcheck/reports/__init__.py +0 -0
- {ragcheck_cli-0.2.5 → ragcheck_cli-0.2.7}/ragcheck/reports/html_report.py +47 -2
- ragcheck_cli-0.2.7/ragcheck/testers/__init__.py +0 -0
- {ragcheck_cli-0.2.5 → ragcheck_cli-0.2.7/ragcheck_cli.egg-info}/PKG-INFO +1 -1
- ragcheck_cli-0.2.5/ragcheck/analyzers/__init__.py +0 -1
- ragcheck_cli-0.2.5/ragcheck/core/__init__.py +0 -1
- ragcheck_cli-0.2.5/ragcheck/core/progress.py +0 -41
- ragcheck_cli-0.2.5/ragcheck/reports/__init__.py +0 -1
- ragcheck_cli-0.2.5/ragcheck/testers/__init__.py +0 -1
- {ragcheck_cli-0.2.5 → ragcheck_cli-0.2.7}/CHANGELOG.md +0 -0
- {ragcheck_cli-0.2.5 → ragcheck_cli-0.2.7}/LICENSE +0 -0
- {ragcheck_cli-0.2.5 → ragcheck_cli-0.2.7}/MANIFEST.in +0 -0
- {ragcheck_cli-0.2.5 → ragcheck_cli-0.2.7}/README.md +0 -0
- {ragcheck_cli-0.2.5 → ragcheck_cli-0.2.7}/docs/ARCHITECTURE.md +0 -0
- {ragcheck_cli-0.2.5 → ragcheck_cli-0.2.7}/examples/chunk_demo.py +0 -0
- {ragcheck_cli-0.2.5 → ragcheck_cli-0.2.7}/examples/classifier_demo.py +0 -0
- {ragcheck_cli-0.2.5 → ragcheck_cli-0.2.7}/examples/demo.py +0 -0
- {ragcheck_cli-0.2.5 → ragcheck_cli-0.2.7}/examples/embed_demo.py +0 -0
- {ragcheck_cli-0.2.5 → ragcheck_cli-0.2.7}/examples/full_pipeline_demo.py +0 -0
- {ragcheck_cli-0.2.5 → ragcheck_cli-0.2.7}/examples/qa_demo.py +0 -0
- {ragcheck_cli-0.2.5 → ragcheck_cli-0.2.7}/examples/report_demo.py +0 -0
- {ragcheck_cli-0.2.5 → ragcheck_cli-0.2.7}/ragcheck/__main__.py +0 -0
- {ragcheck_cli-0.2.5 → ragcheck_cli-0.2.7}/ragcheck/analyzers/chunkers.py +0 -0
- {ragcheck_cli-0.2.5 → ragcheck_cli-0.2.7}/ragcheck/core/config.py +0 -0
- {ragcheck_cli-0.2.5 → ragcheck_cli-0.2.7}/ragcheck/core/config_loader.py +0 -0
- {ragcheck_cli-0.2.5 → ragcheck_cli-0.2.7}/ragcheck/core/document_loader.py +0 -0
- {ragcheck_cli-0.2.5 → ragcheck_cli-0.2.7}/ragcheck/core/embeddings.py +0 -0
- {ragcheck_cli-0.2.5 → ragcheck_cli-0.2.7}/ragcheck/core/vector_store.py +0 -0
- {ragcheck_cli-0.2.5 → ragcheck_cli-0.2.7}/ragcheck/reports/chunk_visualizer.py +0 -0
- {ragcheck_cli-0.2.5 → ragcheck_cli-0.2.7}/ragcheck/reports/export.py +0 -0
- {ragcheck_cli-0.2.5 → ragcheck_cli-0.2.7}/ragcheck/reports/generator.py +0 -0
- {ragcheck_cli-0.2.5 → ragcheck_cli-0.2.7}/ragcheck/testers/auto_qa.py +0 -0
- {ragcheck_cli-0.2.5 → ragcheck_cli-0.2.7}/ragcheck/testers/retrieval_tester.py +0 -0
- {ragcheck_cli-0.2.5 → ragcheck_cli-0.2.7}/ragcheck_cli.egg-info/SOURCES.txt +0 -0
- {ragcheck_cli-0.2.5 → ragcheck_cli-0.2.7}/ragcheck_cli.egg-info/dependency_links.txt +0 -0
- {ragcheck_cli-0.2.5 → ragcheck_cli-0.2.7}/ragcheck_cli.egg-info/entry_points.txt +0 -0
- {ragcheck_cli-0.2.5 → ragcheck_cli-0.2.7}/ragcheck_cli.egg-info/requires.txt +0 -0
- {ragcheck_cli-0.2.5 → ragcheck_cli-0.2.7}/ragcheck_cli.egg-info/top_level.txt +0 -0
- {ragcheck_cli-0.2.5 → ragcheck_cli-0.2.7}/setup.cfg +0 -0
|
@@ -1,3 +1,3 @@
|
|
|
1
|
-
"""ragcheck — Lighthouse for RAG systems."""
|
|
2
|
-
|
|
3
|
-
__version__ = "0.2.5"
|
|
1
|
+
"""ragcheck — Lighthouse for RAG systems."""
|
|
2
|
+
|
|
3
|
+
__version__ = "0.2.7"
|
|
File without changes
|
|
@@ -161,6 +161,40 @@ class FailureClassifier:
|
|
|
161
161
|
# paraphrased answers from small local models like phi3:mini
|
|
162
162
|
return overlap_ratio > 0.15 or phrase_ratio > 0.20
|
|
163
163
|
|
|
164
|
+
def classify_batch(self, details: list[dict]) -> list[dict]:
|
|
165
|
+
"""Classify a batch of retrieval results (CLI compatibility).
|
|
166
|
+
|
|
167
|
+
Args:
|
|
168
|
+
details: List of detail dicts from RetrievalTester.test()
|
|
169
|
+
|
|
170
|
+
Returns:
|
|
171
|
+
List of failure dicts for the report.
|
|
172
|
+
"""
|
|
173
|
+
failures = []
|
|
174
|
+
for detail in details:
|
|
175
|
+
if not detail.get("hit", False):
|
|
176
|
+
# Build fake Chunk objects from retrieved texts
|
|
177
|
+
retrieved_texts = detail.get("retrieved", [])
|
|
178
|
+
retrieved_chunks = [Chunk(text=t) for t in retrieved_texts]
|
|
179
|
+
source_chunks = detail.get("source_chunks", [])
|
|
180
|
+
if not source_chunks and detail.get("expected"):
|
|
181
|
+
source_chunks = [detail["expected"]]
|
|
182
|
+
|
|
183
|
+
analysis = self.classify(
|
|
184
|
+
question=detail.get("question", ""),
|
|
185
|
+
expected_answer=detail.get("expected", ""),
|
|
186
|
+
generated_answer="",
|
|
187
|
+
retrieved_chunks=retrieved_chunks,
|
|
188
|
+
source_chunks=source_chunks,
|
|
189
|
+
)
|
|
190
|
+
failures.append({
|
|
191
|
+
"mode": analysis.failure_mode.value,
|
|
192
|
+
"confidence": int(analysis.confidence * 100),
|
|
193
|
+
"explanation": analysis.explanation,
|
|
194
|
+
"question": detail.get("question", ""),
|
|
195
|
+
"description": analysis.explanation,
|
|
196
|
+
})
|
|
197
|
+
return failures
|
|
164
198
|
def _is_boundary_error(self, expected: str, chunks: list[str]) -> bool:
|
|
165
199
|
"""Detect if answer is split across chunk boundaries."""
|
|
166
200
|
chunk_positions = []
|
|
@@ -174,3 +174,50 @@ def predict_scores(
|
|
|
174
174
|
"improvement": cumulative_improvement,
|
|
175
175
|
"recommendations_applied": len(recommendations[:3]),
|
|
176
176
|
}
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
class Recommender:
|
|
180
|
+
"""Compatibility wrapper for CLI — wraps RecommendationEngine."""
|
|
181
|
+
|
|
182
|
+
def __init__(self, config=None):
|
|
183
|
+
self.engine = RecommendationEngine()
|
|
184
|
+
self.config = config
|
|
185
|
+
|
|
186
|
+
def recommend(self, failures: list) -> list[dict]:
|
|
187
|
+
"""Generate recommendations from failure analyses.
|
|
188
|
+
|
|
189
|
+
Accepts either FailureAnalysis objects or raw dicts.
|
|
190
|
+
"""
|
|
191
|
+
from ragcheck.analyzers.failure_classifier import FailureAnalysis, FailureMode
|
|
192
|
+
|
|
193
|
+
analyses = []
|
|
194
|
+
for f in failures:
|
|
195
|
+
if isinstance(f, FailureAnalysis):
|
|
196
|
+
analyses.append(f)
|
|
197
|
+
elif isinstance(f, dict):
|
|
198
|
+
# Convert dict to FailureAnalysis
|
|
199
|
+
mode_str = f.get("mode", "unknown")
|
|
200
|
+
try:
|
|
201
|
+
mode = FailureMode(mode_str)
|
|
202
|
+
except ValueError:
|
|
203
|
+
mode = FailureMode.UNKNOWN
|
|
204
|
+
analyses.append(FailureAnalysis(
|
|
205
|
+
failure_mode=mode,
|
|
206
|
+
confidence=f.get("confidence", 50) / 100.0,
|
|
207
|
+
explanation=f.get("explanation", ""),
|
|
208
|
+
recommendation=f.get("recommendation", ""),
|
|
209
|
+
expected_improvement=0.05,
|
|
210
|
+
))
|
|
211
|
+
|
|
212
|
+
recs = self.engine.generate_recommendations(analyses)
|
|
213
|
+
return [
|
|
214
|
+
{
|
|
215
|
+
"title": r.title,
|
|
216
|
+
"description": r.description,
|
|
217
|
+
"expected_improvement": r.expected_improvement,
|
|
218
|
+
"tradeoffs": r.tradeoffs,
|
|
219
|
+
"difficulty": r.implementation_difficulty,
|
|
220
|
+
"code_example": r.code_example,
|
|
221
|
+
}
|
|
222
|
+
for r in recs
|
|
223
|
+
]
|
|
@@ -176,7 +176,7 @@ To use a real LLM:
|
|
|
176
176
|
# Analyze failures
|
|
177
177
|
progress.start("Analyzing failures...")
|
|
178
178
|
classifier = FailureClassifier(config)
|
|
179
|
-
failures = classifier.
|
|
179
|
+
failures = classifier.classify_batch(retrieval_results["details"])
|
|
180
180
|
|
|
181
181
|
recommender = Recommender(config)
|
|
182
182
|
recommendations = recommender.recommend(failures)
|
|
@@ -208,7 +208,7 @@ To use a real LLM:
|
|
|
208
208
|
# Summary
|
|
209
209
|
score = retrieval_results["score"]
|
|
210
210
|
color = "green" if score >= 80 else "yellow" if score >= 60 else "red"
|
|
211
|
-
console.print(f"\n[{color}]Tests: {retrieval_results['passed']}/{retrieval_results['total']} passed | Score: {score}%[/
|
|
211
|
+
console.print(f"\n[{color}]Tests: {retrieval_results['passed']}/{retrieval_results['total']} passed | Score: {score}%[/]")
|
|
212
212
|
|
|
213
213
|
|
|
214
214
|
if score < 100:
|
|
File without changes
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
"""Rich progress tracking for ragcheck operations."""
|
|
2
|
+
|
|
3
|
+
from rich.console import Console
|
|
4
|
+
from rich.progress import (
|
|
5
|
+
BarColumn,
|
|
6
|
+
Progress,
|
|
7
|
+
SpinnerColumn,
|
|
8
|
+
TextColumn,
|
|
9
|
+
TimeElapsedColumn,
|
|
10
|
+
TimeRemainingColumn,
|
|
11
|
+
)
|
|
12
|
+
|
|
13
|
+
console = Console()
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def get_progress() -> Progress:
|
|
17
|
+
"""Get a pre-configured Rich Progress instance."""
|
|
18
|
+
return Progress(
|
|
19
|
+
SpinnerColumn(),
|
|
20
|
+
TextColumn("[progress.description]{task.description}"),
|
|
21
|
+
BarColumn(),
|
|
22
|
+
TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
|
|
23
|
+
TimeElapsedColumn(),
|
|
24
|
+
TimeRemainingColumn(),
|
|
25
|
+
console=console,
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def print_success(message: str) -> None:
|
|
30
|
+
"""Print a success message."""
|
|
31
|
+
console.print(f"[bold green]OK[/bold green] {message}")
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def print_warning(message: str) -> None:
|
|
35
|
+
"""Print a warning message."""
|
|
36
|
+
console.print(f"[bold yellow]⚠[/bold yellow] {message}")
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def print_error(message: str) -> None:
|
|
40
|
+
"""Print an error message."""
|
|
41
|
+
console.print(f"[bold red]✗[/bold red] {message}")
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class Progress:
|
|
45
|
+
"""Simple progress wrapper for CLI compatibility."""
|
|
46
|
+
|
|
47
|
+
def __init__(self, verbose: bool = False):
|
|
48
|
+
self.verbose = verbose
|
|
49
|
+
self._task = None
|
|
50
|
+
self._rich_progress = None
|
|
51
|
+
|
|
52
|
+
def start(self, message: str) -> None:
|
|
53
|
+
"""Start a new progress task."""
|
|
54
|
+
if self.verbose:
|
|
55
|
+
console.print(f"[dim]→ {message}[/dim]")
|
|
56
|
+
self._rich_progress = get_progress()
|
|
57
|
+
self._task = self._rich_progress.add_task(message, total=100)
|
|
58
|
+
self._rich_progress.start()
|
|
59
|
+
|
|
60
|
+
def update(self, message: str) -> None:
|
|
61
|
+
"""Update progress message."""
|
|
62
|
+
if self._rich_progress and self._task is not None:
|
|
63
|
+
self._rich_progress.update(self._task, description=message)
|
|
64
|
+
|
|
65
|
+
def complete(self, message: str) -> None:
|
|
66
|
+
"""Complete the current task."""
|
|
67
|
+
if self._rich_progress and self._task is not None:
|
|
68
|
+
self._rich_progress.update(self._task, completed=100)
|
|
69
|
+
self._rich_progress.stop()
|
|
70
|
+
self._rich_progress = None
|
|
71
|
+
self._task = None
|
|
72
|
+
print_success(message)
|
|
File without changes
|
|
@@ -354,8 +354,7 @@ class HTMLReport:
|
|
|
354
354
|
is_hit = detail.get("hit", False)
|
|
355
355
|
cell_class = "cell-good" if is_hit else "cell-dead"
|
|
356
356
|
val = "1" if is_hit else "0"
|
|
357
|
-
html += f'<div class="heatmap-cell {cell_class}">{val}</div
|
|
358
|
-
'
|
|
357
|
+
html += f'<div class="heatmap-cell {cell_class}">{val}</div>\n'
|
|
359
358
|
return html
|
|
360
359
|
|
|
361
360
|
def _build_failures(self, failures: List[Dict]) -> str:
|
|
@@ -511,3 +510,49 @@ class HTMLReport:
|
|
|
511
510
|
def _build_chunk_details(self, chunks: List[Any]) -> str:
|
|
512
511
|
"""Build detailed chunk view."""
|
|
513
512
|
return ""
|
|
513
|
+
|
|
514
|
+
|
|
515
|
+
def generate_report(
|
|
516
|
+
project_name: str,
|
|
517
|
+
overall_score: float | None,
|
|
518
|
+
retrieval_score: float | None,
|
|
519
|
+
faithfulness_score: float | None,
|
|
520
|
+
tests_passed: int,
|
|
521
|
+
tests_total: int,
|
|
522
|
+
failures: list[dict],
|
|
523
|
+
recommendations: list[dict],
|
|
524
|
+
current_score: float | None,
|
|
525
|
+
predicted_score: float | None,
|
|
526
|
+
chunk_strategy: str,
|
|
527
|
+
num_chunks: int,
|
|
528
|
+
chunk_histogram: list[dict],
|
|
529
|
+
chunk_details: list[dict],
|
|
530
|
+
avg_chunk_length: float,
|
|
531
|
+
context_loss_score: float,
|
|
532
|
+
heatmap_data: list[dict],
|
|
533
|
+
) -> str:
|
|
534
|
+
"""Generate HTML report from test results (compatibility wrapper).
|
|
535
|
+
|
|
536
|
+
Delegates to HTMLReport.generate() for consistent output.
|
|
537
|
+
"""
|
|
538
|
+
from ragcheck.core.config import Config
|
|
539
|
+
|
|
540
|
+
config = Config()
|
|
541
|
+
reporter = HTMLReport(config)
|
|
542
|
+
|
|
543
|
+
# Build data dict matching HTMLReport.generate() expectations
|
|
544
|
+
data = {
|
|
545
|
+
"project_name": project_name,
|
|
546
|
+
"retrieval_results": {
|
|
547
|
+
"score": (overall_score * 100) if overall_score is not None else 0,
|
|
548
|
+
"passed": tests_passed,
|
|
549
|
+
"total": tests_total,
|
|
550
|
+
"details": [],
|
|
551
|
+
},
|
|
552
|
+
"chunks": [],
|
|
553
|
+
"failures": failures,
|
|
554
|
+
"recommendations": recommendations,
|
|
555
|
+
"config": config,
|
|
556
|
+
}
|
|
557
|
+
|
|
558
|
+
return reporter.generate(data)
|
|
File without changes
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
"""Analyzers for ragcheck."""
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
"""Core utilities for ragcheck."""
|
|
@@ -1,41 +0,0 @@
|
|
|
1
|
-
"""Rich progress tracking for ragcheck operations."""
|
|
2
|
-
|
|
3
|
-
from rich.console import Console
|
|
4
|
-
from rich.progress import (
|
|
5
|
-
BarColumn,
|
|
6
|
-
Progress,
|
|
7
|
-
SpinnerColumn,
|
|
8
|
-
TextColumn,
|
|
9
|
-
TimeElapsedColumn,
|
|
10
|
-
TimeRemainingColumn,
|
|
11
|
-
)
|
|
12
|
-
|
|
13
|
-
console = Console()
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
def get_progress() -> Progress:
|
|
17
|
-
"""Get a pre-configured Rich Progress instance."""
|
|
18
|
-
return Progress(
|
|
19
|
-
SpinnerColumn(),
|
|
20
|
-
TextColumn("[progress.description]{task.description}"),
|
|
21
|
-
BarColumn(),
|
|
22
|
-
TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
|
|
23
|
-
TimeElapsedColumn(),
|
|
24
|
-
TimeRemainingColumn(),
|
|
25
|
-
console=console,
|
|
26
|
-
)
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
def print_success(message: str) -> None:
|
|
30
|
-
"""Print a success message."""
|
|
31
|
-
console.print(f"[bold green]OK[/bold green] {message}")
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
def print_warning(message: str) -> None:
|
|
35
|
-
"""Print a warning message."""
|
|
36
|
-
console.print(f"[bold yellow]⚠[/bold yellow] {message}")
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
def print_error(message: str) -> None:
|
|
40
|
-
"""Print an error message."""
|
|
41
|
-
console.print(f"[bold red]✗[/bold red] {message}")
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
"""Report generation for ragcheck."""
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
"""Test generation and retrieval testing for ragcheck."""
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|