rag-scorecard 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,15 @@
1
+ Metadata-Version: 2.4
2
+ Name: rag-scorecard
3
+ Version: 0.1.0
4
+ Summary: A CLI and SDK tool to audit RAG pipelines for systemic infrastructure weak points.
5
+ Requires-Python: >=3.10
6
+ Description-Content-Type: text/markdown
7
+ Requires-Dist: pydantic>=2.4.0
8
+ Requires-Dist: chromadb>=0.4.15
9
+ Requires-Dist: typer>=0.9.0
10
+ Requires-Dist: rich>=13.6.0
11
+ Requires-Dist: pyyaml>=6.0.1
12
+
13
+ # rag-scorecard
14
+
15
+ A CLI and SDK tool to audit RAG pipelines.
@@ -0,0 +1,3 @@
1
+ # rag-scorecard
2
+
3
+ A CLI and SDK tool to audit RAG pipelines.
@@ -0,0 +1,23 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61.0.0"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "rag-scorecard"
7
+ version = "0.1.0"
8
+ description = "A CLI and SDK tool to audit RAG pipelines for systemic infrastructure weak points."
9
+ readme = "README.md"
10
+ requires-python = ">=3.10"
11
+ dependencies = [
12
+ "pydantic>=2.4.0",
13
+ "chromadb>=0.4.15",
14
+ "typer>=0.9.0",
15
+ "rich>=13.6.0",
16
+ "pyyaml>=6.0.1",
17
+ ]
18
+
19
+ [project.scripts]
20
+ rag-scorecard = "scorecard.cli:app"
21
+
22
+ [tool.setuptools]
23
+ packages = ["scorecard"]
@@ -0,0 +1,15 @@
1
+ Metadata-Version: 2.4
2
+ Name: rag-scorecard
3
+ Version: 0.1.0
4
+ Summary: A CLI and SDK tool to audit RAG pipelines for systemic infrastructure weak points.
5
+ Requires-Python: >=3.10
6
+ Description-Content-Type: text/markdown
7
+ Requires-Dist: pydantic>=2.4.0
8
+ Requires-Dist: chromadb>=0.4.15
9
+ Requires-Dist: typer>=0.9.0
10
+ Requires-Dist: rich>=13.6.0
11
+ Requires-Dist: pyyaml>=6.0.1
12
+
13
+ # rag-scorecard
14
+
15
+ A CLI and SDK tool to audit RAG pipelines.
@@ -0,0 +1,11 @@
1
+ README.md
2
+ pyproject.toml
3
+ rag_scorecard.egg-info/PKG-INFO
4
+ rag_scorecard.egg-info/SOURCES.txt
5
+ rag_scorecard.egg-info/dependency_links.txt
6
+ rag_scorecard.egg-info/entry_points.txt
7
+ rag_scorecard.egg-info/requires.txt
8
+ rag_scorecard.egg-info/top_level.txt
9
+ scorecard/__init__.py
10
+ scorecard/models.py
11
+ scorecard/reporter.py
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ rag-scorecard = scorecard.cli:app
@@ -0,0 +1,5 @@
1
+ pydantic>=2.4.0
2
+ chromadb>=0.4.15
3
+ typer>=0.9.0
4
+ rich>=13.6.0
5
+ pyyaml>=6.0.1
@@ -0,0 +1 @@
1
+ scorecard
@@ -0,0 +1 @@
1
# Version string of the installed ``scorecard`` package.
__version__ = "0.1.0"
@@ -0,0 +1,23 @@
1
+
2
+ from typing import List, Optional, Dict, Any
3
+ from pydantic import BaseModel, Field, ConfigDict
4
+
5
# One evaluated RAG interaction: the query, what the retriever returned for
# it (with scores), and optionally the generated answer and a reference
# answer to compare against.
class RAGSample(BaseModel):
    # Enable pydantic's strict validation mode for every field of this model.
    model_config = ConfigDict(strict=True)

    # Required fields (no default supplied).
    query: str = Field(description="The user query or prompt.")
    retrieved_contexts: list[str] = Field(
        description="List of document strings returned by the retriever.",
    )
    context_scores: list[float] = Field(
        description="Distance or similarity scores corresponding to retrieved contexts.",
    )

    # Optional fields; both default to None when omitted.
    generated_response: str | None = Field(
        default=None,
        description="The final generation from the LLM, if applicable.",
    )
    ground_truth: str | None = Field(
        default=None,
        description="The expected factual answer or exact context substring to match against.",
    )
13
+
14
# Outcome of a single metric: its name, a numeric score, and free-form
# extra details.
class MetricResult(BaseModel):
    metric_name: str
    score: float
    # A fresh empty dict is created per instance when no metadata is given.
    metadata: dict[str, Any] = Field(default_factory=dict)
18
+
19
# Full result of an evaluation run: the evaluated samples plus run-level
# metric, latency and cost figures. All four fields are required.
class EvaluationReport(BaseModel):
    samples: list[RAGSample]
    # Metric name -> aggregated score.
    aggregated_metrics: dict[str, float]
    # Stage name -> latency in milliseconds.
    latency_breakdown_ms: dict[str, float]
    estimated_cost_usd: float
@@ -0,0 +1,24 @@
1
+ from scorecard.models import EvaluationReport
2
+
3
def print_terminal_scorecard(report: EvaluationReport):
    """Print a plain-text summary of ``report`` to standard output.

    The summary lists every aggregated metric (four decimal places), the
    estimated cost in USD (six decimal places) and every latency entry in
    milliseconds (two decimal places), framed by a header and a footer line.
    Entries appear in the iteration order of the report's dictionaries.

    Args:
        report: The evaluation report to display.
    """

    def _summary_lines():
        # Lines are produced lazily so each one is printed as soon as it has
        # been formatted, in the same order as the report sections.
        yield "\n--- RAG Scorecard Report ---"
        yield "Aggregated Metrics:"
        for name, value in report.aggregated_metrics.items():
            yield f" {name}: {value:.4f}"
        yield f"Estimated Cost (USD): {report.estimated_cost_usd:.6f}"
        yield "Latency Breakdown (ms):"
        for phase, millis in report.latency_breakdown_ms.items():
            yield f" {phase}: {millis:.2f}"
        yield "---------------------------\n"

    for text in _summary_lines():
        print(text)
13
+
14
def export_markdown_scorecard(report: EvaluationReport) -> str:
    """Render ``report`` as a Markdown document and return it as a string.

    The document contains an "Aggregated Metrics" bullet list (scores with
    four decimal places), a "Cost & Latency" section with the estimated cost
    in USD (six decimal places), and a "Latency Breakdown" bullet list
    (values with two decimal places followed by ``ms``). Entries appear in
    the iteration order of the report's dictionaries.

    Args:
        report: The evaluation report to render.

    Returns:
        The Markdown text, terminated by a newline.
    """
    # Collect fragments and join once instead of growing a string with `+=`
    # inside the loops.
    fragments = ["# RAG Scorecard Report\n\n", "## Aggregated Metrics\n"]
    fragments.extend(
        f"- **{name}**: {value:.4f}\n"
        for name, value in report.aggregated_metrics.items()
    )
    fragments.append("\n## Cost & Latency\n")
    fragments.append(
        f"- **Estimated Cost (USD)**: {report.estimated_cost_usd:.6f}\n"
    )
    fragments.append("\n### Latency Breakdown\n")
    fragments.extend(
        f"- **{phase}**: {millis:.2f} ms\n"
        for phase, millis in report.latency_breakdown_ms.items()
    )
    return "".join(fragments)
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+