ragcheck-cli 0.2.2__tar.gz → 0.2.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ragcheck_cli-0.2.2 → ragcheck_cli-0.2.4}/CHANGELOG.md +8 -1
- {ragcheck_cli-0.2.2/ragcheck_cli.egg-info → ragcheck_cli-0.2.4}/PKG-INFO +2 -1
- {ragcheck_cli-0.2.2 → ragcheck_cli-0.2.4}/README.md +1 -0
- {ragcheck_cli-0.2.2 → ragcheck_cli-0.2.4}/pyproject.toml +1 -1
- {ragcheck_cli-0.2.2 → ragcheck_cli-0.2.4}/ragcheck/__init__.py +1 -1
- {ragcheck_cli-0.2.2 → ragcheck_cli-0.2.4}/ragcheck/cli.py +9 -12
- {ragcheck_cli-0.2.2 → ragcheck_cli-0.2.4}/ragcheck/testers/auto_qa.py +22 -29
- {ragcheck_cli-0.2.2 → ragcheck_cli-0.2.4/ragcheck_cli.egg-info}/PKG-INFO +2 -1
- {ragcheck_cli-0.2.2 → ragcheck_cli-0.2.4}/LICENSE +0 -0
- {ragcheck_cli-0.2.2 → ragcheck_cli-0.2.4}/MANIFEST.in +0 -0
- {ragcheck_cli-0.2.2 → ragcheck_cli-0.2.4}/docs/ARCHITECTURE.md +0 -0
- {ragcheck_cli-0.2.2 → ragcheck_cli-0.2.4}/examples/chunk_demo.py +0 -0
- {ragcheck_cli-0.2.2 → ragcheck_cli-0.2.4}/examples/classifier_demo.py +0 -0
- {ragcheck_cli-0.2.2 → ragcheck_cli-0.2.4}/examples/demo.py +0 -0
- {ragcheck_cli-0.2.2 → ragcheck_cli-0.2.4}/examples/embed_demo.py +0 -0
- {ragcheck_cli-0.2.2 → ragcheck_cli-0.2.4}/examples/full_pipeline_demo.py +0 -0
- {ragcheck_cli-0.2.2 → ragcheck_cli-0.2.4}/examples/qa_demo.py +0 -0
- {ragcheck_cli-0.2.2 → ragcheck_cli-0.2.4}/examples/report_demo.py +0 -0
- {ragcheck_cli-0.2.2 → ragcheck_cli-0.2.4}/ragcheck/__main__.py +0 -0
- {ragcheck_cli-0.2.2 → ragcheck_cli-0.2.4}/ragcheck/analyzers/__init__.py +0 -0
- {ragcheck_cli-0.2.2 → ragcheck_cli-0.2.4}/ragcheck/analyzers/chunkers.py +0 -0
- {ragcheck_cli-0.2.2 → ragcheck_cli-0.2.4}/ragcheck/analyzers/failure_classifier.py +0 -0
- {ragcheck_cli-0.2.2 → ragcheck_cli-0.2.4}/ragcheck/analyzers/recommender.py +0 -0
- {ragcheck_cli-0.2.2 → ragcheck_cli-0.2.4}/ragcheck/core/__init__.py +0 -0
- {ragcheck_cli-0.2.2 → ragcheck_cli-0.2.4}/ragcheck/core/config.py +0 -0
- {ragcheck_cli-0.2.2 → ragcheck_cli-0.2.4}/ragcheck/core/config_loader.py +0 -0
- {ragcheck_cli-0.2.2 → ragcheck_cli-0.2.4}/ragcheck/core/document_loader.py +0 -0
- {ragcheck_cli-0.2.2 → ragcheck_cli-0.2.4}/ragcheck/core/embeddings.py +0 -0
- {ragcheck_cli-0.2.2 → ragcheck_cli-0.2.4}/ragcheck/core/progress.py +0 -0
- {ragcheck_cli-0.2.2 → ragcheck_cli-0.2.4}/ragcheck/core/vector_store.py +0 -0
- {ragcheck_cli-0.2.2 → ragcheck_cli-0.2.4}/ragcheck/reports/__init__.py +0 -0
- {ragcheck_cli-0.2.2 → ragcheck_cli-0.2.4}/ragcheck/reports/chunk_visualizer.py +0 -0
- {ragcheck_cli-0.2.2 → ragcheck_cli-0.2.4}/ragcheck/reports/export.py +0 -0
- {ragcheck_cli-0.2.2 → ragcheck_cli-0.2.4}/ragcheck/reports/generator.py +0 -0
- {ragcheck_cli-0.2.2 → ragcheck_cli-0.2.4}/ragcheck/reports/html_report.py +0 -0
- {ragcheck_cli-0.2.2 → ragcheck_cli-0.2.4}/ragcheck/testers/__init__.py +0 -0
- {ragcheck_cli-0.2.2 → ragcheck_cli-0.2.4}/ragcheck/testers/retrieval_tester.py +0 -0
- {ragcheck_cli-0.2.2 → ragcheck_cli-0.2.4}/ragcheck_cli.egg-info/SOURCES.txt +0 -0
- {ragcheck_cli-0.2.2 → ragcheck_cli-0.2.4}/ragcheck_cli.egg-info/dependency_links.txt +0 -0
- {ragcheck_cli-0.2.2 → ragcheck_cli-0.2.4}/ragcheck_cli.egg-info/entry_points.txt +0 -0
- {ragcheck_cli-0.2.2 → ragcheck_cli-0.2.4}/ragcheck_cli.egg-info/requires.txt +0 -0
- {ragcheck_cli-0.2.2 → ragcheck_cli-0.2.4}/ragcheck_cli.egg-info/top_level.txt +0 -0
- {ragcheck_cli-0.2.2 → ragcheck_cli-0.2.4}/setup.cfg +0 -0
|
@@ -2,6 +2,13 @@
|
|
|
2
2
|
|
|
3
3
|
All notable changes to this project will be documented in this file.
|
|
4
4
|
|
|
5
|
+
## [0.2.2] - 2026-06-06
|
|
6
|
+
|
|
7
|
+
### Fixed
|
|
8
|
+
- **Prompt size reduction** — Shrank the auto-QA prompt excerpt from ~1500 to ~800 chars and compacted the instructions, so requests stay well under the Groq 6000 TPM limit.
|
|
9
|
+
- **Gemini 3+ compatibility** — Skip deprecated `temperature`/`top_p`/`top_k` params for `gemini/gemini-3.*` models to suppress deprecation warnings.
|
|
10
|
+
- **Better Q&A parsing** — Accept both `Q:/A:` and `Question:/Answer:` formats from LLM responses.
|
|
11
|
+
|
|
5
12
|
## [0.2.0] - 2026-06-04
|
|
6
13
|
|
|
7
14
|
### Added
|
|
@@ -33,4 +40,4 @@ All notable changes to this project will be documented in this file.
|
|
|
33
40
|
- Recommendation engine with decision tree
|
|
34
41
|
- Beautiful HTML reports (single file, no server)
|
|
35
42
|
- CI/CD mode with GitHub Actions
|
|
36
|
-
- PDF/PNG export via Playwright
|
|
43
|
+
- PDF/PNG export via Playwright
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ragcheck-cli
|
|
3
|
-
Version: 0.2.2
|
|
3
|
+
Version: 0.2.4
|
|
4
4
|
Summary: Lighthouse for RAG systems - diagnose and fix your retrieval pipeline
|
|
5
5
|
Author-email: Pranay Mane <pranaymane78@gmail.com>
|
|
6
6
|
License: MIT
|
|
@@ -181,6 +181,7 @@ MIT — see [LICENSE](LICENSE)
|
|
|
181
181
|
## Roadmap
|
|
182
182
|
|
|
183
183
|
- [x] v0.2.0 — Offline reports, NLI faithfulness, scaled auto-QA, chunk viz
|
|
184
|
+
- [x] v0.2.2 — Prompt size fix, Gemini 3+ support, Groq TPM compliance
|
|
184
185
|
- [ ] v0.3.0 — More vector DBs (Pinecone, Weaviate)
|
|
185
186
|
- [ ] v0.3.0 — SaaS API for teams
|
|
186
187
|
- [ ] v0.4.0 — Enterprise features (SSO, audit logs)
|
|
@@ -140,6 +140,7 @@ MIT — see [LICENSE](LICENSE)
|
|
|
140
140
|
## Roadmap
|
|
141
141
|
|
|
142
142
|
- [x] v0.2.0 — Offline reports, NLI faithfulness, scaled auto-QA, chunk viz
|
|
143
|
+
- [x] v0.2.2 — Prompt size fix, Gemini 3+ support, Groq TPM compliance
|
|
143
144
|
- [ ] v0.3.0 — More vector DBs (Pinecone, Weaviate)
|
|
144
145
|
- [ ] v0.3.0 — SaaS API for teams
|
|
145
146
|
- [ ] v0.4.0 — Enterprise features (SSO, audit logs)
|
|
@@ -146,14 +146,11 @@ def run(
|
|
|
146
146
|
if not questions:
|
|
147
147
|
# FALLBACK: Generate meaningful questions from chunk content
|
|
148
148
|
warnings.warn(
|
|
149
|
-
"LLM question generation failed. Using content-based fallback questions.
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
"
|
|
154
|
-
" 2. Run: set GROQ_API_KEY=your_key (Windows)
|
|
155
|
-
"
|
|
156
|
-
" 3. Or ensure Ollama is running: ollama run phi3:mini",
|
|
149
|
+
"""LLM question generation failed. Using content-based fallback questions.
|
|
150
|
+
To use a real LLM:
|
|
151
|
+
1. Get a free Groq key: https://console.groq.com/keys
|
|
152
|
+
2. Run: set GROQ_API_KEY=your_key (Windows)
|
|
153
|
+
3. Or ensure Ollama is running: ollama run phi3:mini""",
|
|
157
154
|
UserWarning,
|
|
158
155
|
)
|
|
159
156
|
questions = generate_dummy_questions([c.text for c in chunks])
|
|
@@ -211,12 +208,12 @@ def run(
|
|
|
211
208
|
# Summary
|
|
212
209
|
score = retrieval_results["score"]
|
|
213
210
|
color = "green" if score >= 80 else "yellow" if score >= 60 else "red"
|
|
214
|
-
console.print(f"
|
|
215
|
-
|
|
211
|
+
console.print(f"\n[{color}]Tests: {retrieval_results['passed']}/{retrieval_results['total']} passed | Score: {score}%[/[{color}]]")
|
|
212
|
+
|
|
216
213
|
|
|
217
214
|
if score < 100:
|
|
218
|
-
console.print("
|
|
219
|
-
|
|
215
|
+
console.print("\n[bold]Top Recommendations:[/bold]")
|
|
216
|
+
|
|
220
217
|
for rec in recommendations[:3]:
|
|
221
218
|
console.print(f" • {rec['title']}: {rec['description'][:60]}...")
|
|
222
219
|
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
"""Auto-generate test questions from documents using LLMs."""
|
|
2
2
|
|
|
3
3
|
import os
|
|
4
|
-
import random
|
|
5
4
|
import re
|
|
6
5
|
from typing import List
|
|
7
6
|
|
|
@@ -24,9 +23,8 @@ class AutoQA:
|
|
|
24
23
|
if not chunks:
|
|
25
24
|
return []
|
|
26
25
|
|
|
27
|
-
# Sample chunks —
|
|
26
|
+
# Sample chunks — prefer longer, substantive chunks
|
|
28
27
|
sample_size = min(self.config.max_qa_questions, len(chunks))
|
|
29
|
-
# Sort by length descending to prefer substantive chunks
|
|
30
28
|
sorted_chunks = sorted(chunks, key=len, reverse=True)
|
|
31
29
|
sampled = sorted_chunks[:sample_size]
|
|
32
30
|
|
|
@@ -41,56 +39,54 @@ class AutoQA:
|
|
|
41
39
|
|
|
42
40
|
def _generate_question(self, chunk_text: str, index: int) -> TestQuestion:
|
|
43
41
|
"""Generate a single question from a chunk."""
|
|
44
|
-
#
|
|
45
|
-
truncated = chunk_text[:1500]
|
|
42
|
+
# v0.2.2 FIX: Reduced from 1500 to 800 to stay well under Groq 6000 TPM
|
|
43
|
+
truncated = chunk_text[:800]
|
|
46
44
|
|
|
47
|
-
|
|
45
|
+
# v0.2.2 FIX: Compact prompt — shaved ~60% token count
|
|
46
|
+
prompt = f"""Generate ONE factual Q&A from this legal excerpt:
|
|
48
47
|
|
|
49
|
-
Excerpt:
|
|
50
48
|
{truncated}
|
|
51
49
|
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
50
|
+
Format:
|
|
51
|
+
Q: <specific question about a section/provision>
|
|
52
|
+
A: <exact answer from text>
|
|
55
53
|
|
|
56
|
-
Rules:
|
|
57
|
-
- Question must be specific and factual (not "What is this about?")
|
|
58
|
-
- Answer must be verbatim or closely paraphrased from the text
|
|
59
|
-
- Focus on section numbers, legal procedures, penalties, or definitions
|
|
60
|
-
"""
|
|
54
|
+
Rules: Be specific. No "What is this about?". Cite section numbers if present."""
|
|
61
55
|
|
|
62
56
|
try:
|
|
63
57
|
kwargs = {
|
|
64
58
|
"model": self.config.qa_model,
|
|
65
59
|
"messages": [{"role": "user", "content": prompt}],
|
|
66
|
-
"temperature": self.config.qa_temperature,
|
|
67
60
|
"max_tokens": 300,
|
|
68
61
|
}
|
|
69
62
|
|
|
70
|
-
#
|
|
63
|
+
# v0.2.2 FIX: Gemini 3+ deprecates temperature/top_p/top_k — skip them
|
|
64
|
+
if not self.config.qa_model.startswith("gemini/gemini-3"):
|
|
65
|
+
kwargs["temperature"] = getattr(self.config, "qa_temperature", 0.3)
|
|
66
|
+
|
|
67
|
+
# API key handling
|
|
71
68
|
api_key = os.environ.get("GROQ_API_KEY") or os.environ.get("OPENAI_API_KEY")
|
|
72
69
|
if api_key:
|
|
73
70
|
kwargs["api_key"] = api_key
|
|
74
71
|
|
|
75
|
-
#
|
|
72
|
+
# Ollama base_url
|
|
76
73
|
if self.config.qa_model.startswith("ollama/"):
|
|
77
74
|
kwargs["base_url"] = os.environ.get("OLLAMA_BASE_URL", "http://localhost:11434")
|
|
78
|
-
# Ollama doesn't need api_key
|
|
79
75
|
kwargs.pop("api_key", None)
|
|
80
76
|
|
|
81
77
|
response = completion(**kwargs)
|
|
82
78
|
content = response.choices[0].message.content
|
|
83
79
|
|
|
84
|
-
# Parse
|
|
85
|
-
q_match = re.search(r"Question:\s*(.+?)(?=\
|
|
86
|
-
a_match = re.search(r"Answer:\s*(.+?)$", content, re.DOTALL)
|
|
80
|
+
# Parse Q&A — handle both "Q:/A:" and "Question:/Answer:" formats
|
|
81
|
+
q_match = re.search(r"(?:Q|Question):\s*(.+?)(?=\n(?:A|Answer):|$)", content, re.DOTALL)
|
|
82
|
+
a_match = re.search(r"(?:A|Answer):\s*(.+?)$", content, re.DOTALL)
|
|
87
83
|
|
|
88
84
|
if q_match and a_match:
|
|
89
85
|
question_text = q_match.group(1).strip()
|
|
90
86
|
answer_text = a_match.group(1).strip()
|
|
91
87
|
|
|
92
|
-
# Validate: answer must be in chunk
|
|
93
|
-
if answer_text.lower() in chunk_text.lower() or len(answer_text) >
|
|
88
|
+
# Validate: answer must be in chunk or be substantial
|
|
89
|
+
if answer_text.lower() in chunk_text.lower() or len(answer_text) > 15:
|
|
94
90
|
return TestQuestion(
|
|
95
91
|
question=question_text,
|
|
96
92
|
expected_answer=answer_text,
|
|
@@ -99,8 +95,7 @@ Rules:
|
|
|
99
95
|
)
|
|
100
96
|
|
|
101
97
|
except Exception as e:
|
|
102
|
-
|
|
103
|
-
if self.config.verbose:
|
|
98
|
+
if getattr(self.config, "verbose", False):
|
|
104
99
|
print(f"QA generation failed for chunk {index}: {e}")
|
|
105
100
|
|
|
106
101
|
return None
|
|
@@ -109,10 +104,8 @@ Rules:
|
|
|
109
104
|
def generate_dummy_questions(chunks: List[str]) -> List[TestQuestion]:
|
|
110
105
|
"""Fallback: create questions from chunk content directly."""
|
|
111
106
|
questions = []
|
|
112
|
-
for i, chunk in enumerate(chunks[:10]):
|
|
113
|
-
# Extract first sentence or first 100 chars as the "question"
|
|
107
|
+
for i, chunk in enumerate(chunks[:10]):
|
|
114
108
|
first_sentence = chunk.split(".")[0] if "." in chunk else chunk[:100]
|
|
115
|
-
# Create a question that references the actual content
|
|
116
109
|
questions.append(TestQuestion(
|
|
117
110
|
question=f"What does the document state regarding: {first_sentence[:80]}?",
|
|
118
111
|
expected_answer=chunk,
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ragcheck-cli
|
|
3
|
-
Version: 0.2.2
|
|
3
|
+
Version: 0.2.4
|
|
4
4
|
Summary: Lighthouse for RAG systems - diagnose and fix your retrieval pipeline
|
|
5
5
|
Author-email: Pranay Mane <pranaymane78@gmail.com>
|
|
6
6
|
License: MIT
|
|
@@ -181,6 +181,7 @@ MIT — see [LICENSE](LICENSE)
|
|
|
181
181
|
## Roadmap
|
|
182
182
|
|
|
183
183
|
- [x] v0.2.0 — Offline reports, NLI faithfulness, scaled auto-QA, chunk viz
|
|
184
|
+
- [x] v0.2.2 — Prompt size fix, Gemini 3+ support, Groq TPM compliance
|
|
184
185
|
- [ ] v0.3.0 — More vector DBs (Pinecone, Weaviate)
|
|
185
186
|
- [ ] v0.3.0 — SaaS API for teams
|
|
186
187
|
- [ ] v0.4.0 — Enterprise features (SSO, audit logs)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|