PyPI - ragcheck-cli - Versions diffs - 0.2.2__tar.gz → 0.2.4__tar.gz - Mend

ragcheck-cli 0.2.2.tar.gz → 0.2.4.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (43)

{ragcheck_cli-0.2.2 → ragcheck_cli-0.2.4}/CHANGELOG.md RENAMED Viewed

@@ -2,6 +2,13 @@
 All notable changes to this project will be documented in this file.
+## [0.2.2] - 2026-06-06
+### Fixed
+- **Prompt size reduction** — Shrunk auto-QA prompt from ~1500 to ~800 chars + compact instructions. Stays well under Groq 6000 TPM limit.
+- **Gemini 3+ compatibility** — Skip deprecated `temperature`/`top_p`/`top_k` params for `gemini/gemini-3.*` models to suppress deprecation warnings.
+- **Better Q&A parsing** — Accept both `Q:/A:` and `Question:/Answer:` formats from LLM responses.
 ## [0.2.0] - 2026-06-04
 ### Added
@@ -33,4 +40,4 @@ All notable changes to this project will be documented in this file.
 - Recommendation engine with decision tree
 - Beautiful HTML reports (single file, no server)
 - CI/CD mode with GitHub Actions
-- PDF/PNG export via Playwright
+- PDF/PNG export via Playwright

{ragcheck_cli-0.2.2/ragcheck_cli.egg-info → ragcheck_cli-0.2.4}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ragcheck-cli
-Version: 0.2.2
+Version: 0.2.4
 Summary: Lighthouse for RAG systems - diagnose and fix your retrieval pipeline
 Author-email: Pranay Mane <pranaymane78@gmail.com>
 License: MIT
@@ -181,6 +181,7 @@ MIT — see [LICENSE](LICENSE)
 ## Roadmap
 - [x] v0.2.0 — Offline reports, NLI faithfulness, scaled auto-QA, chunk viz
+- [x] v0.2.2 — Prompt size fix, Gemini 3+ support, Groq TPM compliance
 - [ ] v0.3.0 — More vector DBs (Pinecone, Weaviate)
 - [ ] v0.3.0 — SaaS API for teams
 - [ ] v0.4.0 — Enterprise features (SSO, audit logs)

{ragcheck_cli-0.2.2 → ragcheck_cli-0.2.4}/README.md RENAMED Viewed

@@ -140,6 +140,7 @@ MIT — see [LICENSE](LICENSE)
 ## Roadmap
 - [x] v0.2.0 — Offline reports, NLI faithfulness, scaled auto-QA, chunk viz
+- [x] v0.2.2 — Prompt size fix, Gemini 3+ support, Groq TPM compliance
 - [ ] v0.3.0 — More vector DBs (Pinecone, Weaviate)
 - [ ] v0.3.0 — SaaS API for teams
 - [ ] v0.4.0 — Enterprise features (SSO, audit logs)

{ragcheck_cli-0.2.2 → ragcheck_cli-0.2.4}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "ragcheck-cli"
-version = "0.2.2"
+version = "0.2.4"
 description = "Lighthouse for RAG systems - diagnose and fix your retrieval pipeline"
 readme = "README.md"
 license = {text = "MIT"}

{ragcheck_cli-0.2.2 → ragcheck_cli-0.2.4}/ragcheck/__init__.py RENAMED Viewed

@@ -1,3 +1,3 @@
 """ragcheck — Lighthouse for RAG systems."""
-__version__ = "0.2.2"
+__version__ = "0.2.4"

{ragcheck_cli-0.2.2 → ragcheck_cli-0.2.4}/ragcheck/cli.py RENAMED Viewed

@@ -146,14 +146,11 @@ def run(
         if not questions:
             # FALLBACK: Generate meaningful questions from chunk content
             warnings.warn(
-                "LLM question generation failed. Using content-based fallback questions. "
-                "To use a real LLM:
-"
-                "  1. Get a free Groq key: https://console.groq.com/keys
-"
-                "  2. Run: set GROQ_API_KEY=your_key (Windows)
-"
-                "  3. Or ensure Ollama is running: ollama run phi3:mini",
+                """LLM question generation failed. Using content-based fallback questions.
+To use a real LLM:
+  1. Get a free Groq key: https://console.groq.com/keys
+  2. Run: set GROQ_API_KEY=your_key (Windows)
+  3. Or ensure Ollama is running: ollama run phi3:mini""",
                 UserWarning,
             )
             questions = generate_dummy_questions([c.text for c in chunks])
@@ -211,12 +208,12 @@ def run(
     # Summary
     score = retrieval_results["score"]
     color = "green" if score >= 80 else "yellow" if score >= 60 else "red"
-    console.print(f"
-[{color}]Tests: {retrieval_results['passed']}/{retrieval_results['total']} passed | Score: {score}%[/[{color}]]")
+    console.print(f"\n[{color}]Tests: {retrieval_results['passed']}/{retrieval_results['total']} passed | Score: {score}%[/[{color}]]")
     if score < 100:
-        console.print("
-[bold]Top Recommendations:[/bold]")
+        console.print("\n[bold]Top Recommendations:[/bold]")
         for rec in recommendations[:3]:
             console.print(f"  • {rec['title']}: {rec['description'][:60]}...")

{ragcheck_cli-0.2.2 → ragcheck_cli-0.2.4}/ragcheck/testers/auto_qa.py RENAMED Viewed

@@ -1,7 +1,6 @@
 """Auto-generate test questions from documents using LLMs."""
 import os
-import random
 import re
 from typing import List
@@ -24,9 +23,8 @@ class AutoQA:
         if not chunks:
             return []
-        # Sample chunks — use more for better coverage, weighted toward longer chunks
+        # Sample chunks — prefer longer, substantive chunks
         sample_size = min(self.config.max_qa_questions, len(chunks))
-        # Sort by length descending to prefer substantive chunks
         sorted_chunks = sorted(chunks, key=len, reverse=True)
         sampled = sorted_chunks[:sample_size]
@@ -41,56 +39,54 @@ class AutoQA:
     def _generate_question(self, chunk_text: str, index: int) -> TestQuestion:
         """Generate a single question from a chunk."""
-        # Truncate for prompt but keep enough context
-        truncated = chunk_text[:1500]
+        # v0.2.2 FIX: Reduced from 1500 to 800 to stay well under Groq 6000 TPM
+        truncated = chunk_text[:800]
-        prompt = f"""You are a legal document analyst. Given the following excerpt from an Indian legal document (BNS, BNSS, or BSA), generate ONE specific factual question that can be answered directly from this text. The question should ask about a specific section, provision, or legal procedure mentioned.
+        # v0.2.2 FIX: Compact prompt — shaved ~60% token count
+        prompt = f"""Generate ONE factual Q&A from this legal excerpt:
-Excerpt:
 {truncated}
-Generate exactly one question in this format:
-Question: <your question here>
-Answer: <the exact answer from the text>
+Format:
+Q: <specific question about a section/provision>
+A: <exact answer from text>
-Rules:
-- Question must be specific and factual (not "What is this about?")
-- Answer must be verbatim or closely paraphrased from the text
-- Focus on section numbers, legal procedures, penalties, or definitions
-"""
+Rules: Be specific. No "What is this about?". Cite section numbers if present."""
         try:
             kwargs = {
                 "model": self.config.qa_model,
                 "messages": [{"role": "user", "content": prompt}],
-                "temperature": self.config.qa_temperature,
                 "max_tokens": 300,
             }
-            # Add API key if available
+            # v0.2.2 FIX: Gemini 3+ deprecates temperature/top_p/top_k — skip them
+            if not self.config.qa_model.startswith("gemini/gemini-3"):
+                kwargs["temperature"] = getattr(self.config, "qa_temperature", 0.3)
+            # API key handling
             api_key = os.environ.get("GROQ_API_KEY") or os.environ.get("OPENAI_API_KEY")
             if api_key:
                 kwargs["api_key"] = api_key
-            # CRITICAL FIX: Add base_url for Ollama
+            # Ollama base_url
             if self.config.qa_model.startswith("ollama/"):
                 kwargs["base_url"] = os.environ.get("OLLAMA_BASE_URL", "http://localhost:11434")
-                # Ollama doesn't need api_key
                 kwargs.pop("api_key", None)
             response = completion(**kwargs)
             content = response.choices[0].message.content
-            # Parse question and answer
-            q_match = re.search(r"Question:\s*(.+?)(?=\nAnswer:|$)", content, re.DOTALL)
-            a_match = re.search(r"Answer:\s*(.+?)$", content, re.DOTALL)
+            # Parse Q&A — handle both "Q:/A:" and "Question:/Answer:" formats
+            q_match = re.search(r"(?:Q|Question):\s*(.+?)(?=\n(?:A|Answer):|$)", content, re.DOTALL)
+            a_match = re.search(r"(?:A|Answer):\s*(.+?)$", content, re.DOTALL)
             if q_match and a_match:
                 question_text = q_match.group(1).strip()
                 answer_text = a_match.group(1).strip()
-                # Validate: answer must be in chunk
-                if answer_text.lower() in chunk_text.lower() or len(answer_text) > 20:
+                # Validate: answer must be in chunk or be substantial
+                if answer_text.lower() in chunk_text.lower() or len(answer_text) > 15:
                     return TestQuestion(
                         question=question_text,
                         expected_answer=answer_text,
@@ -99,8 +95,7 @@ Rules:
                     )
         except Exception as e:
-            # Log but don't crash — fallback handled by caller
-            if self.config.verbose:
+            if getattr(self.config, "verbose", False):
                 print(f"QA generation failed for chunk {index}: {e}")
         return None
@@ -109,10 +104,8 @@ Rules:
 def generate_dummy_questions(chunks: List[str]) -> List[TestQuestion]:
     """Fallback: create questions from chunk content directly."""
     questions = []
-    for i, chunk in enumerate(chunks[:10]):  # Limit to avoid spam
-        # Extract first sentence or first 100 chars as the "question"
+    for i, chunk in enumerate(chunks[:10]):
         first_sentence = chunk.split(".")[0] if "." in chunk else chunk[:100]
-        # Create a question that references the actual content
         questions.append(TestQuestion(
             question=f"What does the document state regarding: {first_sentence[:80]}?",
             expected_answer=chunk,

{ragcheck_cli-0.2.2 → ragcheck_cli-0.2.4/ragcheck_cli.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ragcheck-cli
-Version: 0.2.2
+Version: 0.2.4
 Summary: Lighthouse for RAG systems - diagnose and fix your retrieval pipeline
 Author-email: Pranay Mane <pranaymane78@gmail.com>
 License: MIT
@@ -181,6 +181,7 @@ MIT — see [LICENSE](LICENSE)
 ## Roadmap
 - [x] v0.2.0 — Offline reports, NLI faithfulness, scaled auto-QA, chunk viz
+- [x] v0.2.2 — Prompt size fix, Gemini 3+ support, Groq TPM compliance
 - [ ] v0.3.0 — More vector DBs (Pinecone, Weaviate)
 - [ ] v0.3.0 — SaaS API for teams
 - [ ] v0.4.0 — Enterprise features (SSO, audit logs)