ragcheck-cli 0.2.2__tar.gz → 0.2.3__tar.gz

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. {ragcheck_cli-0.2.2 → ragcheck_cli-0.2.3}/CHANGELOG.md +8 -1
  2. {ragcheck_cli-0.2.2/ragcheck_cli.egg-info → ragcheck_cli-0.2.3}/PKG-INFO +2 -1
  3. {ragcheck_cli-0.2.2 → ragcheck_cli-0.2.3}/README.md +1 -0
  4. {ragcheck_cli-0.2.2 → ragcheck_cli-0.2.3}/pyproject.toml +1 -1
  5. {ragcheck_cli-0.2.2 → ragcheck_cli-0.2.3}/ragcheck/__init__.py +1 -1
  6. {ragcheck_cli-0.2.2 → ragcheck_cli-0.2.3}/ragcheck/testers/auto_qa.py +22 -29
  7. {ragcheck_cli-0.2.2 → ragcheck_cli-0.2.3/ragcheck_cli.egg-info}/PKG-INFO +2 -1
  8. {ragcheck_cli-0.2.2 → ragcheck_cli-0.2.3}/LICENSE +0 -0
  9. {ragcheck_cli-0.2.2 → ragcheck_cli-0.2.3}/MANIFEST.in +0 -0
  10. {ragcheck_cli-0.2.2 → ragcheck_cli-0.2.3}/docs/ARCHITECTURE.md +0 -0
  11. {ragcheck_cli-0.2.2 → ragcheck_cli-0.2.3}/examples/chunk_demo.py +0 -0
  12. {ragcheck_cli-0.2.2 → ragcheck_cli-0.2.3}/examples/classifier_demo.py +0 -0
  13. {ragcheck_cli-0.2.2 → ragcheck_cli-0.2.3}/examples/demo.py +0 -0
  14. {ragcheck_cli-0.2.2 → ragcheck_cli-0.2.3}/examples/embed_demo.py +0 -0
  15. {ragcheck_cli-0.2.2 → ragcheck_cli-0.2.3}/examples/full_pipeline_demo.py +0 -0
  16. {ragcheck_cli-0.2.2 → ragcheck_cli-0.2.3}/examples/qa_demo.py +0 -0
  17. {ragcheck_cli-0.2.2 → ragcheck_cli-0.2.3}/examples/report_demo.py +0 -0
  18. {ragcheck_cli-0.2.2 → ragcheck_cli-0.2.3}/ragcheck/__main__.py +0 -0
  19. {ragcheck_cli-0.2.2 → ragcheck_cli-0.2.3}/ragcheck/analyzers/__init__.py +0 -0
  20. {ragcheck_cli-0.2.2 → ragcheck_cli-0.2.3}/ragcheck/analyzers/chunkers.py +0 -0
  21. {ragcheck_cli-0.2.2 → ragcheck_cli-0.2.3}/ragcheck/analyzers/failure_classifier.py +0 -0
  22. {ragcheck_cli-0.2.2 → ragcheck_cli-0.2.3}/ragcheck/analyzers/recommender.py +0 -0
  23. {ragcheck_cli-0.2.2 → ragcheck_cli-0.2.3}/ragcheck/cli.py +0 -0
  24. {ragcheck_cli-0.2.2 → ragcheck_cli-0.2.3}/ragcheck/core/__init__.py +0 -0
  25. {ragcheck_cli-0.2.2 → ragcheck_cli-0.2.3}/ragcheck/core/config.py +0 -0
  26. {ragcheck_cli-0.2.2 → ragcheck_cli-0.2.3}/ragcheck/core/config_loader.py +0 -0
  27. {ragcheck_cli-0.2.2 → ragcheck_cli-0.2.3}/ragcheck/core/document_loader.py +0 -0
  28. {ragcheck_cli-0.2.2 → ragcheck_cli-0.2.3}/ragcheck/core/embeddings.py +0 -0
  29. {ragcheck_cli-0.2.2 → ragcheck_cli-0.2.3}/ragcheck/core/progress.py +0 -0
  30. {ragcheck_cli-0.2.2 → ragcheck_cli-0.2.3}/ragcheck/core/vector_store.py +0 -0
  31. {ragcheck_cli-0.2.2 → ragcheck_cli-0.2.3}/ragcheck/reports/__init__.py +0 -0
  32. {ragcheck_cli-0.2.2 → ragcheck_cli-0.2.3}/ragcheck/reports/chunk_visualizer.py +0 -0
  33. {ragcheck_cli-0.2.2 → ragcheck_cli-0.2.3}/ragcheck/reports/export.py +0 -0
  34. {ragcheck_cli-0.2.2 → ragcheck_cli-0.2.3}/ragcheck/reports/generator.py +0 -0
  35. {ragcheck_cli-0.2.2 → ragcheck_cli-0.2.3}/ragcheck/reports/html_report.py +0 -0
  36. {ragcheck_cli-0.2.2 → ragcheck_cli-0.2.3}/ragcheck/testers/__init__.py +0 -0
  37. {ragcheck_cli-0.2.2 → ragcheck_cli-0.2.3}/ragcheck/testers/retrieval_tester.py +0 -0
  38. {ragcheck_cli-0.2.2 → ragcheck_cli-0.2.3}/ragcheck_cli.egg-info/SOURCES.txt +0 -0
  39. {ragcheck_cli-0.2.2 → ragcheck_cli-0.2.3}/ragcheck_cli.egg-info/dependency_links.txt +0 -0
  40. {ragcheck_cli-0.2.2 → ragcheck_cli-0.2.3}/ragcheck_cli.egg-info/entry_points.txt +0 -0
  41. {ragcheck_cli-0.2.2 → ragcheck_cli-0.2.3}/ragcheck_cli.egg-info/requires.txt +0 -0
  42. {ragcheck_cli-0.2.2 → ragcheck_cli-0.2.3}/ragcheck_cli.egg-info/top_level.txt +0 -0
  43. {ragcheck_cli-0.2.2 → ragcheck_cli-0.2.3}/setup.cfg +0 -0
@@ -2,6 +2,13 @@
2
2
 
3
3
  All notable changes to this project will be documented in this file.
4
4
 
5
+ ## [0.2.2] - 2026-06-06
6
+
7
+ ### Fixed
8
+ - **Prompt size reduction** — Shrunk auto-QA prompt from ~1500 to ~800 chars + compact instructions. Stays well under Groq 6000 TPM limit.
9
+ - **Gemini 3+ compatibility** — Skip deprecated `temperature`/`top_p`/`top_k` params for `gemini/gemini-3.*` models to suppress deprecation warnings.
10
+ - **Better Q&A parsing** — Accept both `Q:/A:` and `Question:/Answer:` formats from LLM responses.
11
+
5
12
  ## [0.2.0] - 2026-06-04
6
13
 
7
14
  ### Added
@@ -33,4 +40,4 @@ All notable changes to this project will be documented in this file.
33
40
  - Recommendation engine with decision tree
34
41
  - Beautiful HTML reports (single file, no server)
35
42
  - CI/CD mode with GitHub Actions
36
- - PDF/PNG export via Playwright
43
+ - PDF/PNG export via Playwright
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ragcheck-cli
3
- Version: 0.2.2
3
+ Version: 0.2.3
4
4
  Summary: Lighthouse for RAG systems - diagnose and fix your retrieval pipeline
5
5
  Author-email: Pranay Mane <pranaymane78@gmail.com>
6
6
  License: MIT
@@ -181,6 +181,7 @@ MIT — see [LICENSE](LICENSE)
181
181
  ## Roadmap
182
182
 
183
183
  - [x] v0.2.0 — Offline reports, NLI faithfulness, scaled auto-QA, chunk viz
184
+ - [x] v0.2.2 — Prompt size fix, Gemini 3+ support, Groq TPM compliance
184
185
  - [ ] v0.3.0 — More vector DBs (Pinecone, Weaviate)
185
186
  - [ ] v0.3.0 — SaaS API for teams
186
187
  - [ ] v0.4.0 — Enterprise features (SSO, audit logs)
@@ -140,6 +140,7 @@ MIT — see [LICENSE](LICENSE)
140
140
  ## Roadmap
141
141
 
142
142
  - [x] v0.2.0 — Offline reports, NLI faithfulness, scaled auto-QA, chunk viz
143
+ - [x] v0.2.2 — Prompt size fix, Gemini 3+ support, Groq TPM compliance
143
144
  - [ ] v0.3.0 — More vector DBs (Pinecone, Weaviate)
144
145
  - [ ] v0.3.0 — SaaS API for teams
145
146
  - [ ] v0.4.0 — Enterprise features (SSO, audit logs)
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "ragcheck-cli"
3
- version = "0.2.2"
3
+ version = "0.2.3"
4
4
  description = "Lighthouse for RAG systems - diagnose and fix your retrieval pipeline"
5
5
  readme = "README.md"
6
6
  license = {text = "MIT"}
@@ -1,3 +1,3 @@
1
1
  """ragcheck — Lighthouse for RAG systems."""
2
2
 
3
- __version__ = "0.2.2"
3
+ __version__ = "0.2.3"
@@ -1,7 +1,6 @@
1
1
  """Auto-generate test questions from documents using LLMs."""
2
2
 
3
3
  import os
4
- import random
5
4
  import re
6
5
  from typing import List
7
6
 
@@ -24,9 +23,8 @@ class AutoQA:
24
23
  if not chunks:
25
24
  return []
26
25
 
27
- # Sample chunks — use more for better coverage, weighted toward longer chunks
26
+ # Sample chunks — prefer longer, substantive chunks
28
27
  sample_size = min(self.config.max_qa_questions, len(chunks))
29
- # Sort by length descending to prefer substantive chunks
30
28
  sorted_chunks = sorted(chunks, key=len, reverse=True)
31
29
  sampled = sorted_chunks[:sample_size]
32
30
 
@@ -41,56 +39,54 @@ class AutoQA:
41
39
 
42
40
  def _generate_question(self, chunk_text: str, index: int) -> TestQuestion:
43
41
  """Generate a single question from a chunk."""
44
- # Truncate for prompt but keep enough context
45
- truncated = chunk_text[:1500]
42
+ # v0.2.2 FIX: Reduced from 1500 to 800 to stay well under Groq 6000 TPM
43
+ truncated = chunk_text[:800]
46
44
 
47
- prompt = f"""You are a legal document analyst. Given the following excerpt from an Indian legal document (BNS, BNSS, or BSA), generate ONE specific factual question that can be answered directly from this text. The question should ask about a specific section, provision, or legal procedure mentioned.
45
+ # v0.2.2 FIX: Compact prompt shaved ~60% token count
46
+ prompt = f"""Generate ONE factual Q&A from this legal excerpt:
48
47
 
49
- Excerpt:
50
48
  {truncated}
51
49
 
52
- Generate exactly one question in this format:
53
- Question: <your question here>
54
- Answer: <the exact answer from the text>
50
+ Format:
51
+ Q: <specific question about a section/provision>
52
+ A: <exact answer from text>
55
53
 
56
- Rules:
57
- - Question must be specific and factual (not "What is this about?")
58
- - Answer must be verbatim or closely paraphrased from the text
59
- - Focus on section numbers, legal procedures, penalties, or definitions
60
- """
54
+ Rules: Be specific. No "What is this about?". Cite section numbers if present."""
61
55
 
62
56
  try:
63
57
  kwargs = {
64
58
  "model": self.config.qa_model,
65
59
  "messages": [{"role": "user", "content": prompt}],
66
- "temperature": self.config.qa_temperature,
67
60
  "max_tokens": 300,
68
61
  }
69
62
 
70
- # Add API key if available
63
+ # v0.2.2 FIX: Gemini 3+ deprecates temperature/top_p/top_k — skip them
64
+ if not self.config.qa_model.startswith("gemini/gemini-3"):
65
+ kwargs["temperature"] = getattr(self.config, "qa_temperature", 0.3)
66
+
67
+ # API key handling
71
68
  api_key = os.environ.get("GROQ_API_KEY") or os.environ.get("OPENAI_API_KEY")
72
69
  if api_key:
73
70
  kwargs["api_key"] = api_key
74
71
 
75
- # CRITICAL FIX: Add base_url for Ollama
72
+ # Ollama base_url
76
73
  if self.config.qa_model.startswith("ollama/"):
77
74
  kwargs["base_url"] = os.environ.get("OLLAMA_BASE_URL", "http://localhost:11434")
78
- # Ollama doesn't need api_key
79
75
  kwargs.pop("api_key", None)
80
76
 
81
77
  response = completion(**kwargs)
82
78
  content = response.choices[0].message.content
83
79
 
84
- # Parse question and answer
85
- q_match = re.search(r"Question:\s*(.+?)(?=\nAnswer:|$)", content, re.DOTALL)
86
- a_match = re.search(r"Answer:\s*(.+?)$", content, re.DOTALL)
80
+ # Parse Q&A — handle both "Q:/A:" and "Question:/Answer:" formats
81
+ q_match = re.search(r"(?:Q|Question):\s*(.+?)(?=\n(?:A|Answer):|$)", content, re.DOTALL)
82
+ a_match = re.search(r"(?:A|Answer):\s*(.+?)$", content, re.DOTALL)
87
83
 
88
84
  if q_match and a_match:
89
85
  question_text = q_match.group(1).strip()
90
86
  answer_text = a_match.group(1).strip()
91
87
 
92
- # Validate: answer must be in chunk
93
- if answer_text.lower() in chunk_text.lower() or len(answer_text) > 20:
88
+ # Validate: answer must be in chunk or be substantial
89
+ if answer_text.lower() in chunk_text.lower() or len(answer_text) > 15:
94
90
  return TestQuestion(
95
91
  question=question_text,
96
92
  expected_answer=answer_text,
@@ -99,8 +95,7 @@ Rules:
99
95
  )
100
96
 
101
97
  except Exception as e:
102
- # Log but don't crash — fallback handled by caller
103
- if self.config.verbose:
98
+ if getattr(self.config, "verbose", False):
104
99
  print(f"QA generation failed for chunk {index}: {e}")
105
100
 
106
101
  return None
@@ -109,10 +104,8 @@ Rules:
109
104
  def generate_dummy_questions(chunks: List[str]) -> List[TestQuestion]:
110
105
  """Fallback: create questions from chunk content directly."""
111
106
  questions = []
112
- for i, chunk in enumerate(chunks[:10]): # Limit to avoid spam
113
- # Extract first sentence or first 100 chars as the "question"
107
+ for i, chunk in enumerate(chunks[:10]):
114
108
  first_sentence = chunk.split(".")[0] if "." in chunk else chunk[:100]
115
- # Create a question that references the actual content
116
109
  questions.append(TestQuestion(
117
110
  question=f"What does the document state regarding: {first_sentence[:80]}?",
118
111
  expected_answer=chunk,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ragcheck-cli
3
- Version: 0.2.2
3
+ Version: 0.2.3
4
4
  Summary: Lighthouse for RAG systems - diagnose and fix your retrieval pipeline
5
5
  Author-email: Pranay Mane <pranaymane78@gmail.com>
6
6
  License: MIT
@@ -181,6 +181,7 @@ MIT — see [LICENSE](LICENSE)
181
181
  ## Roadmap
182
182
 
183
183
  - [x] v0.2.0 — Offline reports, NLI faithfulness, scaled auto-QA, chunk viz
184
+ - [x] v0.2.2 — Prompt size fix, Gemini 3+ support, Groq TPM compliance
184
185
  - [ ] v0.3.0 — More vector DBs (Pinecone, Weaviate)
185
186
  - [ ] v0.3.0 — SaaS API for teams
186
187
  - [ ] v0.4.0 — Enterprise features (SSO, audit logs)
File without changes
File without changes
File without changes