PyPI - kssrag - Versions diffs - 0.2.3__tar.gz → 0.2.4__tar.gz - Mend

kssrag 0.2.3 (tar.gz) → 0.2.4 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (38) hide show

{kssrag-0.2.3 → kssrag-0.2.4}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: kssrag
-Version: 0.2.3
+Version: 0.2.4
 Summary: A flexible Retrieval-Augmented Generation framework by Ksschkw
 Home-page: https://github.com/Ksschkw/kssrag
 Author: Ksschkw
@@ -85,7 +85,7 @@ Dynamic: summary
 ![Python Version](https://img.shields.io/badge/python-3.8%2B-blue)
 ![License](https://img.shields.io/badge/license-MIT-green)
-![Version](https://img.shields.io/badge/version-0.2.3-brightgreen)
+![Version](https://img.shields.io/badge/version-0.2.4-brightgreen)
 ![Framework](https://img.shields.io/badge/framework-RAG-orange)
 ![Documentation](https://img.shields.io/badge/docs-comprehensive-brightgreen)
@@ -809,6 +809,7 @@ kssrag/
 - [**Full Documentation**](https://github.com/Ksschkw/kssrag/docs)
 - [**API Reference**](https://github.com/Ksschkw/kssrag/docs/api_reference.md)
 - [**Examples Directory**](https://github.com/Ksschkw/kssrag/examples)
+- [**PyPi**](https://pypi.org/project/kssrag/0.2.4/)
 ### Community
 - [**GitHub Issues**](https://github.com/Ksschkw/kssrag/issues) - Bug reports and feature requests

{kssrag-0.2.3 → kssrag-0.2.4}/README.md RENAMED Viewed

@@ -4,7 +4,7 @@
 ![Python Version](https://img.shields.io/badge/python-3.8%2B-blue)
 ![License](https://img.shields.io/badge/license-MIT-green)
-![Version](https://img.shields.io/badge/version-0.2.3-brightgreen)
+![Version](https://img.shields.io/badge/version-0.2.4-brightgreen)
 ![Framework](https://img.shields.io/badge/framework-RAG-orange)
 ![Documentation](https://img.shields.io/badge/docs-comprehensive-brightgreen)
@@ -728,6 +728,7 @@ kssrag/
 - [**Full Documentation**](https://github.com/Ksschkw/kssrag/docs)
 - [**API Reference**](https://github.com/Ksschkw/kssrag/docs/api_reference.md)
 - [**Examples Directory**](https://github.com/Ksschkw/kssrag/examples)
+- [**PyPi**](https://pypi.org/project/kssrag/0.2.4/)
 ### Community
 - [**GitHub Issues**](https://github.com/Ksschkw/kssrag/issues) - Bug reports and feature requests

{kssrag-0.2.3 → kssrag-0.2.4}/kssrag/core/agents.py RENAMED Viewed

@@ -86,43 +86,55 @@ class RAGAgent:
         for i, doc in enumerate(context_docs, 1):
             context += f"\n--- Document {i} ---\n{doc['content']}\n"
         return context
     def _build_messages(self, question: str, context: str = "") -> List[Dict[str, str]]:
-        """Build messages for LLM including context and conversation summaries"""
-        # Start with conversation history
-        messages = self.conversation.copy()
+        """
+        Build messages for the LLM including context, conversation history, and summaries.
+        Improvements:
+        - Prevents token explosion by trimming conversation smartly
+        - Injects last 3 summaries only
+        - Adds stealth summarization only if there are at least 2 user-assistant exchanges
+        - Preserves system messages and formatting
+        """
+        # Start with system + conversation history
+        messages: List[Dict[str, str]] = []
+        # Always include system message at top
+        system_msg = next((msg for msg in self.conversation if msg["role"] == "system"), None)
+        if system_msg:
+            messages.append(system_msg)
+        # Keep only last 12 user/assistant messages to prevent token overload
+        conversation_tail = [msg for msg in self.conversation if msg["role"] != "system"][-12:]
+        messages.extend(conversation_tail)
         logger.info(f"Building messages for query: '{question}'")
-        logger.info(f"Conversation history: {len(self.conversation)} messages")
+        logger.info(f"Conversation tail: {len(conversation_tail)} messages")
         logger.info(f"Active summaries: {len(self.conversation_summaries)}")
-        logger.info(f"Retrieved context: {len(context)} chars" if context else "No retrieved context")
+        logger.info(f"Context length: {len(context)} chars" if context else "No retrieved context")
-        # Add conversation summaries as context if available
+        # Inject last 5 summaries safely as a system message
         if self.conversation_summaries:
-            logger.info(f"Using summaries: {self.conversation_summaries}")
-            summary_context = "Previous conversation context:\n" + "\n".join(
-                f"- {summary}" for summary in self.conversation_summaries[-3:]  # Last 3 summaries
-            )
-            messages.append({
-                "role": "system",
-                "content": summary_context
-            })
-        # Add retrieved document context
-        user_message = f"{context}\n\nQuestion: {question}" if context else question
-        # ✅ FIX: Always append new user message (don't replace existing ones)
-        messages.append({"role": "user", "content": user_message})
-        # Add stealth summarization instruction for ongoing conversations
-        if len(self.conversation) >= 1:  # More than just system + current user message + 2nd Query
+            summaries_to_use = self.conversation_summaries[-5:]
+            summary_context = "Previous conversation context:\n" + "\n".join(f"- {s}" for s in summaries_to_use)
+            messages.append({"role": "system", "content": summary_context})
+            logger.info(f"Injected {len(summaries_to_use)} conversation summaries")
+        # Add the user's current question + retrieved context
+        user_content = f"{context}\n\nQuestion: {question}" if context else question
+        messages.append({"role": "user", "content": user_content})
+        # Add stealth summarization only if conversation has at least 2 user-assistant pairs
+        exchange_count = sum(1 for msg in self.conversation if msg["role"] != "system") // 2
+        if exchange_count >= 2:
             summary_instruction = self._create_summary_instruction()
             messages.append({"role": "system", "content": summary_instruction})
-            logger.info(f" Summary instruction added to prompt: {len(summary_instruction)} chars")
-            logger.debug(f"Instruction content: {summary_instruction}")
+            logger.info(f"Stealth summary instruction added ({len(summary_instruction)} chars)")
-        logger.info(f" Final message count to LLM: {len(messages)}")
+        logger.info(f"Final message count to LLM: {len(messages)}")
         return messages
     def _create_summary_instruction(self) -> str:
         """Create the stealth summarization instruction with examples"""
@@ -147,37 +159,8 @@ class RAGAgent:
     The summary will be automatically hidden from the user."""
-    # def _extract_summary_and_response(self, full_response: str) -> tuple[str, Optional[str]]:
-    #     """Extract summary from response and return clean user response - handles partial markers"""
-    #     # Keep original markers for backward compatibility
-    #     summary_start = "[SUMMARY_START]"
-    #     summary_end = "[SUMMARY_END]"
-    #     # NEW: Normalize the response first (improvement from new version)
-    #     normalized = full_response.replace('\n', ' ').replace('\r', ' ').strip()
-    #     # Check if we have complete markers - KEEP original logic but use normalized
-    #     if summary_start in normalized and summary_end in normalized:
-    #         start_idx = normalized.find(summary_start) + len(summary_start)
-    #         end_idx = normalized.find(summary_end)
-    #         summary = normalized[start_idx:end_idx].strip()
-    #         user_response = normalized[:normalized.find(summary_start)].strip()
-    #         logger.info(f"✅ SUCCESS: Summary extracted and separated from user response")
-    #         logger.info(f"User response length: {len(user_response)} chars")
-    #         logger.info(f"Summary extracted: '{summary}'")
-    #         # NEW: Add validation from improved version
-    #         if not summary or len(summary) < 5:
-    #             logger.info("❌ Summary too short, returning full response")
-    #             return full_response.strip(), None
-    #         return user_response, summary
     def _extract_summary_and_response(self, full_response: str) -> tuple[str, Optional[str]]:
-        """Extract summary from response and return clean user response."""
+        """Extract summary from response and return clean user response safely."""
         if not full_response:
             return "", None
@@ -187,7 +170,7 @@ class RAGAgent:
         original = full_response
         normalized = original.replace('\r\n', '\n').replace('\r', '\n')
-        # Case 1: Complete markers
+        # Case 1: Full summary markers
         if summary_start in normalized and summary_end in normalized:
             start_idx = normalized.find(summary_start) + len(summary_start)
             end_idx = normalized.find(summary_end)
@@ -196,13 +179,12 @@ class RAGAgent:
             user_response = original.split(summary_start)[0].strip()
             if not summary or len(summary) < 5:
-                logger.info("Summary too short or invalid")
+                logger.info("Summary too short or invalid – returning full response as user response")
                 return original.strip(), None
-            logger.info("Summary extracted successfully")
             return user_response, summary
-        # Case 2: Partial marker (start only)
+        # Case 2: Partial summary start only
         if summary_start in normalized:
             start_idx = normalized.find(summary_start) + len(summary_start)
             potential = normalized[start_idx:start_idx + 200].strip()
@@ -218,40 +200,15 @@ class RAGAgent:
             user_response = original.split(summary_start)[0].strip()
             if cleaned_summary and len(cleaned_summary) >= 10:
-                logger.info("Partial summary extracted")
+                logger.info("Partial summary extracted safely")
                 return user_response, cleaned_summary
-            logger.info("Partial summary invalid")
+            logger.info("Partial summary invalid or too short")
             return original.strip(), None
-        # Case 3: No markers at all
-        logger.info("No summary markers found")
-        # No markers found - KEEP original but with normalization
-        # logger.info(" No summary markers found, returning full response")
-        logger.info(f"Full response length: {len(original)} chars")
+        # Case 3: No markers
         return original.strip(), None
-        # return full_response.strip(), None  # NEW: strip for consistency
-    # def _add_conversation_summary(self, new_summary: str):
-    #     """Add a new discrete conversation summary"""
-    #     if not new_summary or new_summary.lower() == "none":
-    #         logger.info("🔄 No summary to add (empty or 'none')")
-    #         return
-    #     # Add as a new discrete summary
-    #     self.conversation_summaries.append(new_summary)
-    #     logger.info(f"📝 ADDED Summary #{len(self.conversation_summaries)}: '{new_summary}'")
-    #     # Keep only recent summaries (last 7)
-    #     if len(self.conversation_summaries) > 7:
-    #         self.conversation_summaries = self.conversation_summaries[-7:]
-    #         removed = self.conversation_summaries.pop(0)
-    #         logger.info(f"🗑️  DROPPED Oldest summary: '{removed}'")
-    #         logger.info(f"📊 Summary count maintained at {len(self.conversation_summaries)}")
-    #     logger.info(f"Added conversation summary #{len(self.conversation_summaries)}: {new_summary}")
     def _add_conversation_summary(self, new_summary: str):
         """Add a new discrete conversation summary"""
         if not new_summary or new_summary.lower() == "none":
@@ -339,49 +296,75 @@ class RAGAgent:
             logger.error(f" ALL STREAMING STRATEGIES FAILED: {str(e)}")
             yield f"Error: {str(e)}"
+    # def _stream_with_summary_protection(self, question: str, top_k: int) -> Generator[str, None, None]:
+    #     """Streaming-safe: never leak summary markers mid-stream."""
+    #     relevant_docs = self.retriever.retrieve(question, top_k=top_k)
+    #     context = self._build_context(relevant_docs)
+    #     messages = self._build_messages(question, context)
+    #     buffer = ""
+    #     summary_buffer = ""
+    #     in_summary = False
+    #     for chunk in self.llm.predict_stream(messages):
+    #         buffer += chunk
+    #         # Detect summary start
+    #         if '[SUMMARY_START]' in buffer:
+    #             in_summary = True
+    #             clean_part = buffer.split('[SUMMARY_START]')[0].strip()
+    #             if clean_part:
+    #                 yield clean_part
+    #             summary_buffer = buffer.split('[SUMMARY_START]')[1]
+    #             buffer = ""
+    #             continue
+    #         if in_summary:
+    #             summary_buffer += chunk
+    #             if '[SUMMARY_END]' in summary_buffer:
+    #                 in_summary = False
+    #                 summary_content = summary_buffer.split('[SUMMARY_END]')[0].strip()
+    #                 if summary_content:
+    #                     self._add_conversation_summary(summary_content)
+    #                     logger.info(f"Summary extracted in stream: '{summary_content}'")
+    #                 buffer = summary_buffer.split('[SUMMARY_END]')[1]  # remainder
+    #                 summary_buffer = ""
+    #                 if buffer:
+    #                     yield buffer.strip()
+    #                 buffer = ""
+    #             continue
+    #         if not in_summary:
+    #             yield chunk
+    #     # Flush leftover buffer
+    #     if buffer.strip() and not in_summary:
+    #         yield buffer.strip()
+    #     elif in_summary:
+    #         logger.info("Leftover buffer contains partial summary – discarded to prevent marker leak")
     def _stream_with_summary_protection(self, question: str, top_k: int) -> Generator[str, None, None]:
-        """True streaming with better error handling"""
-        try:
-            relevant_docs = self.retriever.retrieve(question, top_k=top_k)
-            context = self._build_context(relevant_docs)
-            messages = self._build_messages(question, context)
-            buffer = ""
-            summary_started = False
-            for chunk in self.llm.predict_stream(messages):
-                buffer += chunk
-                # Check for summary markers
-                if any(marker in chunk for marker in ['[SUMMARY', 'SUMMARY_']):
-                    if not summary_started:
-                        logger.info(" Summary markers detected - cutting stream")
-                        summary_started = True
-                        clean_part = self._extract_clean_content(buffer)
-                        if clean_part:
-                            yield clean_part
-                        # Don't break here - let the method complete naturally
-                        continue
-                if not summary_started:
-                    yield chunk
-            # Process the complete response
-            self._process_complete_response(buffer)
-        except Exception as e:
-            logger.error(f"Streaming error: {e}")
-            raise  # Re-raise to trigger fallback
+        """Token-only streaming. Never reconstruct or re-emit content."""
+        relevant_docs = self.retriever.retrieve(question, top_k=top_k)
+        context = self._build_context(relevant_docs)
+        messages = self._build_messages(question, context)
+        buffer = ""
+        for chunk in self.llm.predict_stream(messages):
+            buffer += chunk
+            # The moment summary markers appear, stop streaming to client
+            if '[SUMMARY_START]' in buffer or 'SUMMARY_' in buffer:
+                logger.info("Summary marker detected — stopping client stream")
+                break
+            # Yield ONLY raw tokens
+            yield chunk
+        # After streaming finishes, process full response exactly once
+        self._process_complete_response(buffer)
-    # def _process_complete_response(self, full_response: str):
-    #     """Process complete response and extract summary"""
-    #     user_response, conversation_summary = self._extract_summary_and_response(full_response)
-    #     if conversation_summary:
-    #         logger.info(f" Summary extracted: '{conversation_summary}'")
-    #         self._add_conversation_summary(conversation_summary)
-    #     self.add_message("assistant", user_response)
     def _process_complete_response(self, full_response: str):
         """Process complete response and extract summary"""
         user_response, conversation_summary = self._extract_summary_and_response(full_response)
@@ -398,31 +381,27 @@ class RAGAgent:
             else:
                 logger.info("Skipped adding duplicate assistant message in _process_complete_response.")
     def _simulated_streaming(self, question: str, top_k: int) -> Generator[str, None, None]:
-        """Simulated streaming that guarantees no summary leakage"""
+        """Simulated streaming that guarantees no summary leakage."""
         relevant_docs = self.retriever.retrieve(question, top_k=top_k)
         context = self._build_context(relevant_docs)
         messages = self._build_messages(question, context)
-        # Get complete response
         complete_response = self.llm.predict(messages)
-        # Extract clean response
         user_response, conversation_summary = self._extract_summary_and_response(complete_response)
         if conversation_summary:
-            logger.info(f" Summary extracted: '{conversation_summary}'")
             self._add_conversation_summary(conversation_summary)
         self.add_message("assistant", user_response)
-        # Simulate streaming (smaller chunks for better UX)
-        chunk_size = 2  # Even smaller chunks for smoother streaming
-        for i in range(0, len(user_response), chunk_add_conversation_summary_size):
-            yield user_response[i:i+chunk_size]
+        # Simulate streaming chunks
+        chunk_size = 2
+        for i in range(0, len(user_response), chunk_size):
+            yield user_response[i:i + chunk_size]
             import time
-            time.sleep(0.02)  # Slightly longer delay for readability
+            time.sleep(0.02)
     def _extract_clean_content(self, buffer: str) -> str:
         """Extract clean content before any summary markers"""

{kssrag-0.2.3 → kssrag-0.2.4}/kssrag/server.py RENAMED Viewed

@@ -109,19 +109,37 @@ def create_app(rag_agent: RAGAgent, server_config: Optional[ServerConfig] = None
             agent = sessions[session_id]
+            # async def generate():
+            #     full_response = ""
+            #     try:
+            #         # Use agent's query_stream which handles context and summarization
+            #         for chunk in agent.query_stream(query, top_k=5):
+            #             full_response += chunk
+            #             yield f"data: {json.dumps({'chunk': chunk, 'done': False})}\n\n"
+            #         yield f"data: {json.dumps({'chunk': '', 'done': True})}\n\n"
+            #     except Exception as e:
+            #         logger.error(f"Streaming error: {str(e)}")
+            #         yield f"data: {json.dumps({'error': str(e), 'done': True})}\n\n"
             async def generate():
-                full_response = ""
                 try:
-                    # Use agent's query_stream which handles context and summarization
-                    for chunk in agent.query_stream(query, top_k=5):
-                        full_response += chunk
-                        yield f"data: {json.dumps({'chunk': chunk, 'done': False})}\n\n"
+                    # Stream tokens ONLY
+                    for token in agent.query_stream(query, top_k=5):
+                        if not token:
+                            continue
+                        yield f"data: {json.dumps({'chunk': token, 'done': False})}\n\n"
+                    # Signal completion (no payload)
                     yield f"data: {json.dumps({'chunk': '', 'done': True})}\n\n"
                 except Exception as e:
                     logger.error(f"Streaming error: {str(e)}")
                     yield f"data: {json.dumps({'error': str(e), 'done': True})}\n\n"
             return StreamingResponse(
                 generate(),

{kssrag-0.2.3 → kssrag-0.2.4}/kssrag.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: kssrag
-Version: 0.2.3
+Version: 0.2.4
 Summary: A flexible Retrieval-Augmented Generation framework by Ksschkw
 Home-page: https://github.com/Ksschkw/kssrag
 Author: Ksschkw
@@ -85,7 +85,7 @@ Dynamic: summary
 ![Python Version](https://img.shields.io/badge/python-3.8%2B-blue)
 ![License](https://img.shields.io/badge/license-MIT-green)
-![Version](https://img.shields.io/badge/version-0.2.3-brightgreen)
+![Version](https://img.shields.io/badge/version-0.2.4-brightgreen)
 ![Framework](https://img.shields.io/badge/framework-RAG-orange)
 ![Documentation](https://img.shields.io/badge/docs-comprehensive-brightgreen)
@@ -809,6 +809,7 @@ kssrag/
 - [**Full Documentation**](https://github.com/Ksschkw/kssrag/docs)
 - [**API Reference**](https://github.com/Ksschkw/kssrag/docs/api_reference.md)
 - [**Examples Directory**](https://github.com/Ksschkw/kssrag/examples)
+- [**PyPi**](https://pypi.org/project/kssrag/0.2.4/)
 ### Community
 - [**GitHub Issues**](https://github.com/Ksschkw/kssrag/issues) - Bug reports and feature requests

{kssrag-0.2.3 → kssrag-0.2.4}/setup.py RENAMED Viewed

@@ -6,7 +6,7 @@ long_description = (here / "README.md").read_text(encoding="utf-8")
 setup(
     name="kssrag",
-    version="0.2.3",
+    version="0.2.4",
     description="A flexible Retrieval-Augmented Generation framework by Ksschkw",
     long_description=long_description,
     long_description_content_type="text/markdown",