kssrag 0.2.2__tar.gz → 0.2.4__tar.gz

This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.
Files changed (38)
  1. {kssrag-0.2.2 → kssrag-0.2.4}/PKG-INFO +3 -2
  2. {kssrag-0.2.2 → kssrag-0.2.4}/README.md +2 -1
  3. {kssrag-0.2.2 → kssrag-0.2.4}/kssrag/core/agents.py +210 -188
  4. {kssrag-0.2.2 → kssrag-0.2.4}/kssrag/server.py +26 -8
  5. {kssrag-0.2.2 → kssrag-0.2.4}/kssrag.egg-info/PKG-INFO +3 -2
  6. {kssrag-0.2.2 → kssrag-0.2.4}/setup.py +1 -1
  7. {kssrag-0.2.2 → kssrag-0.2.4}/kssrag/__init__.py +0 -0
  8. {kssrag-0.2.2 → kssrag-0.2.4}/kssrag/cli.py +0 -0
  9. {kssrag-0.2.2 → kssrag-0.2.4}/kssrag/config.py +0 -0
  10. {kssrag-0.2.2 → kssrag-0.2.4}/kssrag/core/__init__.py +0 -0
  11. {kssrag-0.2.2 → kssrag-0.2.4}/kssrag/core/chunkers.py +0 -0
  12. {kssrag-0.2.2 → kssrag-0.2.4}/kssrag/core/retrievers.py +0 -0
  13. {kssrag-0.2.2 → kssrag-0.2.4}/kssrag/core/vectorstores.py +0 -0
  14. {kssrag-0.2.2 → kssrag-0.2.4}/kssrag/kssrag.py +0 -0
  15. {kssrag-0.2.2 → kssrag-0.2.4}/kssrag/models/__init__.py +0 -0
  16. {kssrag-0.2.2 → kssrag-0.2.4}/kssrag/models/local_llms.py +0 -0
  17. {kssrag-0.2.2 → kssrag-0.2.4}/kssrag/models/openrouter.py +0 -0
  18. {kssrag-0.2.2 → kssrag-0.2.4}/kssrag/utils/__init__.py +0 -0
  19. {kssrag-0.2.2 → kssrag-0.2.4}/kssrag/utils/document_loaders.py +0 -0
  20. {kssrag-0.2.2 → kssrag-0.2.4}/kssrag/utils/helpers.py +0 -0
  21. {kssrag-0.2.2 → kssrag-0.2.4}/kssrag/utils/ocr.py +0 -0
  22. {kssrag-0.2.2 → kssrag-0.2.4}/kssrag/utils/ocr_loader.py +0 -0
  23. {kssrag-0.2.2 → kssrag-0.2.4}/kssrag/utils/preprocessors.py +0 -0
  24. {kssrag-0.2.2 → kssrag-0.2.4}/kssrag.egg-info/SOURCES.txt +0 -0
  25. {kssrag-0.2.2 → kssrag-0.2.4}/kssrag.egg-info/dependency_links.txt +0 -0
  26. {kssrag-0.2.2 → kssrag-0.2.4}/kssrag.egg-info/entry_points.txt +0 -0
  27. {kssrag-0.2.2 → kssrag-0.2.4}/kssrag.egg-info/requires.txt +0 -0
  28. {kssrag-0.2.2 → kssrag-0.2.4}/kssrag.egg-info/top_level.txt +0 -0
  29. {kssrag-0.2.2 → kssrag-0.2.4}/setup.cfg +0 -0
  30. {kssrag-0.2.2 → kssrag-0.2.4}/tests/__init__.py +0 -0
  31. {kssrag-0.2.2 → kssrag-0.2.4}/tests/test_basic.py +0 -0
  32. {kssrag-0.2.2 → kssrag-0.2.4}/tests/test_bm25s.py +0 -0
  33. {kssrag-0.2.2 → kssrag-0.2.4}/tests/test_config.py +0 -0
  34. {kssrag-0.2.2 → kssrag-0.2.4}/tests/test_image_chunker.py +0 -0
  35. {kssrag-0.2.2 → kssrag-0.2.4}/tests/test_integration.py +0 -0
  36. {kssrag-0.2.2 → kssrag-0.2.4}/tests/test_ocr.py +0 -0
  37. {kssrag-0.2.2 → kssrag-0.2.4}/tests/test_streaming.py +0 -0
  38. {kssrag-0.2.2 → kssrag-0.2.4}/tests/test_vectorstores.py +0 -0
--- kssrag-0.2.2/PKG-INFO
+++ kssrag-0.2.4/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: kssrag
-Version: 0.2.2
+Version: 0.2.4
 Summary: A flexible Retrieval-Augmented Generation framework by Ksschkw
 Home-page: https://github.com/Ksschkw/kssrag
 Author: Ksschkw
@@ -85,7 +85,7 @@ Dynamic: summary
 
 ![Python Version](https://img.shields.io/badge/python-3.8%2B-blue)
 ![License](https://img.shields.io/badge/license-MIT-green)
-![Version](https://img.shields.io/badge/version-0.2.0-brightgreen)
+![Version](https://img.shields.io/badge/version-0.2.4-brightgreen)
 ![Framework](https://img.shields.io/badge/framework-RAG-orange)
 ![Documentation](https://img.shields.io/badge/docs-comprehensive-brightgreen)
 
@@ -809,6 +809,7 @@ kssrag/
 - [**Full Documentation**](https://github.com/Ksschkw/kssrag/docs)
 - [**API Reference**](https://github.com/Ksschkw/kssrag/docs/api_reference.md)
 - [**Examples Directory**](https://github.com/Ksschkw/kssrag/examples)
+- [**PyPi**](https://pypi.org/project/kssrag/0.2.4/)
 
 ### Community
 - [**GitHub Issues**](https://github.com/Ksschkw/kssrag/issues) - Bug reports and feature requests
--- kssrag-0.2.2/README.md
+++ kssrag-0.2.4/README.md
@@ -4,7 +4,7 @@
 
 ![Python Version](https://img.shields.io/badge/python-3.8%2B-blue)
 ![License](https://img.shields.io/badge/license-MIT-green)
-![Version](https://img.shields.io/badge/version-0.2.0-brightgreen)
+![Version](https://img.shields.io/badge/version-0.2.4-brightgreen)
 ![Framework](https://img.shields.io/badge/framework-RAG-orange)
 ![Documentation](https://img.shields.io/badge/docs-comprehensive-brightgreen)
 
@@ -728,6 +728,7 @@ kssrag/
 - [**Full Documentation**](https://github.com/Ksschkw/kssrag/docs)
 - [**API Reference**](https://github.com/Ksschkw/kssrag/docs/api_reference.md)
 - [**Examples Directory**](https://github.com/Ksschkw/kssrag/examples)
+- [**PyPi**](https://pypi.org/project/kssrag/0.2.4/)
 
 ### Community
 - [**GitHub Issues**](https://github.com/Ksschkw/kssrag/issues) - Bug reports and feature requests
--- kssrag-0.2.2/kssrag/core/agents.py
+++ kssrag-0.2.4/kssrag/core/agents.py
@@ -21,13 +21,36 @@ class RAGAgent:
         if not any(msg.get("role") == "system" for msg in self.conversation):
             self.add_message("system", self.system_prompt)
 
+    # def add_message(self, role: str, content: str):
+    #     """Add a message to the conversation history"""
+    #     self.conversation.append({"role": role, "content": content})
+
+    #     # Keep conversation manageable (last 15 messages)
+    #     if len(self.conversation) > 15:
+    #         self._smart_trim_conversation()
+
     def add_message(self, role: str, content: str):
-        """Add a message to the conversation history"""
+        """Add a message to the conversation history (with simple dedupe for assistant)."""
+        content = content.strip()
+        # Prevent adding empty messages
+        if not content:
+            logger.info("Attempted to add empty message – ignored.")
+            return
+
+        # If last message is identical assistant content, skip to avoid duplicates
+        if self.conversation:
+            last = self.conversation[-1]
+            if role == "assistant" and last.get("role") == "assistant":
+                if last.get("content", "").strip() == content:
+                    logger.info("Duplicate assistant message suppressed.")
+                    return
+
         self.conversation.append({"role": role, "content": content})
-
+
         # Keep conversation manageable (last 15 messages)
         if len(self.conversation) > 15:
             self._smart_trim_conversation()
+
 
     def _smart_trim_conversation(self):
         """Trim conversation while preserving system message and recent exchanges"""
@@ -63,43 +86,55 @@ class RAGAgent:
         for i, doc in enumerate(context_docs, 1):
             context += f"\n--- Document {i} ---\n{doc['content']}\n"
         return context
-
+
     def _build_messages(self, question: str, context: str = "") -> List[Dict[str, str]]:
-        """Build messages for LLM including context and conversation summaries"""
-        # Start with conversation history
-        messages = self.conversation.copy()
+        """
+        Build messages for the LLM including context, conversation history, and summaries.
+
+        Improvements:
+        - Prevents token explosion by trimming conversation smartly
+        - Injects the last 5 summaries only
+        - Adds stealth summarization only if there are at least 2 user-assistant exchanges
+        - Preserves system messages and formatting
+        """
+        # Start with system + conversation history
+        messages: List[Dict[str, str]] = []
+
+        # Always include system message at top
+        system_msg = next((msg for msg in self.conversation if msg["role"] == "system"), None)
+        if system_msg:
+            messages.append(system_msg)
 
+        # Keep only last 12 user/assistant messages to prevent token overload
+        conversation_tail = [msg for msg in self.conversation if msg["role"] != "system"][-12:]
+        messages.extend(conversation_tail)
+
         logger.info(f"Building messages for query: '{question}'")
-        logger.info(f"Conversation history: {len(self.conversation)} messages")
+        logger.info(f"Conversation tail: {len(conversation_tail)} messages")
         logger.info(f"Active summaries: {len(self.conversation_summaries)}")
-        logger.info(f"Retrieved context: {len(context)} chars" if context else "No retrieved context")
+        logger.info(f"Context length: {len(context)} chars" if context else "No retrieved context")
 
-        # Add conversation summaries as context if available
+        # Inject last 5 summaries safely as a system message
         if self.conversation_summaries:
-            logger.info(f"Using summaries: {self.conversation_summaries}")
-            summary_context = "Previous conversation context:\n" + "\n".join(
-                f"- {summary}" for summary in self.conversation_summaries[-3:]  # Last 3 summaries
-            )
-            messages.append({
-                "role": "system",
-                "content": summary_context
-            })
-
-        # Add retrieved document context
-        user_message = f"{context}\n\nQuestion: {question}" if context else question
-
-        # ✅ FIX: Always append new user message (don't replace existing ones)
-        messages.append({"role": "user", "content": user_message})
-
-        # Add stealth summarization instruction for ongoing conversations
-        if len(self.conversation) >= 1:  # More than just system + current user message + 2nd Query
+            summaries_to_use = self.conversation_summaries[-5:]
+            summary_context = "Previous conversation context:\n" + "\n".join(f"- {s}" for s in summaries_to_use)
+            messages.append({"role": "system", "content": summary_context})
+            logger.info(f"Injected {len(summaries_to_use)} conversation summaries")
+
+        # Add the user's current question + retrieved context
+        user_content = f"{context}\n\nQuestion: {question}" if context else question
+        messages.append({"role": "user", "content": user_content})
+
+        # Add stealth summarization only if conversation has at least 2 user-assistant pairs
+        exchange_count = sum(1 for msg in self.conversation if msg["role"] != "system") // 2
+        if exchange_count >= 2:
             summary_instruction = self._create_summary_instruction()
             messages.append({"role": "system", "content": summary_instruction})
-            logger.info(f" Summary instruction added to prompt: {len(summary_instruction)} chars")
-            logger.debug(f"Instruction content: {summary_instruction}")
+            logger.info(f"Stealth summary instruction added ({len(summary_instruction)} chars)")
 
-        logger.info(f" Final message count to LLM: {len(messages)}")
+        logger.info(f"Final message count to LLM: {len(messages)}")
         return messages
+
 
     def _create_summary_instruction(self) -> str:
         """Create the stealth summarization instruction with examples"""
@@ -123,136 +158,85 @@ class RAGAgent:
         - Focus on user preferences, names, important context
 
         The summary will be automatically hidden from the user."""
-
-    # def _extract_summary_and_response(self, full_response: str) -> tuple[str, Optional[str]]:
-    #     """Extract summary from response and return clean user response - handles partial markers"""
-    #     summary_start = "[SUMMARY_START]"
-    #     summary_end = "[SUMMARY_END]"
-
-    #     # Check if we have complete markers
-    #     if summary_start in full_response and summary_end in full_response:
-    #         start_idx = full_response.find(summary_start) + len(summary_start)
-    #         end_idx = full_response.find(summary_end)
-
-    #         summary = full_response[start_idx:end_idx].strip()
-    #         user_response = full_response[:full_response.find(summary_start)].strip()
-
-    #         logger.info(f"✅ SUCCESS: Summary extracted and separated from user response")
-    #         logger.info(f"User response length: {len(user_response)} chars")
-    #         logger.info(f"Summary extracted: '{summary}'")
-    #         return user_response, summary
-
-    #     # Check if we have partial markers (common in streaming)
-    #     elif summary_start in full_response:
-    #         # We have start marker but no end marker - extract what we can
-    #         start_idx = full_response.find(summary_start) + len(summary_start)
-    #         potential_summary = full_response[start_idx:].strip()
-
-    #         # Clean up any partial end markers or weird formatting
-    #         if potential_summary:
-    #             # Remove any trailing partial markers or whitespace
-    #             cleaned_summary = potential_summary.split('[SUMMARY_')[0].split('[SUMMARY')[0].strip()
-    #             user_response = full_response[:full_response.find(summary_start)].strip()
-
-    #             if cleaned_summary and len(cleaned_summary) > 10:  # Only if meaningful content
-    #                 logger.info(f"⚠️ Partial summary extracted (missing end marker): '{cleaned_summary}'")
-    #                 return user_response, cleaned_summary
-
-    #         logger.info("❌ Incomplete summary markers found")
-    #         return full_response, None
-
-    #     logger.info("❌ No summary markers found, returning full response")
-    #     logger.info(f"Full response length: {len(full_response)} chars")
-    #     return full_response, None
 
     def _extract_summary_and_response(self, full_response: str) -> tuple[str, Optional[str]]:
-        """Extract summary from response and return clean user response - handles partial markers"""
-        # Keep original markers for backward compatibility
+        """Extract summary from response and return clean user response safely."""
+        if not full_response:
+            return "", None
+
         summary_start = "[SUMMARY_START]"
         summary_end = "[SUMMARY_END]"
-
-        # NEW: Normalize the response first (improvement from new version)
-        normalized = full_response.replace('\n', ' ').replace('\r', ' ').strip()
-
-        # Check if we have complete markers - KEEP original logic but use normalized
+
+        original = full_response
+        normalized = original.replace('\r\n', '\n').replace('\r', '\n')
+
+        # Case 1: Full summary markers
         if summary_start in normalized and summary_end in normalized:
             start_idx = normalized.find(summary_start) + len(summary_start)
             end_idx = normalized.find(summary_end)
-
             summary = normalized[start_idx:end_idx].strip()
-            user_response = normalized[:normalized.find(summary_start)].strip()
-
-            logger.info(f"✅ SUCCESS: Summary extracted and separated from user response")
-            logger.info(f"User response length: {len(user_response)} chars")
-            logger.info(f"Summary extracted: '{summary}'")
-
-            # NEW: Add validation from improved version
+
+            user_response = original.split(summary_start)[0].strip()
+
             if not summary or len(summary) < 5:
-                logger.info("Summary too short, returning full response")
-                return full_response.strip(), None
-
+                logger.info("Summary too short or invalid – returning full response as user response")
+                return original.strip(), None
+
             return user_response, summary
-
-        # Check if we have partial markers (common in streaming) - IMPROVED logic
-        elif summary_start in normalized:
-            # We have start marker but no end marker - extract what we can
+
+        # Case 2: Partial summary start only
+        if summary_start in normalized:
             start_idx = normalized.find(summary_start) + len(summary_start)
-
-            # NEW: Take reasonable chunk (200 chars) instead of everything
-            potential_summary = normalized[start_idx:start_idx+200].strip()
-
-            # COMBINED: Clean up from both versions
-            if potential_summary:
-                # Clean up any partial markers or weird formatting
-                cleaned_summary = (potential_summary
-                                   .split('[SUMMARY_')[0]
-                                   .split('[SUMMARY')[0]
-                                   .split('[')[0]  # NEW from improved version
-                                   .split('\n')[0]  # NEW from improved version
-                                   .strip())
-
-                user_response = normalized[:normalized.find(summary_start)].strip()
-
-                # COMBINED validation: meaningful content check
-                if cleaned_summary and len(cleaned_summary) >= 10:  # Original threshold
-                    logger.info(f"⚠️ Partial summary extracted (missing end marker): '{cleaned_summary}'")
-                    # NEW: Additional validation
-                    if len(cleaned_summary) >= 5:  # Improved version threshold
-                        return user_response, cleaned_summary
-
-            logger.info("❌ Incomplete summary markers found")
-            return full_response.strip(), None  # NEW: strip for consistency
-
-        # No markers found - KEEP original but with normalization
-        logger.info("❌ No summary markers found, returning full response")
-        logger.info(f"Full response length: {len(full_response)} chars")
-        return full_response.strip(), None  # NEW: strip for consistency
+            potential = normalized[start_idx:start_idx + 200].strip()
+
+            cleaned_summary = (
+                potential
+                .split('[SUMMARY_')[0]
+                .split('[SUMMARY')[0]
+                .split('[')[0]
+                .strip()
+            )
+
+            user_response = original.split(summary_start)[0].strip()
+
+            if cleaned_summary and len(cleaned_summary) >= 10:
+                logger.info("Partial summary extracted safely")
+                return user_response, cleaned_summary
+
+            logger.info("Partial summary invalid or too short")
+            return original.strip(), None
+
+        # Case 3: No markers
+        return original.strip(), None
 
     def _add_conversation_summary(self, new_summary: str):
         """Add a new discrete conversation summary"""
         if not new_summary or new_summary.lower() == "none":
-            logger.info("🔄 No summary to add (empty or 'none')")
+            logger.info(" No summary to add (empty or 'none')")
             return
-
-        # Add as a new discrete summary
+
+        new_summary = new_summary.strip()
+        if not new_summary:
+            logger.info(" No summary to add after strip")
+            return
+
+        # Append new summary
         self.conversation_summaries.append(new_summary)
-        logger.info(f"📝 ADDED Summary #{len(self.conversation_summaries)}: '{new_summary}'")
+        logger.info(f" ADDED Summary #{len(self.conversation_summaries)}: '{new_summary}'")
 
         # Keep only recent summaries (last 7)
         if len(self.conversation_summaries) > 7:
             self.conversation_summaries = self.conversation_summaries[-7:]
-            removed = self.conversation_summaries.pop(0)
-            logger.info(f"🗑️ DROPPED Oldest summary: '{removed}'")
-            logger.info(f"📊 Summary count maintained at {len(self.conversation_summaries)}")
-        logger.info(f"Added conversation summary #{len(self.conversation_summaries)}: {new_summary}")
+            logger.info(f" Summary count trimmed to {len(self.conversation_summaries)}")
+
 
     def query(self, question: str, top_k: int = 5, include_context: bool = True) -> str:
         """Process a query with stealth conversation summarization"""
         try:
             # Retrieve relevant context
-            logger.info(f"🔍 QUERY START: '{question}' (top_k: {top_k})")
+            logger.info(f" QUERY START: '{question}' (top_k: {top_k})")
             context_docs = self.retriever.retrieve(question, top_k)
-            logger.info(f"📄 Retrieved {len(context_docs)} context documents")
+            logger.info(f" Retrieved {len(context_docs)} context documents")
 
             if not context_docs and include_context:
                 logger.warning(f"No context found for query: {question}")
@@ -266,7 +250,7 @@
 
             # Generate response
             full_response = self.llm.predict(messages)
-            logger.info(f"🤖 LLM response received: {len(full_response)} chars")
+            logger.info(f" LLM response received: {len(full_response)} chars")
 
             # Extract summary and clean response
             user_response, conversation_summary = self._extract_summary_and_response(full_response)
@@ -281,12 +265,12 @@
             # Add assistant response to conversation (clean version only)
             self.add_message("assistant", user_response)
 
-            logger.info(f"💬 Final user response: {len(user_response)} chars")
+            logger.info(f" Final user response: {len(user_response)} chars")
             return user_response
 
         except Exception as e:
             logger.error(f"Error processing query: {str(e)}")
-            # logger.error(f"💥 QUERY FAILED: {str(e)}")
+            # logger.error(f" QUERY FAILED: {str(e)}")
             return "I encountered an issue processing your query. Please try again."
 
     def query_stream(self, question: str, top_k: int = 5) -> Generator[str, None, None]:
@@ -294,7 +278,7 @@
         Professional-grade streaming with multiple fallback strategies
         """
         try:
-            logger.info(f"🌊 STREAMING QUERY START: '{question}'")
+            logger.info(f" STREAMING QUERY START: '{question}'")
 
             # Strategy 1: Try true streaming first
             if hasattr(self.llm, 'predict_stream'):
@@ -305,81 +289,119 @@
                     logger.warning(f"Streaming failed, falling back: {stream_error}")
 
             # Strategy 2: Fallback to simulated streaming
-            logger.info("🔄 Falling back to simulated streaming")
+            logger.info(" Falling back to simulated streaming")
             yield from self._simulated_streaming(question, top_k)
 
         except Exception as e:
-            logger.error(f"💥 ALL STREAMING STRATEGIES FAILED: {str(e)}")
+            logger.error(f" ALL STREAMING STRATEGIES FAILED: {str(e)}")
             yield f"Error: {str(e)}"
 
+    # def _stream_with_summary_protection(self, question: str, top_k: int) -> Generator[str, None, None]:
+    #     """Streaming-safe: never leak summary markers mid-stream."""
+    #     relevant_docs = self.retriever.retrieve(question, top_k=top_k)
+    #     context = self._build_context(relevant_docs)
+    #     messages = self._build_messages(question, context)
+
+    #     buffer = ""
+    #     summary_buffer = ""
+    #     in_summary = False
+
+    #     for chunk in self.llm.predict_stream(messages):
+    #         buffer += chunk
+
+    #         # Detect summary start
+    #         if '[SUMMARY_START]' in buffer:
+    #             in_summary = True
+    #             clean_part = buffer.split('[SUMMARY_START]')[0].strip()
+    #             if clean_part:
+    #                 yield clean_part
+    #             summary_buffer = buffer.split('[SUMMARY_START]')[1]
+    #             buffer = ""
+    #             continue
+
+    #         if in_summary:
+    #             summary_buffer += chunk
+    #             if '[SUMMARY_END]' in summary_buffer:
+    #                 in_summary = False
+    #                 summary_content = summary_buffer.split('[SUMMARY_END]')[0].strip()
+    #                 if summary_content:
+    #                     self._add_conversation_summary(summary_content)
+    #                     logger.info(f"Summary extracted in stream: '{summary_content}'")
+    #                 buffer = summary_buffer.split('[SUMMARY_END]')[1]  # remainder
+    #                 summary_buffer = ""
+    #                 if buffer:
+    #                     yield buffer.strip()
+    #                     buffer = ""
+    #             continue
+
+    #         if not in_summary:
+    #             yield chunk
+
+    #     # Flush leftover buffer
+    #     if buffer.strip() and not in_summary:
+    #         yield buffer.strip()
+    #     elif in_summary:
+    #         logger.info("Leftover buffer contains partial summary – discarded to prevent marker leak")
+
     def _stream_with_summary_protection(self, question: str, top_k: int) -> Generator[str, None, None]:
-        """True streaming with better error handling"""
-        try:
-            relevant_docs = self.retriever.retrieve(question, top_k=top_k)
-            context = self._build_context(relevant_docs)
-            messages = self._build_messages(question, context)
-
-            buffer = ""
-            summary_started = False
-
-            for chunk in self.llm.predict_stream(messages):
-                buffer += chunk
-
-                # Check for summary markers
-                if any(marker in chunk for marker in ['[SUMMARY', 'SUMMARY_']):
-                    if not summary_started:
-                        logger.info("🚨 Summary markers detected - cutting stream")
-                        summary_started = True
-                        clean_part = self._extract_clean_content(buffer)
-                        if clean_part:
-                            yield clean_part
-                    # Don't break here - let the method complete naturally
-                    continue
-
-                if not summary_started:
-                    yield chunk
-
-            # Process the complete response
-            self._process_complete_response(buffer)
-
-        except Exception as e:
-            logger.error(f"Streaming error: {e}")
-            raise  # Re-raise to trigger fallback
+        """Token-only streaming. Never reconstruct or re-emit content."""
+        relevant_docs = self.retriever.retrieve(question, top_k=top_k)
+        context = self._build_context(relevant_docs)
+        messages = self._build_messages(question, context)
+
+        buffer = ""
+
+        for chunk in self.llm.predict_stream(messages):
+            buffer += chunk
+
+            # The moment summary markers appear, stop streaming to client
+            if '[SUMMARY_START]' in buffer or 'SUMMARY_' in buffer:
+                logger.info("Summary marker detected, stopping client stream")
+                break
+
+            # Yield ONLY raw tokens
+            yield chunk
+
+        # After streaming finishes, process full response exactly once
+        self._process_complete_response(buffer)
 
     def _process_complete_response(self, full_response: str):
         """Process complete response and extract summary"""
         user_response, conversation_summary = self._extract_summary_and_response(full_response)
-
+
         if conversation_summary:
-            logger.info(f"📝 Summary extracted: '{conversation_summary}'")
+            logger.info(f" Summary extracted: '{conversation_summary}'")
             self._add_conversation_summary(conversation_summary)
-
-        self.add_message("assistant", user_response)
+
+        # extra guard: only add assistant message if different from last assistant message
+        if user_response:
+            last = self.conversation[-1] if self.conversation else None
+            if not (last and last.get("role") == "assistant" and last.get("content", "").strip() == user_response.strip()):
+                self.add_message("assistant", user_response)
+            else:
+                logger.info("Skipped adding duplicate assistant message in _process_complete_response.")
 
     def _simulated_streaming(self, question: str, top_k: int) -> Generator[str, None, None]:
-        """Simulated streaming that guarantees no summary leakage"""
+        """Simulated streaming that guarantees no summary leakage."""
         relevant_docs = self.retriever.retrieve(question, top_k=top_k)
         context = self._build_context(relevant_docs)
         messages = self._build_messages(question, context)
-
-        # Get complete response
+
         complete_response = self.llm.predict(messages)
-
-        # Extract clean response
         user_response, conversation_summary = self._extract_summary_and_response(complete_response)
-
+
         if conversation_summary:
-            logger.info(f"📝 Summary extracted: '{conversation_summary}'")
             self._add_conversation_summary(conversation_summary)
-
+
         self.add_message("assistant", user_response)
-
-        # Simulate streaming (smaller chunks for better UX)
-        chunk_size = 2  # Even smaller chunks for smoother streaming
+
+        # Simulate streaming chunks
+        chunk_size = 2
         for i in range(0, len(user_response), chunk_size):
-            yield user_response[i:i+chunk_size]
+            yield user_response[i:i + chunk_size]
             import time
-            time.sleep(0.02)  # Slightly longer delay for readability
+            time.sleep(0.02)
+
 
     def _extract_clean_content(self, buffer: str) -> str:
         """Extract clean content before any summary markers"""
@@ -403,5 +425,5 @@
             "message_count": len(self.conversation),
             "recent_messages": [f"{msg['role']}: {msg['content'][:50]}..." for msg in self.conversation[-3:]]
         }
-        logger.info(f"📊 Context snapshot: {context}")
+        logger.info(f" Context snapshot: {context}")
         return context
--- kssrag-0.2.2/kssrag/server.py
+++ kssrag-0.2.4/kssrag/server.py
@@ -109,23 +109,41 @@ def create_app(rag_agent: RAGAgent, server_config: Optional[ServerConfig] = None
 
         agent = sessions[session_id]
 
+        # async def generate():
+        #     full_response = ""
+        #     try:
+        #         # Use agent's query_stream which handles context and summarization
+        #         for chunk in agent.query_stream(query, top_k=5):
+        #             full_response += chunk
+        #             yield f"data: {json.dumps({'chunk': chunk, 'done': False})}\n\n"
+
+        #         yield f"data: {json.dumps({'chunk': '', 'done': True})}\n\n"
+
+        #     except Exception as e:
+        #         logger.error(f"Streaming error: {str(e)}")
+        #         yield f"data: {json.dumps({'error': str(e), 'done': True})}\n\n"
+
         async def generate():
-            full_response = ""
             try:
-                # Use agent's query_stream which handles context and summarization
-                for chunk in agent.query_stream(query, top_k=5):
-                    full_response += chunk
-                    yield f"data: {json.dumps({'chunk': chunk, 'done': False})}\n\n"
-
+                # Stream tokens ONLY
+                for token in agent.query_stream(query, top_k=5):
+                    if not token:
+                        continue
+
+                    yield f"data: {json.dumps({'chunk': token, 'done': False})}\n\n"
+
+                # Signal completion (no payload)
                 yield f"data: {json.dumps({'chunk': '', 'done': True})}\n\n"
-
+
             except Exception as e:
                 logger.error(f"Streaming error: {str(e)}")
                 yield f"data: {json.dumps({'error': str(e), 'done': True})}\n\n"
+
+
 
         return StreamingResponse(
             generate(),
-            media_type="text/plain",
+            media_type="text/event-stream",
             headers={
                 "Cache-Control": "no-cache",
                 "Connection": "keep-alive",
--- kssrag-0.2.2/kssrag.egg-info/PKG-INFO
+++ kssrag-0.2.4/kssrag.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: kssrag
-Version: 0.2.2
+Version: 0.2.4
 Summary: A flexible Retrieval-Augmented Generation framework by Ksschkw
 Home-page: https://github.com/Ksschkw/kssrag
 Author: Ksschkw
@@ -85,7 +85,7 @@ Dynamic: summary
 
 ![Python Version](https://img.shields.io/badge/python-3.8%2B-blue)
 ![License](https://img.shields.io/badge/license-MIT-green)
-![Version](https://img.shields.io/badge/version-0.2.0-brightgreen)
+![Version](https://img.shields.io/badge/version-0.2.4-brightgreen)
 ![Framework](https://img.shields.io/badge/framework-RAG-orange)
 ![Documentation](https://img.shields.io/badge/docs-comprehensive-brightgreen)
 
@@ -809,6 +809,7 @@ kssrag/
 - [**Full Documentation**](https://github.com/Ksschkw/kssrag/docs)
 - [**API Reference**](https://github.com/Ksschkw/kssrag/docs/api_reference.md)
 - [**Examples Directory**](https://github.com/Ksschkw/kssrag/examples)
+- [**PyPi**](https://pypi.org/project/kssrag/0.2.4/)
 
 ### Community
 - [**GitHub Issues**](https://github.com/Ksschkw/kssrag/issues) - Bug reports and feature requests
--- kssrag-0.2.2/setup.py
+++ kssrag-0.2.4/setup.py
@@ -6,7 +6,7 @@ long_description = (here / "README.md").read_text(encoding="utf-8")
 
 setup(
     name="kssrag",
-    version="0.2.2",
+    version="0.2.4",
     description="A flexible Retrieval-Augmented Generation framework by Ksschkw",
     long_description=long_description,
     long_description_content_type="text/markdown",
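To pick up these fixes, install or pin the new release (pip install kssrag==0.2.4).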