kssrag 0.1.1__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to a supported registry, and is provided for informational purposes only.
kssrag/cli.py CHANGED
@@ -2,8 +2,8 @@ import argparse
 import sys
 import os
 from .utils.document_loaders import load_document, load_json_documents
-from .core.chunkers import TextChunker, JSONChunker, PDFChunker
-from .core.vectorstores import BM25VectorStore, FAISSVectorStore, TFIDFVectorStore, HybridVectorStore, HybridOfflineVectorStore
+from .core.chunkers import ImageChunker, OfficeChunker, TextChunker, JSONChunker, PDFChunker
+from .core.vectorstores import BM25SVectorStore, BM25VectorStore, FAISSVectorStore, TFIDFVectorStore, HybridVectorStore, HybridOfflineVectorStore
 from .core.retrievers import SimpleRetriever, HybridRetriever
 from .core.agents import RAGAgent
 from .models.openrouter import OpenRouterLLM
@@ -19,27 +19,36 @@ def main():
     query_parser = subparsers.add_parser("query", help="Query the RAG system")
     query_parser.add_argument("--file", type=str, required=True, help="Path to document file")
     query_parser.add_argument("--query", type=str, required=True, help="Query to ask")
-    query_parser.add_argument("--format", type=str, default="text", choices=["text", "json", "pdf"],
-                              help="Document format")
+    query_parser.add_argument("--format", type=str, default="text",
+                              choices=["text", "json", "pdf", "image", "docx", "excel", "pptx"],
+                              help="Document format")
     query_parser.add_argument("--vector-store", type=str, default=config.VECTOR_STORE_TYPE,
-                              choices=["bm25", "faiss", "tfidf", "hybrid_online", "hybrid_offline"],
-                              help="Vector store type")
+                              choices=["bm25", "bm25s", "faiss", "tfidf", "hybrid_online", "hybrid_offline"],
+                              help="Vector store type")
+    query_parser.add_argument("--stream", action="store_true",
+                              help="Enable streaming response")
     query_parser.add_argument("--top-k", type=int, default=config.TOP_K, help="Number of results to retrieve")
     query_parser.add_argument("--system-prompt", type=str, help="Path to a file containing the system prompt or the prompt text itself")
+    query_parser.add_argument("--ocr-mode", type=str, choices=["typed", "handwritten"],
+                              default=config.OCR_DEFAULT_MODE,
+                              help="OCR mode for image processing")
 
     # Server command
     server_parser = subparsers.add_parser("server", help="Start the RAG API server")
     server_parser.add_argument("--file", type=str, required=True, help="Path to document file")
-    server_parser.add_argument("--format", type=str, default="text", choices=["text", "json", "pdf"],
-                               help="Document format")
+    server_parser.add_argument("--format", type=str, default="text",
+                               choices=["text", "json", "pdf", "image", "docx", "excel", "pptx"],
+                               help="Document format")
     server_parser.add_argument("--vector-store", type=str, default=config.VECTOR_STORE_TYPE,
-                               choices=["bm25", "faiss", "tfidf", "hybrid_online", "hybrid_offline"],
-                               help="Vector store type")
+                               choices=["bm25", "bm25s", "faiss", "tfidf", "hybrid_online", "hybrid_offline"],
+                               help="Vector store type")
     server_parser.add_argument("--port", type=int, default=config.SERVER_PORT, help="Port to run server on")
     server_parser.add_argument("--host", type=str, default=config.SERVER_HOST, help="Host to run server on")
     server_parser.add_argument("--system-prompt", type=str, help="Path to a file containing the system prompt or the prompt text itself")
 
     args = parser.parse_args()
+    vector_store_type = args.vector_store if hasattr(args, 'vector_store') else config.VECTOR_STORE_TYPE
 
     # Validate config
     validate_config()
@@ -52,6 +61,7 @@ def main():
             with open(prompt_arg, 'r', encoding='utf-8') as f:
                 return f.read()
         return prompt_arg
+
 
     if args.command == "query":
         # Load and process document
@@ -66,6 +76,17 @@ def main():
         elif args.format == "pdf":
             chunker = PDFChunker(chunk_size=config.CHUNK_SIZE, overlap=config.CHUNK_OVERLAP)
             documents = chunker.chunk_pdf(args.file, {"source": args.file})
+        elif args.format == "image":
+            chunker = ImageChunker(
+                chunk_size=config.CHUNK_SIZE,
+                overlap=config.CHUNK_OVERLAP,
+                ocr_mode=getattr(args, 'ocr_mode', config.OCR_DEFAULT_MODE)
+            )
+            documents = chunker.chunk(args.file, {"source": args.file})
+        elif args.format in ["docx", "excel", "pptx"]:
+            # Use OfficeChunker for office documents
+            chunker = OfficeChunker(chunk_size=config.CHUNK_SIZE, overlap=config.CHUNK_OVERLAP)
+            documents = chunker.chunk_office(args.file, {"source": args.file})
         else:
             logger.error(f"Unsupported format: {args.format}")
             return 1
@@ -81,6 +102,8 @@ def main():
             vector_store = HybridVectorStore()
         elif args.vector_store == "hybrid_offline":
             vector_store = HybridOfflineVectorStore()
+        elif args.vector_store == "bm25s":
+            vector_store = BM25SVectorStore()
         else:
             logger.error(f"Unsupported vector store: {args.vector_store}")
             return 1
@@ -94,9 +117,30 @@ def main():
         agent = RAGAgent(retriever, llm, system_prompt=system_prompt)
 
         # Query and print response
-        response = agent.query(args.query, top_k=args.top_k)
-        print(f"Query: {args.query}")
-        print(f"Response: {response}")
+        if args.stream:
+            print(f"Query: {args.query}")
+            print("Response: ", end="", flush=True)
+
+            try:
+                # Print chunks as they arrive; query_stream adds the full
+                # response to the conversation history itself
+                full_response = ""
+                for chunk in agent.query_stream(args.query, top_k=args.top_k):
+                    print(chunk, end="", flush=True)
+                    full_response += chunk
+                print()  # Newline at the end
+            except Exception as e:
+                print(f"\nError during streaming: {str(e)}")
+        else:
+            response = agent.query(args.query, top_k=args.top_k)
+            print(f"Query: {args.query}")
+            print(f"Response: {response}")
 
     elif args.command == "server":
         # Load and process document
@@ -126,6 +170,8 @@ def main():
             vector_store = HybridVectorStore()
         elif args.vector_store == "hybrid_offline":
             vector_store = HybridOfflineVectorStore()
+        elif args.vector_store == "bm25s":
+            vector_store = BM25SVectorStore()
         else:
             logger.error(f"Unsupported vector store: {args.vector_store}")
             return 1
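
The CLI changes above map directly onto the library API. As a minimal sketch, the same path in plain Python might look like this — the imports come from the diff itself, while the exact SimpleRetriever and RAGAgent constructor arguments are assumptions based on how the CLI wires these pieces together, and the file path is hypothetical:

    from kssrag.core.chunkers import ImageChunker
    from kssrag.core.vectorstores import BM25SVectorStore
    from kssrag.core.retrievers import SimpleRetriever
    from kssrag.core.agents import RAGAgent
    from kssrag.models.openrouter import OpenRouterLLM

    # Equivalent of: kssrag query --format image --vector-store bm25s --stream
    chunker = ImageChunker(chunk_size=500, overlap=50, ocr_mode="typed")
    documents = chunker.chunk("scan.png", {"source": "scan.png"})

    vector_store = BM25SVectorStore()
    vector_store.add_documents(documents)

    agent = RAGAgent(SimpleRetriever(vector_store), OpenRouterLLM())

    # Print chunks as they arrive, as the new --stream flag does
    for chunk in agent.query_stream("What does the scan say?", top_k=5):
        print(chunk, end="", flush=True)
    print()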
kssrag/config.py CHANGED
@@ -9,6 +9,7 @@ load_dotenv()
 
 class VectorStoreType(str, Enum):
     BM25 = "bm25"
+    BM25S = "bm25s"
     FAISS = "faiss"
     TFIDF = "tfidf"
     HYBRID_ONLINE = "hybrid_online"
@@ -19,6 +20,7 @@ class ChunkerType(str, Enum):
     TEXT = "text"
     JSON = "json"
     PDF = "pdf"
+    IMAGE = "image"
     CUSTOM = "custom"
 
 class RetrieverType(str, Enum):
@@ -36,7 +38,7 @@ class Config(BaseSettings):
     )
 
     DEFAULT_MODEL: str = Field(
-        default=os.getenv("DEFAULT_MODEL", "deepseek/deepseek-chat-v3.1:free"),
+        default=os.getenv("DEFAULT_MODEL", "deepseek/deepseek-chat"),
         description="Default model to use for LLM responses"
     )
 
@@ -183,6 +185,18 @@ class Config(BaseSettings):
         env_file = ".env"
         case_sensitive = False
         use_enum_values = True
+
+    # OCR settings
+    OCR_DEFAULT_MODE: str = Field(
+        default=os.getenv("OCR_DEFAULT_MODE", "typed"),
+        description="Default OCR mode: typed or handwritten"
+    )
+
+    # Streaming settings
+    ENABLE_STREAMING: bool = Field(
+        default=os.getenv("ENABLE_STREAMING", "False").lower() == "true",
+        description="Whether to enable streaming responses"
+    )
 
     @validator('FALLBACK_MODELS', 'CORS_ORIGINS', 'CORS_ALLOW_METHODS', 'CORS_ALLOW_HEADERS', pre=True)
     def split_string(cls, v):
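
Both new settings follow the existing os.getenv pattern, so they can be overridden from the environment (or .env) before the config module is imported. A small sketch, assuming config is instantiated at import time as the `from ..config import config` usages elsewhere in the diff suggest:

    import os

    os.environ["OCR_DEFAULT_MODE"] = "handwritten"  # default is "typed"
    os.environ["ENABLE_STREAMING"] = "true"         # compared case-insensitively

    from kssrag.config import config

    assert config.OCR_DEFAULT_MODE == "handwritten"
    assert config.ENABLE_STREAMING is True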
kssrag/core/agents.py CHANGED
@@ -1,4 +1,4 @@
-from typing import List, Dict, Any, Optional
+from typing import Generator, List, Dict, Any, Optional
 from ..utils.helpers import logger
 
 class RAGAgent:
@@ -29,6 +29,32 @@ class RAGAgent:
         # Keep the most recent messages
         self.conversation = [system_msg] + other_msgs[-9:] if system_msg else other_msgs[-10:]
 
+    def _build_context(self, context_docs: List[Dict[str, Any]]) -> str:
+        """Build a context string from retrieved documents"""
+        if not context_docs:
+            return ""
+
+        context = "Relevant information:\n"
+        for i, doc in enumerate(context_docs, 1):
+            context += f"\n--- Document {i} ---\n{doc['content']}\n"
+        return context
+
+    def _build_messages(self, question: str, context: str = "") -> List[Dict[str, str]]:
+        """Build the LLM message list, including context"""
+        # Start with conversation history
+        messages = self.conversation.copy()
+
+        # Add the user query with context
+        user_message = f"{context}\n\nQuestion: {question}" if context else question
+
+        # Replace the last user message if it exists, otherwise add a new one
+        if messages and messages[-1]["role"] == "user":
+            messages[-1]["content"] = user_message
+        else:
+            messages.append({"role": "user", "content": user_message})
+
+        return messages
+
     def query(self, question: str, top_k: int = 5, include_context: bool = True) -> str:
         """Process a query and return a response"""
         try:
@@ -40,18 +66,13 @@
                 return "I couldn't find relevant information to answer your question."
 
             # Format context
-            context = ""
-            if include_context and context_docs:
-                context = "Relevant information:\n"
-                for i, doc in enumerate(context_docs, 1):
-                    context += f"\n--- Document {i} ---\n{doc['content']}\n"
+            context = self._build_context(context_docs) if include_context and context_docs else ""
 
-            # Add user query with context
-            user_message = f"{context}\n\nQuestion: {question}" if context else question
-            self.add_message("user", user_message)
+            # Build messages
+            messages = self._build_messages(question, context)
 
             # Generate response
-            response = self.llm.predict(self.conversation)
+            response = self.llm.predict(messages)
 
             # Add assistant response to conversation
             self.add_message("assistant", response)
@@ -62,6 +83,36 @@
             logger.error(f"Error processing query: {str(e)}")
             return "I encountered an issue processing your query. Please try again."
 
+    def query_stream(self, question: str, top_k: int = 5) -> Generator[str, None, None]:
+        """Query the RAG system with a streaming response"""
+        try:
+            # Retrieve relevant documents
+            relevant_docs = self.retriever.retrieve(question, top_k=top_k)
+
+            # Build context from documents
+            context = self._build_context(relevant_docs)
+
+            # Build messages
+            messages = self._build_messages(question, context)
+
+            # Stream the response from the LLM, accumulating chunks as they
+            # are yielded so the full reply is added to the conversation
+            # history exactly once (without a second API call)
+            if hasattr(self.llm, 'predict_stream'):
+                full_response = ""
+                for chunk in self.llm.predict_stream(messages):
+                    full_response += chunk
+                    yield chunk
+
+                self.add_message("assistant", full_response)
+            else:
+                # Fall back to non-streaming
+                response = self.llm.predict(messages)
+                self.add_message("assistant", response)
+                yield response
+
+        except Exception as e:
+            logger.error(f"Error in streaming query: {str(e)}")
+            yield f"Error: {str(e)}"
+
     def clear_conversation(self):
         """Clear conversation history except system message"""
         system_msg = next((msg for msg in self.conversation if msg["role"] == "system"), None)
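
For consumers, the new generator contract is simple: query_stream yields str chunks and appends the assembled reply to the conversation history itself. A short sketch, where retriever and llm are stand-ins for any objects exposing the .retrieve() and .predict()/.predict_stream() methods the agent expects:

    agent = RAGAgent(retriever, llm)  # retriever/llm are stand-ins

    pieces = []
    for piece in agent.query_stream("Summarize the document", top_k=3):
        pieces.append(piece)
    answer = "".join(pieces)

    # If llm lacks predict_stream, the generator degrades to a single
    # yield containing the full non-streamed response.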
kssrag/core/chunkers.py CHANGED
@@ -1,8 +1,16 @@
 import json
 import re
+import os
 from typing import List, Dict, Any, Optional
 import pypdf
 from ..utils.helpers import logger
+try:
+    from ..utils.ocr_loader import OCRLoader
+    OCR_AVAILABLE = True
+except ImportError:
+    OCR_AVAILABLE = False
+    OCRLoader = None
 
 class BaseChunker:
     """Base class for document chunkers"""
@@ -46,7 +54,7 @@ class TextChunker(BaseChunker):
         return chunks
 
 class JSONChunker(BaseChunker):
-    """Chunker for JSON documents (like your drug data)"""
+    """Chunker for JSON documents"""
 
     def chunk(self, data: List[Dict[str, Any]], metadata_field: str = "name") -> List[Dict[str, Any]]:
         """Create chunks from JSON data"""
@@ -97,4 +105,90 @@ class PDFChunker(TextChunker):
     def chunk_pdf(self, pdf_path: str, metadata: Optional[Dict[str, Any]] = None) -> List[Dict[str, Any]]:
         """Extract text from PDF and chunk it"""
         text = self.extract_text(pdf_path)
+        return self.chunk(text, metadata)
+
+class ImageChunker(BaseChunker):
+    """Chunker for image documents using OCR"""
+
+    def __init__(self, chunk_size: int = 500, overlap: int = 50, ocr_mode: str = "typed"):
+        super().__init__(chunk_size, overlap)
+        self.ocr_mode = ocr_mode  # "typed" or "handwritten"
+        self.ocr_loader = None
+
+        # Initialize OCR loader
+        try:
+            from ..utils.ocr_loader import OCRLoader
+            self.ocr_loader = OCRLoader()
+            logger.info(f"OCR loader initialized with mode: {ocr_mode}")
+        except ImportError as e:
+            logger.error(f"OCR dependencies not available: {str(e)}")
+            raise ImportError(
+                "OCR functionality requires extra dependencies. "
+                "Install with: pip install kssrag[ocr]"
+            ) from e
+
+    def extract_text_from_image(self, image_path: str) -> str:
+        """Extract text from an image using the configured OCR engine"""
+        if not self.ocr_loader:
+            raise RuntimeError("OCR loader not initialized")
+
+        if self.ocr_mode not in ["typed", "handwritten"]:
+            raise ValueError(f"Invalid OCR mode: {self.ocr_mode}. Must be 'typed' or 'handwritten'")
+
+        logger.info(f"Extracting text from {image_path} using {self.ocr_mode} OCR")
+
+        try:
+            text = self.ocr_loader.extract_text(image_path, self.ocr_mode)
+
+            if not text.strip():
+                logger.warning(f"No text extracted from image: {image_path}")
+                return ""
+
+            logger.info(f"Successfully extracted {len(text)} characters from {image_path}")
+            return text
+
+        except Exception as e:
+            logger.error(f"OCR extraction failed for {image_path}: {str(e)}")
+            raise RuntimeError(f"Failed to extract text from image {image_path}: {str(e)}")
+
+    def chunk(self, image_path: str, metadata: Optional[Dict[str, Any]] = None) -> List[Dict[str, Any]]:
+        """Extract text from an image and chunk it"""
+        if metadata is None:
+            metadata = {}
+
+        # Validate image file
+        if not os.path.exists(image_path):
+            raise FileNotFoundError(f"Image file not found: {image_path}")
+
+        # Extract text from image
+        text = self.extract_text_from_image(image_path)
+
+        if not text.strip():
+            return []
+
+        # Use text chunking on the extracted text
+        text_chunker = TextChunker(chunk_size=self.chunk_size, overlap=self.overlap)
+        chunks = text_chunker.chunk(text, metadata)
+
+        # Add OCR-specific metadata
+        for chunk in chunks:
+            chunk["metadata"]["ocr_extracted"] = True
+            chunk["metadata"]["image_source"] = image_path
+            chunk["metadata"]["ocr_mode"] = self.ocr_mode
+
+        logger.info(f"Created {len(chunks)} chunks from image {image_path}")
+        return chunks
+
+class OfficeChunker(TextChunker):
+    """Chunker for Office documents (DOCX, Excel, PowerPoint)"""
+
+    def chunk_office(self, file_path: str, metadata: Optional[Dict[str, Any]] = None) -> List[Dict[str, Any]]:
+        """Chunk office documents by extracting their text first"""
+        if metadata is None:
+            metadata = {}
+
+        # Extract text based on file type
+        from ..utils.document_loaders import load_document
+        text = load_document(file_path)
+
         return self.chunk(text, metadata)
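
Illustratively, the chunks ImageChunker returns are plain dicts carrying the OCR metadata set above. A small sketch; the file paths and default chunk sizes here are hypothetical:

    chunker = ImageChunker(chunk_size=500, overlap=50, ocr_mode="handwritten")
    for chunk in chunker.chunk("note.jpg", {"source": "note.jpg"}):
        assert chunk["metadata"]["ocr_extracted"] is True
        assert chunk["metadata"]["ocr_mode"] == "handwritten"
        print(chunk["content"][:80])

    # Office formats reuse the text pipeline after extraction
    office_chunks = OfficeChunker(chunk_size=500, overlap=50).chunk_office(
        "report.docx", {"source": "report.docx"}
    )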
kssrag/core/vectorstores.py CHANGED
@@ -13,6 +13,13 @@ from typing import List, Dict, Any, Optional
 from ..utils.helpers import logger
 from ..config import config
 
+FAISS_AVAILABLE = False
+try:
+    import faiss
+    FAISS_AVAILABLE = True
+except ImportError:
+    pass
+
 class BaseVectorStore:
     """Base class for vector stores"""
 
@@ -102,11 +109,23 @@ class BM25VectorStore(BaseVectorStore):
         logger.info(f"BM25 index loaded from {self.persist_path}")
 
 import tempfile
 class FAISSVectorStore(BaseVectorStore):
     def __init__(self, persist_path: Optional[str] = None, model_name: Optional[str] = None):
+        # Only set up FAISS when this vector store is actually used
+        from ..utils.helpers import setup_faiss
+        faiss_available, _ = setup_faiss("faiss")  # Explicitly request FAISS
+
+        if not faiss_available:
+            raise ImportError("FAISS is not available. Please install it with 'pip install faiss-cpu' or use a different vector store.")
+
         super().__init__(persist_path)
         self.model_name = model_name or config.FAISS_MODEL_NAME
-
         # Handle cache directory permissions
         try:
             cache_dir = config.CACHE_DIR
@@ -394,4 +413,86 @@ class HybridOfflineVectorStore(BaseVectorStore):
         self.bm25_store.load()
         self.tfidf_store.load()
         self.documents = self.bm25_store.documents
-        logger.info(f"Hybrid offline index loaded")
+        logger.info(f"Hybrid offline index loaded")
+
+import bm25s
+from Stemmer import Stemmer
+
+class BM25SVectorStore(BaseVectorStore):
+    """BM25S vector store using the bm25s library for ultra-fast retrieval"""
+
+    def __init__(self, persist_path: Optional[str] = "bm25s_index.pkl"):
+        super().__init__(persist_path)
+        self.bm25_retriever = None
+        self.stemmer = Stemmer("english")
+        self.corpus_tokens = None
+
+    def add_documents(self, documents: List[Dict[str, Any]]):
+        self.documents = documents
+        self.doc_texts = [doc["content"] for doc in documents]
+
+        try:
+            # Tokenize the corpus with BM25S
+            self.corpus_tokens = bm25s.tokenize(
+                self.doc_texts,
+                stopwords="en",
+                stemmer=self.stemmer
+            )
+
+            # Create and index with BM25S
+            self.bm25_retriever = bm25s.BM25()
+            self.bm25_retriever.index(self.corpus_tokens)
+
+            logger.info(f"BM25S index created with {len(self.documents)} documents")
+
+        except Exception as e:
+            logger.error(f"BM25S initialization failed: {str(e)}")
+            raise
+
+    def retrieve(self, query: str, top_k: int = 5) -> List[Dict[str, Any]]:
+        if not self.bm25_retriever:
+            raise ValueError("BM25S index not initialized. Call add_documents first.")
+
+        try:
+            # Tokenize the query with BM25S
+            query_tokens = bm25s.tokenize([query], stemmer=self.stemmer)
+
+            # Retrieve with BM25S
+            results, scores = self.bm25_retriever.retrieve(query_tokens, k=top_k)
+
+            # Format results
+            retrieved_docs = []
+            for i in range(results.shape[1]):
+                doc_idx = results[0, i]
+                score = scores[0, i]
+
+                if doc_idx < len(self.documents):
+                    retrieved_docs.append(self.documents[doc_idx])
+
+            logger.info(f"BM25S retrieved {len(retrieved_docs)} documents for query: {query}")
+            return retrieved_docs
+
+        except Exception as e:
+            logger.error(f"BM25S retrieval failed for query '{query}': {str(e)}")
+            return []
+
+    def persist(self):
+        if self.persist_path:
+            with open(self.persist_path, 'wb') as f:
+                pickle.dump({
+                    'documents': self.documents,
+                    'doc_texts': self.doc_texts,
+                    'corpus_tokens': self.corpus_tokens,
+                    'bm25_retriever': self.bm25_retriever
+                }, f)
+            logger.info(f"BM25S index persisted to {self.persist_path}")
+
+    def load(self):
+        if self.persist_path and os.path.exists(self.persist_path):
+            with open(self.persist_path, 'rb') as f:
+                data = pickle.load(f)
+            self.documents = data['documents']
+            self.doc_texts = data['doc_texts']
+            self.corpus_tokens = data['corpus_tokens']
+            self.bm25_retriever = data['bm25_retriever']
+            logger.info(f"BM25S index loaded from {self.persist_path}")
kssrag/models/openrouter.py CHANGED
@@ -1,17 +1,18 @@
 import requests
 import json
-from typing import List, Dict, Any, Optional
+from typing import List, Dict, Any, Optional, Generator
 from ..utils.helpers import logger
 from ..config import config
 
 class OpenRouterLLM:
-    """OpenRouter LLM interface with fallback models"""
+    """OpenRouter LLM interface with streaming support"""
 
     def __init__(self, api_key: Optional[str] = None, model: Optional[str] = None,
-                 fallback_models: Optional[List[str]] = None):
+                 fallback_models: Optional[List[str]] = None, stream: bool = False):
         self.api_key = api_key or config.OPENROUTER_API_KEY
         self.model = model or config.DEFAULT_MODEL
         self.fallback_models = fallback_models or config.FALLBACK_MODELS
+        self.stream = stream
         self.base_url = "https://openrouter.ai/api/v1/chat/completions"
         self.headers = {
             "Authorization": f"Bearer {self.api_key}",
@@ -21,8 +22,14 @@ class OpenRouterLLM:
         }
 
     def predict(self, messages: List[Dict[str, str]]) -> str:
-        """Generate a response using OpenRouter's API with fallbacks"""
-        logger.info(f"Attempting to generate response with {len(messages)} messages")
+        """Generate a response with fallback models"""
+        if self.stream:
+            full_response = ""
+            for chunk in self.predict_stream(messages):
+                full_response += chunk
+            return full_response
+
+        logger.info(f"Generating response with {len(messages)} messages")
 
         for model in [self.model] + self.fallback_models:
             payload = {
@@ -36,21 +43,17 @@
             }
 
             try:
-                logger.info(f"Trying model: {model}")
+                logger.info(f"Using model: {model}")
                 response = requests.post(
                     self.base_url,
                     headers=self.headers,
                     json=payload,
-                    timeout=15
+                    timeout=30
                 )
 
-                # Check for HTTP errors
                 response.raise_for_status()
-
-                # Parse JSON response
                 response_data = response.json()
 
-                # Validate response structure
                 if ("choices" not in response_data or
                     len(response_data["choices"]) == 0 or
                     "message" not in response_data["choices"][0] or
@@ -60,7 +63,7 @@
                     continue
 
                 content = response_data["choices"][0]["message"]["content"]
-                logger.info(f"Successfully used model: {model}")
+                logger.info(f"Successfully generated response with model: {model}")
                 return content
 
             except requests.exceptions.Timeout:
@@ -79,7 +82,66 @@
                 logger.warning(f"Unexpected error with model {model}: {str(e)}")
                 continue
 
-        # If all models fail, return a friendly error message
-        error_msg = "I'm having trouble connecting to the knowledge service right now. Please try again in a moment."
+        error_msg = "Unable to generate response from available models. Please try again."
         logger.error("All model fallbacks failed to respond")
-        return error_msg
+        return error_msg
+
+    def predict_stream(self, messages: List[Dict[str, str]]) -> Generator[str, None, None]:
+        """Stream a response from the OpenRouter API"""
+        logger.info(f"Streaming response with {len(messages)} messages")
+
+        for model in [self.model] + self.fallback_models:
+            payload = {
+                "model": model,
+                "messages": messages,
+                "temperature": 0.7,
+                "max_tokens": 1024,
+                "top_p": 1,
+                "stop": None,
+                "stream": True
+            }
+
+            try:
+                logger.info(f"Streaming with model: {model}")
+                response = requests.post(
+                    self.base_url,
+                    headers=self.headers,
+                    json=payload,
+                    timeout=60,
+                    stream=True
+                )
+
+                response.raise_for_status()
+
+                for line in response.iter_lines():
+                    if line:
+                        line = line.decode('utf-8')
+                        if line.startswith('data: '):
+                            data = line[6:]
+                            if data.strip() == '[DONE]':
+                                logger.info("Stream completed successfully")
+                                return
+                            try:
+                                chunk_data = json.loads(data)
+                                if ('choices' in chunk_data and
+                                        len(chunk_data['choices']) > 0 and
+                                        'delta' in chunk_data['choices'][0] and
+                                        'content' in chunk_data['choices'][0]['delta']):
+                                    content = chunk_data['choices'][0]['delta']['content']
+                                    if content:
+                                        yield content
+                            except json.JSONDecodeError as e:
+                                logger.warning(f"Failed to parse stream chunk: {str(e)}")
+                                continue
+
+                logger.info(f"Successfully streamed from model: {model}")
+                return
+
+            except Exception as e:
+                logger.warning(f"Streaming failed with model {model}: {str(e)}")
+                continue
+
+        error_msg = "Unable to stream response from available models. Please try again."
+        logger.error("All model fallbacks failed for streaming")
+        yield error_msg
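
Finally, a hedged sketch of the two ways the new streaming surface can be consumed, assuming OPENROUTER_API_KEY is set so config.OPENROUTER_API_KEY resolves:

    llm = OpenRouterLLM(stream=True)
    messages = [{"role": "user", "content": "One sentence on BM25."}]

    # With stream=True, predict() drains predict_stream() internally and
    # returns the concatenated text...
    print(llm.predict(messages))

    # ...or consume the SSE chunks directly:
    for chunk in llm.predict_stream(messages):
        print(chunk, end="", flush=True)
    print()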