mb-rag 1.1.61__tar.gz → 1.1.67__tar.gz
This diff shows the changes between publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.
- {mb_rag-1.1.61 → mb_rag-1.1.67}/PKG-INFO +1 -1
- {mb_rag-1.1.61 → mb_rag-1.1.67}/mb_rag/basic.py +2 -0
- mb_rag-1.1.67/mb_rag/prompts_bank.py +77 -0
- {mb_rag-1.1.61 → mb_rag-1.1.67}/mb_rag/rag/embeddings.py +83 -88
- mb_rag-1.1.67/mb_rag/utils/llm_wrapper.py +31 -0
- mb_rag-1.1.67/mb_rag/utils/viewer.py +8 -0
- {mb_rag-1.1.61 → mb_rag-1.1.67}/mb_rag/version.py +1 -1
- {mb_rag-1.1.61 → mb_rag-1.1.67}/mb_rag.egg-info/PKG-INFO +1 -1
- {mb_rag-1.1.61 → mb_rag-1.1.67}/mb_rag.egg-info/SOURCES.txt +4 -2
- mb_rag-1.1.61/mb_rag/chatbot/prompts.py +0 -59
- {mb_rag-1.1.61 → mb_rag-1.1.67}/README.md +0 -0
- {mb_rag-1.1.61 → mb_rag-1.1.67}/mb_rag/__init__.py +0 -0
- {mb_rag-1.1.61 → mb_rag-1.1.67}/mb_rag/chatbot/__init__.py +0 -0
- {mb_rag-1.1.61 → mb_rag-1.1.67}/mb_rag/chatbot/chains.py +0 -0
- {mb_rag-1.1.61 → mb_rag-1.1.67}/mb_rag/chatbot/conversation.py +0 -0
- {mb_rag-1.1.61 → mb_rag-1.1.67}/mb_rag/rag/__init__.py +0 -0
- {mb_rag-1.1.61 → mb_rag-1.1.67}/mb_rag/utils/__init__.py +0 -0
- {mb_rag-1.1.61 → mb_rag-1.1.67}/mb_rag/utils/all_data_extract.py +0 -0
- {mb_rag-1.1.61 → mb_rag-1.1.67}/mb_rag/utils/bounding_box.py +0 -0
- {mb_rag-1.1.61 → mb_rag-1.1.67}/mb_rag/utils/document_extract.py +0 -0
- {mb_rag-1.1.61 → mb_rag-1.1.67}/mb_rag/utils/extra.py +0 -0
- {mb_rag-1.1.61 → mb_rag-1.1.67}/mb_rag/utils/pdf_extract.py +0 -0
- {mb_rag-1.1.61 → mb_rag-1.1.67}/mb_rag.egg-info/dependency_links.txt +0 -0
- {mb_rag-1.1.61 → mb_rag-1.1.67}/mb_rag.egg-info/requires.txt +0 -0
- {mb_rag-1.1.61 → mb_rag-1.1.67}/mb_rag.egg-info/top_level.txt +0 -0
- {mb_rag-1.1.61 → mb_rag-1.1.67}/pyproject.toml +0 -0
- {mb_rag-1.1.61 → mb_rag-1.1.67}/setup.cfg +0 -0
- {mb_rag-1.1.61 → mb_rag-1.1.67}/setup.py +0 -0
{mb_rag-1.1.61 → mb_rag-1.1.67}/mb_rag/basic.py
@@ -357,6 +357,8 @@ class ModelFactory:
         Returns:
             str: Output from the model
         """
+        if not isinstance(images, list):
+            images = [images]
         base64_images = [self._image_to_base64(image) for image in images]
         image_prompt_create = [{"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{base64_images[i]}"}} for i in range(len(images))]
         prompt_new = [{"type": "text", "text": prompt}, *image_prompt_create]
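The two added lines normalize the `images` argument so callers can pass either a single image path or a list. A minimal sketch of the same pattern; the standalone `image_to_base64` helper and `build_image_prompt` name are illustrative stand-ins for the `ModelFactory` method, not part of the package:

```python
import base64

def image_to_base64(path: str) -> str:
    # Stand-in for ModelFactory._image_to_base64: read the file and base64-encode it.
    with open(path, "rb") as f:
        return base64.b64encode(f.read()).decode("utf-8")

def build_image_prompt(prompt: str, images) -> list:
    # Same normalization as the patched method: a bare string becomes a one-item list.
    if not isinstance(images, list):
        images = [images]
    base64_images = [image_to_base64(image) for image in images]
    image_parts = [
        {"type": "image_url",
         "image_url": {"url": f"data:image/jpeg;base64,{b64}"}}
        for b64 in base64_images
    ]
    return [{"type": "text", "text": prompt}, *image_parts]

# Both call styles now produce the same structure:
# build_image_prompt("Describe this image", "photo.jpg")
# build_image_prompt("Compare these images", ["a.jpg", "b.jpg"])
```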
mb_rag-1.1.67/mb_rag/prompts_bank.py (new file)
@@ -0,0 +1,77 @@
+from langchain_core.prompts.chat import ChatPromptTemplate
+
+__all__ = ["PromptManager"]
+
+class PromptManager:
+    """
+    Central class for storing and invoking prompt templates.
+
+    Example:
+        pm = PromptManager()
+        prompt_text = pm.render_prompt("greeting")
+        print(prompt_text)
+
+        pm = PromptManager()
+        prompt_text = pm.render_prompt("todo_task", {"task": "Plan a deep learning project for image recognition"})
+        print(prompt_text)
+    """
+
+    def __init__(self):
+        self.templates = {
+            "coding_python": """You are a Python developer.
+Human: {question}
+Assistant:""",
+
+            "greeting": """You are a friendly assistant.
+Human: Hello!
+Assistant: Hi there! How can I assist you today?""",
+
+            "goodbye": """You are a friendly assistant.
+Human: Goodbye!
+Assistant: Goodbye! Have a great day!""",
+
+            "todo_task": """You are a helpful assistant.
+Human: Please create a to-do list for the following task: {task}
+Assistant:""",
+
+            "map_function": "*map(lambda x: image_url, baseframes_list)",
+
+            "SQL_AGENT_SYS_PROMPT": """You are an expert SQL agent. Your task is to generate and execute SQL queries based on user requests.
+RULES:
+- THINK step by step before answering.
+- Use the provided database schema to inform your queries.
+- When you need to retrieve data, generate a SQL query and execute it using the provided tools.
+- Read-only mode: Do not attempt to modify the database.
+- NO INSERT/UPDATE/DELETE/ALTER/DROP/CREATE/REPLACE/TRUNCATE statements allowed.
+- LIMIT your results to 10 rows. Unless specified otherwise.
+- If you encounter an error while executing a query, analyze the error message and adjust your query accordingly.
+- Prefer using explicit column names instead of SELECT * for better performance.
+- Always ensure your SQL syntax is correct. """
+        }
+
+    def get_template(self, name: str) -> str:
+        """
+        Get a prompt template by name.
+        Args:
+            name (str): The key name of the prompt.
+        Returns:
+            str: The prompt template string.
+        """
+        template = self.templates.get(name)
+        if not template:
+            raise ValueError(f"Prompt '{name}' not found. Available prompts: {list(self.templates.keys())}")
+        return template
+
+    def render_prompt(self, name: str, context: dict = None) -> str:
+        """
+        Fill and return a rendered prompt string.
+        Args:
+            name (str): The key name of the prompt.
+            context (dict): Variables to fill into the template.
+        Returns:
+            str: The final rendered prompt text.
+        """
+        template = self.get_template(name)
+        chat_prompt = ChatPromptTemplate.from_template(template)
+        rendered = chat_prompt.invoke(context or {})
+        return rendered.to_string()
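A usage sketch for the new `PromptManager`, mirroring its docstring example. One caveat worth knowing: `render_prompt` routes through `ChatPromptTemplate.from_template(...).invoke(...)`, which treats the whole template as a single human message, so the string from `to_string()` carries LangChain's `Human:` role prefix in front of the template text:

```python
from mb_rag.prompts_bank import PromptManager

pm = PromptManager()

# Static template: nothing to fill in.
print(pm.render_prompt("greeting"))

# Template with a {task} placeholder, filled from the context dict.
print(pm.render_prompt("todo_task",
                       {"task": "Plan a deep learning project for image recognition"}))

# Unknown names raise ValueError listing the available keys.
try:
    pm.render_prompt("does_not_exist")
except ValueError as err:
    print(err)
```

Templates whose placeholders are not supplied (e.g. `coding_python` without a `question` key) raise a `KeyError` from LangChain's template formatting rather than a `ValueError`.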
{mb_rag-1.1.61 → mb_rag-1.1.67}/mb_rag/rag/embeddings.py
@@ -57,21 +57,20 @@ import os
 import shutil
 import importlib.util
 from typing import List, Dict, Optional, Union, Any
-from
+from langchain_text_splitters import (
     CharacterTextSplitter,
     RecursiveCharacterTextSplitter,
     SentenceTransformersTokenTextSplitter,
     TokenTextSplitter,
-    MarkdownHeaderTextSplitter
-    SemanticChunker)
+    MarkdownHeaderTextSplitter)
 from langchain_community.document_loaders import TextLoader, FireCrawlLoader
 from langchain_chroma import Chroma
 from ..utils.extra import load_env_file
-from langchain.chains import create_history_aware_retriever, create_retrieval_chain
-from langchain.chains.combine_documents import create_stuff_documents_chain
+# from langchain.chains import create_history_aware_retriever, create_retrieval_chain
+# from langchain.chains.combine_documents import create_stuff_documents_chain
 from langchain_core.messages import HumanMessage, SystemMessage
 from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
-from langchain.retrievers import ContextualCompressionRetriever
+# from langchain.retrievers import ContextualCompressionRetriever
 from langchain_community.document_compressors import FlashrankRerank
 
 load_env_file()
{mb_rag-1.1.61 → mb_rag-1.1.67}/mb_rag/rag/embeddings.py
@@ -317,10 +316,6 @@ class TextProcessor:
                 chunk_size=chunk_size,
                 chunk_overlap=chunk_overlap
             ),
-            'semantic_chunker': SemanticChunker(
-                chunk_size=chunk_size,
-                chunk_overlap=chunk_overlap
-            )
         }
 
         if text_splitter_type not in splitters:
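Dropping `semantic_chunker` looks like a bug fix: `SemanticChunker` lives in `langchain_experimental`, not `langchain_text_splitters`, and takes an embeddings model rather than `chunk_size`/`chunk_overlap`, so the old import and entry presumably could not have worked as written. What remains is a plain registry lookup over the standard splitters. A minimal sketch of that pattern; the key names `"character"`/`"recursive"` here are illustrative, not necessarily the package's actual keys:

```python
from langchain_text_splitters import (
    CharacterTextSplitter,
    RecursiveCharacterTextSplitter,
)

def get_splitter(text_splitter_type: str, chunk_size: int = 1000, chunk_overlap: int = 100):
    # Registry lookup, as in TextProcessor: build the named splitter or fail loudly.
    splitters = {
        "character": CharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap),
        "recursive": RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap),
    }
    if text_splitter_type not in splitters:
        raise ValueError(f"Unknown text splitter type: {text_splitter_type}")
    return splitters[text_splitter_type]

chunks = get_splitter("recursive", chunk_size=200, chunk_overlap=20).split_text(
    "Retrieval-augmented generation splits documents into chunks before embedding. " * 20
)
print(len(chunks), "chunks")
```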
{mb_rag-1.1.61 → mb_rag-1.1.67}/mb_rag/rag/embeddings.py
@@ -584,32 +579,32 @@ class embedding_generator:
         retriever = self.retriever
         return retriever.get_relevant_documents(query)
 
-    def load_flashrank_compression_retriever(self, base_retriever=None, model_name: str = "flashrank/flashrank-base", top_n: int = 5):
-        """
-        Load a ContextualCompressionRetriever using FlashrankRerank.
-
-        Args:
-            base_retriever: Existing retriever (if None, uses self.retriever)
-            model_name (str): Flashrank model identifier (default: "flashrank/flashrank-base")
-            top_n (int): Number of top documents to return after reranking
-
-        Returns:
-            ContextualCompressionRetriever: A compression-based retriever using Flashrank
-        """
-        if base_retriever is None:
-            base_retriever = self.retriever
-        if base_retriever is None:
-            raise ValueError("Base retriever is required.")
-
-        compressor = FlashrankRerank(model=model_name, top_n=top_n)
-        self.compression_retriever = ContextualCompressionRetriever(
-            base_compressor=compressor,
-            base_retriever=base_retriever
-        )
-
-        if self.logger:
-            self.logger.info("Loaded Flashrank compression retriever.")
-        return self.compression_retriever
+    # def load_flashrank_compression_retriever(self, base_retriever=None, model_name: str = "flashrank/flashrank-base", top_n: int = 5):
+    #     """
+    #     Load a ContextualCompressionRetriever using FlashrankRerank.
+
+    #     Args:
+    #         base_retriever: Existing retriever (if None, uses self.retriever)
+    #         model_name (str): Flashrank model identifier (default: "flashrank/flashrank-base")
+    #         top_n (int): Number of top documents to return after reranking
+
+    #     Returns:
+    #         ContextualCompressionRetriever: A compression-based retriever using Flashrank
+    #     """
+    #     if base_retriever is None:
+    #         base_retriever = self.retriever
+    #     if base_retriever is None:
+    #         raise ValueError("Base retriever is required.")
+
+    #     compressor = FlashrankRerank(model=model_name, top_n=top_n)
+    #     self.compression_retriever = ContextualCompressionRetriever(
+    #         base_compressor=compressor,
+    #         base_retriever=base_retriever
+    #     )
+
+    #     if self.logger:
+    #         self.logger.info("Loaded Flashrank compression retriever.")
+    #     return self.compression_retriever
 
     def compression_invoke(self, query: str):
         """
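`load_flashrank_compression_retriever` is now commented out, but `FlashrankRerank` is still imported at the top of `embeddings.py`. A hedged sketch of wiring the same reranking retriever by hand, following the commented-out body (the `model_name` default is taken from the old signature; `base_retriever` is assumed to be any existing LangChain retriever):

```python
from langchain.retrievers import ContextualCompressionRetriever
from langchain_community.document_compressors import FlashrankRerank

def build_compression_retriever(base_retriever,
                                model_name: str = "flashrank/flashrank-base",
                                top_n: int = 5):
    # Mirrors the commented-out method: rerank the base retriever's hits
    # with Flashrank and keep only the top_n documents.
    if base_retriever is None:
        raise ValueError("Base retriever is required.")
    compressor = FlashrankRerank(model=model_name, top_n=top_n)
    return ContextualCompressionRetriever(
        base_compressor=compressor,
        base_retriever=base_retriever,
    )

# Usage, assuming vector_store is a Chroma instance built by embedding_generator:
# docs = build_compression_retriever(vector_store.as_retriever()).invoke("my query")
```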
{mb_rag-1.1.61 → mb_rag-1.1.67}/mb_rag/rag/embeddings.py
@@ -627,58 +622,58 @@ class embedding_generator:
         print("Compression retriever loaded.")
         return self.compression_retriever.invoke(query)
 
-    def generate_rag_chain(self, context_prompt: str = None, retriever=None, llm=None):
-        """
-        Generate RAG chain for conversation.
-
-        Args:
-            context_prompt (str): Optional context prompt
-            retriever: Optional retriever instance
-            llm: Optional language model instance
-
-        Returns:
-            Any: Generated RAG chain
-
-        Example:
-            ```python
-            rag_chain = gen.generate_rag_chain(retriever=retriever)
-            ```
-        """
-        if context_prompt is None:
-            context_prompt = ("You are an assistant for question-answering tasks. "
-                              "Use the following pieces of retrieved context to answer the question. "
-                              "If you don't know the answer, just say that you don't know. "
-                              "Use three sentences maximum and keep the answer concise.\n\n{context}")
-
-        contextualize_q_system_prompt = ("Given a chat history and the latest user question "
-                                         "which might reference context in the chat history, "
-                                         "formulate a standalone question which can be understood, "
-                                         "just reformulate it if needed and otherwise return it as is.")
-
-        contextualize_q_prompt = ChatPromptTemplate.from_messages([
-            ("system", contextualize_q_system_prompt),
-            MessagesPlaceholder("chat_history"),
-            ("human", "{input}"),
-        ])
-
-        if retriever is None:
-            retriever = self.retriever
-        if llm is None:
-            if not ModelProvider.check_package("langchain_openai"):
-                raise ImportError("OpenAI package not found. Please install: pip install langchain-openai")
-            from langchain_openai import ChatOpenAI
-            llm = ChatOpenAI(model="gpt-4o", temperature=0.8)
-
-        history_aware_retriever = create_history_aware_retriever(llm, retriever,
-                                                                 contextualize_q_prompt)
-        qa_prompt = ChatPromptTemplate.from_messages([
-            ("system", context_prompt),
-            MessagesPlaceholder("chat_history"),
-            ("human", "{input}"),
-        ])
-        question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)
-        rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)
-        return rag_chain
+    # def generate_rag_chain(self, context_prompt: str = None, retriever=None, llm=None):
+    #     """
+    #     Generate RAG chain for conversation.
+
+    #     Args:
+    #         context_prompt (str): Optional context prompt
+    #         retriever: Optional retriever instance
+    #         llm: Optional language model instance
+
+    #     Returns:
+    #         Any: Generated RAG chain
+
+    #     Example:
+    #         ```python
+    #         rag_chain = gen.generate_rag_chain(retriever=retriever)
+    #         ```
+    #     """
+    #     if context_prompt is None:
+    #         context_prompt = ("You are an assistant for question-answering tasks. "
+    #                           "Use the following pieces of retrieved context to answer the question. "
+    #                           "If you don't know the answer, just say that you don't know. "
+    #                           "Use three sentences maximum and keep the answer concise.\n\n{context}")
+
+    #     contextualize_q_system_prompt = ("Given a chat history and the latest user question "
+    #                                      "which might reference context in the chat history, "
+    #                                      "formulate a standalone question which can be understood, "
+    #                                      "just reformulate it if needed and otherwise return it as is.")
+
+    #     contextualize_q_prompt = ChatPromptTemplate.from_messages([
+    #         ("system", contextualize_q_system_prompt),
+    #         MessagesPlaceholder("chat_history"),
+    #         ("human", "{input}"),
+    #     ])
+
+    #     if retriever is None:
+    #         retriever = self.retriever
+    #     if llm is None:
+    #         if not ModelProvider.check_package("langchain_openai"):
+    #             raise ImportError("OpenAI package not found. Please install: pip install langchain-openai")
+    #         from langchain_openai import ChatOpenAI
+    #         llm = ChatOpenAI(model="gpt-4o", temperature=0.8)
+
+    #     history_aware_retriever = create_history_aware_retriever(llm, retriever,
+    #                                                              contextualize_q_prompt)
+    #     qa_prompt = ChatPromptTemplate.from_messages([
+    #         ("system", context_prompt),
+    #         MessagesPlaceholder("chat_history"),
+    #         ("human", "{input}"),
+    #     ])
+    #     question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)
+    #     rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)
+    #     return rag_chain
 
     def conversation_chain(self, query: str, rag_chain, file: str = None):
         """
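`generate_rag_chain` is likewise commented out in 1.1.67. For reference, a chain built the way the commented body shows (`create_retrieval_chain` over a history-aware retriever) is invoked with an `input` string plus a `chat_history` list and returns a dict with an `answer` key. A hedged sketch of one conversational turn, assuming `rag_chain` was constructed as in the old code:

```python
from langchain_core.messages import AIMessage, HumanMessage

# rag_chain is assumed to be the output of the (now commented-out) generate_rag_chain.
chat_history = []
question = "What does the document say about chunking?"

result = rag_chain.invoke({"input": question, "chat_history": chat_history})
print(result["answer"])

# Carry the exchange forward so the next turn can be contextualized.
chat_history.extend([HumanMessage(content=question),
                     AIMessage(content=result["answer"])])
```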
mb_rag-1.1.67/mb_rag/utils/llm_wrapper.py (new file)
@@ -0,0 +1,31 @@
+## simple llm wrapper to replace invoke with invoke_query/own model query
+
+__all__ = ["LLMWrapper"]
+
+class LLMWrapper:
+    """A simple wrapper for the language model to standardize the invoke method.
+    """
+
+    def __init__(self, llm):
+        self.llm = llm
+
+    def __getattr__(self, name):
+        """Get all attributes from llm module. (invoke_query, invoke_query_threads, etc.)"""
+        return getattr(self.llm, name)
+
+    def invoke(self, use_threads=False, **kwargs) -> str:
+        """
+        Invoke the language model with a list of messages.
+        Using invoke_query method of the underlying model.
+        Check ModelFactory for more details.
+
+        Args:
+            use_threads (bool): Whether to use threading for invocation. Defaults to False.
+            **kwargs: Keyword arguments for the model invocation.
+
+        Returns:
+            str: The generated response.
+        """
+        if use_threads:
+            return self.llm.invoke_query_threads(**kwargs)
+        return self.llm.invoke_query(**kwargs)
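A usage sketch for the new `LLMWrapper`: it makes any object exposing `invoke_query`/`invoke_query_threads` (the `ModelFactory` interface) answer to the standard `invoke` name, while `__getattr__` passes everything else through. The dummy model and its `question` keyword below are illustrative, not part of the package:

```python
from mb_rag.utils.llm_wrapper import LLMWrapper

class DummyModel:
    # Minimal stand-in for a ModelFactory model.
    def invoke_query(self, question: str) -> str:
        return f"sync answer to: {question}"

    def invoke_query_threads(self, question: str) -> str:
        return f"threaded answer to: {question}"

llm = LLMWrapper(DummyModel())
print(llm.invoke(question="What is RAG?"))                    # delegates to invoke_query
print(llm.invoke(use_threads=True, question="What is RAG?"))  # delegates to invoke_query_threads
print(llm.invoke_query(question="direct call"))               # __getattr__ passthrough
```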
{mb_rag-1.1.61 → mb_rag-1.1.67}/mb_rag.egg-info/SOURCES.txt
@@ -3,6 +3,7 @@ pyproject.toml
 setup.py
 mb_rag/__init__.py
 mb_rag/basic.py
+mb_rag/prompts_bank.py
 mb_rag/version.py
 mb_rag.egg-info/PKG-INFO
 mb_rag.egg-info/SOURCES.txt
@@ -12,7 +13,6 @@ mb_rag.egg-info/top_level.txt
 mb_rag/chatbot/__init__.py
 mb_rag/chatbot/chains.py
 mb_rag/chatbot/conversation.py
-mb_rag/chatbot/prompts.py
 mb_rag/rag/__init__.py
 mb_rag/rag/embeddings.py
 mb_rag/utils/__init__.py
@@ -20,4 +20,6 @@ mb_rag/utils/all_data_extract.py
 mb_rag/utils/bounding_box.py
 mb_rag/utils/document_extract.py
 mb_rag/utils/extra.py
-mb_rag/utils/
+mb_rag/utils/llm_wrapper.py
+mb_rag/utils/pdf_extract.py
+mb_rag/utils/viewer.py
mb_rag-1.1.61/mb_rag/chatbot/prompts.py (removed)
@@ -1,59 +0,0 @@
-## file for storing basic prompts template
-from langchain.prompts import ChatPromptTemplate
-
-__all__ = ["prompts", "invoke_prompt"]
-
-class prompts:
-    """
-    Class to get different prompts example for chatbot and templates
-    """
-
-    def get_code_prompts(self):
-        """
-        Get code prompts
-        Returns:
-            str: Code prompt
-        """
-        list_code_prompts = {'coding_python ': """You are a Python developer.
-Human: {}"""}
-
-    def get_text_prompts(self):
-        """
-        Get text prompts
-        Returns:
-            str: Text prompt
-        """
-        list_text_prompts = {
-            'multiple_placeholders': """You are a helpful assistant.
-Human: Tell me a more about {adjective1} and its relation to {adjective2}.
-Assistant:"""
-        }
-
-    def get_image_prompts(self):
-        """
-        Get image prompts
-        Returns:
-            str: Image prompt
-        """
-        list_image_prompts = {'map_function': "*map(lambda x: image_url, baseframes_list)"} # for passing multiple images from a video or a list of images
-
-    def get_assistant_prompts(self):
-        """
-        Get assistant prompts
-        Returns:
-            str: Assistant prompt
-        """
-        list_assistant_prompts = {}
-
-def invoke_prompt(template: str, input_dict : dict = None):
-    """
-    Invoke a prompt
-    Args:
-        template (str): Template for the prompt
-        input_dict (dict): Input dictionary for the prompt
-    Returns:
-        str: Prompt
-    """
-    prompt_multiple = ChatPromptTemplate.from_template(template)
-    prompt = prompt_multiple.invoke(input_dict)
-    return prompt
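The removed `invoke_prompt` helper maps directly onto the new `PromptManager.render_prompt` (both run the template through `ChatPromptTemplate.from_template(...).invoke(...)`). A hedged migration sketch; note that the old `'coding_python '` key had a trailing space and an anonymous `{}` placeholder, which the new `coding_python` template replaces with a named `{question}` variable:

```python
# Before (mb_rag.chatbot.prompts, removed in 1.1.67):
#     from mb_rag.chatbot.prompts import invoke_prompt
#     prompt = invoke_prompt(template, {"question": "..."})

# After (mb_rag.prompts_bank):
from mb_rag.prompts_bank import PromptManager

pm = PromptManager()
text = pm.render_prompt("coding_python",
                        {"question": "Write a function that reverses a string."})
print(text)
```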