mb-rag 1.1.61.tar.gz → 1.1.67.tar.gz

This diff shows the published contents of two package versions as they appear in a supported public registry. It is provided for informational purposes only.
Files changed (28)
  1. {mb_rag-1.1.61 → mb_rag-1.1.67}/PKG-INFO +1 -1
  2. {mb_rag-1.1.61 → mb_rag-1.1.67}/mb_rag/basic.py +2 -0
  3. mb_rag-1.1.67/mb_rag/prompts_bank.py +77 -0
  4. {mb_rag-1.1.61 → mb_rag-1.1.67}/mb_rag/rag/embeddings.py +83 -88
  5. mb_rag-1.1.67/mb_rag/utils/llm_wrapper.py +31 -0
  6. mb_rag-1.1.67/mb_rag/utils/viewer.py +8 -0
  7. {mb_rag-1.1.61 → mb_rag-1.1.67}/mb_rag/version.py +1 -1
  8. {mb_rag-1.1.61 → mb_rag-1.1.67}/mb_rag.egg-info/PKG-INFO +1 -1
  9. {mb_rag-1.1.61 → mb_rag-1.1.67}/mb_rag.egg-info/SOURCES.txt +4 -2
  10. mb_rag-1.1.61/mb_rag/chatbot/prompts.py +0 -59
  11. {mb_rag-1.1.61 → mb_rag-1.1.67}/README.md +0 -0
  12. {mb_rag-1.1.61 → mb_rag-1.1.67}/mb_rag/__init__.py +0 -0
  13. {mb_rag-1.1.61 → mb_rag-1.1.67}/mb_rag/chatbot/__init__.py +0 -0
  14. {mb_rag-1.1.61 → mb_rag-1.1.67}/mb_rag/chatbot/chains.py +0 -0
  15. {mb_rag-1.1.61 → mb_rag-1.1.67}/mb_rag/chatbot/conversation.py +0 -0
  16. {mb_rag-1.1.61 → mb_rag-1.1.67}/mb_rag/rag/__init__.py +0 -0
  17. {mb_rag-1.1.61 → mb_rag-1.1.67}/mb_rag/utils/__init__.py +0 -0
  18. {mb_rag-1.1.61 → mb_rag-1.1.67}/mb_rag/utils/all_data_extract.py +0 -0
  19. {mb_rag-1.1.61 → mb_rag-1.1.67}/mb_rag/utils/bounding_box.py +0 -0
  20. {mb_rag-1.1.61 → mb_rag-1.1.67}/mb_rag/utils/document_extract.py +0 -0
  21. {mb_rag-1.1.61 → mb_rag-1.1.67}/mb_rag/utils/extra.py +0 -0
  22. {mb_rag-1.1.61 → mb_rag-1.1.67}/mb_rag/utils/pdf_extract.py +0 -0
  23. {mb_rag-1.1.61 → mb_rag-1.1.67}/mb_rag.egg-info/dependency_links.txt +0 -0
  24. {mb_rag-1.1.61 → mb_rag-1.1.67}/mb_rag.egg-info/requires.txt +0 -0
  25. {mb_rag-1.1.61 → mb_rag-1.1.67}/mb_rag.egg-info/top_level.txt +0 -0
  26. {mb_rag-1.1.61 → mb_rag-1.1.67}/pyproject.toml +0 -0
  27. {mb_rag-1.1.61 → mb_rag-1.1.67}/setup.cfg +0 -0
  28. {mb_rag-1.1.61 → mb_rag-1.1.67}/setup.py +0 -0
{mb_rag-1.1.61 → mb_rag-1.1.67}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: mb_rag
- Version: 1.1.61
+ Version: 1.1.67
  Summary: RAG function file
  Author: ['Malav Bateriwala']
  Requires-Python: >=3.8
{mb_rag-1.1.61 → mb_rag-1.1.67}/mb_rag/basic.py
@@ -357,6 +357,8 @@ class ModelFactory:
  Returns:
  str: Output from the model
  """
+ if not isinstance(images, list):
+     images = [images]
  base64_images = [self._image_to_base64(image) for image in images]
  image_prompt_create = [{"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{base64_images[i]}"}} for i in range(len(images))]
  prompt_new = [{"type": "text", "text": prompt}, *image_prompt_create]
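The two added lines normalize the `images` argument, so callers can now pass a single image path instead of a one-element list. A minimal sketch of the same pattern in isolation (the function name and payload shape here are illustrative, not mb_rag's API):

```python
from typing import List, Union

def query_with_images(prompt: str, images: Union[str, List[str]]) -> list:
    # Normalize: accept a bare path or a list of paths.
    if not isinstance(images, list):
        images = [images]
    # Each image is then attached alongside the text prompt, mirroring
    # what ModelFactory does with its base64-encoded JPEG payload.
    return [{"type": "text", "text": prompt}] + [
        {"type": "image_url", "image_url": {"url": f"file://{path}"}} for path in images
    ]

# Both calls now produce the same payload shape:
print(query_with_images("Describe this", "cat.jpg"))
print(query_with_images("Describe this", ["cat.jpg"]))
```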
mb_rag-1.1.67/mb_rag/prompts_bank.py (new file)
@@ -0,0 +1,77 @@
+ from langchain_core.prompts.chat import ChatPromptTemplate
+
+ __all__ = ["PromptManager"]
+
+ class PromptManager:
+ """
+ Central class for storing and invoking prompt templates.
+
+ Example:
+ pm = PromptManager()
+ prompt_text = pm.render_prompt("greeting")
+ print(prompt_text)
+
+ pm = PromptManager()
+ prompt_text = pm.render_prompt("todo_task", {"task": "Plan a deep learning project for image recognition"})
+ print(prompt_text)
+ """
+
+ def __init__(self):
+ self.templates = {
+ "coding_python": """You are a Python developer.
+ Human: {question}
+ Assistant:""",
+
+ "greeting": """You are a friendly assistant.
+ Human: Hello!
+ Assistant: Hi there! How can I assist you today?""",
+
+ "goodbye": """You are a friendly assistant.
+ Human: Goodbye!
+ Assistant: Goodbye! Have a great day!""",
+
+ "todo_task": """You are a helpful assistant.
+ Human: Please create a to-do list for the following task: {task}
+ Assistant:""",
+
+ "map_function": "*map(lambda x: image_url, baseframes_list)",
+
+ "SQL_AGENT_SYS_PROMPT": """You are an expert SQL agent. Your task is to generate and execute SQL queries based on user requests.
+ RULES:
+ - THINK step by step before answering.
+ - Use the provided database schema to inform your queries.
+ - When you need to retrieve data, generate a SQL query and execute it using the provided tools.
+ - Read-only mode: Do not attempt to modify the database.
+ - NO INSERT/UPDATE/DELETE/ALTER/DROP/CREATE/REPLACE/TRUNCATE statements allowed.
+ - LIMIT your results to 10 rows. Unless specified otherwise.
+ - If you encounter an error while executing a query, analyze the error message and adjust your query accordingly.
+ - Prefer using explicit column names instead of SELECT * for better performance.
+ - Always ensure your SQL syntax is correct. """
+ }
+
+ def get_template(self, name: str) -> str:
+ """
+ Get a prompt template by name.
+ Args:
+ name (str): The key name of the prompt.
+ Returns:
+ str: The prompt template string.
+ """
+ template = self.templates.get(name)
+ if not template:
+ raise ValueError(f"Prompt '{name}' not found. Available prompts: {list(self.templates.keys())}")
+ return template
+
+ def render_prompt(self, name: str, context: dict = None) -> str:
+ """
+ Fill and return a rendered prompt string.
+ Args:
+ name (str): The key name of the prompt.
+ context (dict): Variables to fill into the template.
+ Returns:
+ str: The final rendered prompt text.
+ """
+ template = self.get_template(name)
+ chat_prompt = ChatPromptTemplate.from_template(template)
+ rendered = chat_prompt.invoke(context or {})
+ return rendered.to_string()
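As written, `render_prompt` pushes the whole template through `ChatPromptTemplate`, so placeholder names in a template must match the keys of the context dict. A quick usage sketch exercising only the API shown above:

```python
from mb_rag.prompts_bank import PromptManager

pm = PromptManager()

# Static template: renders with an empty context.
print(pm.render_prompt("greeting"))

# Template with a {question} placeholder: the context key must match.
print(pm.render_prompt("coding_python", {"question": "Reverse a linked list."}))

# Unknown names raise ValueError listing the available prompts.
try:
    pm.render_prompt("does_not_exist")
except ValueError as e:
    print(e)
```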
{mb_rag-1.1.61 → mb_rag-1.1.67}/mb_rag/rag/embeddings.py
@@ -57,21 +57,20 @@ import os
  import shutil
  import importlib.util
  from typing import List, Dict, Optional, Union, Any
- from langchain.text_splitter import (
+ from langchain_text_splitters import (
  CharacterTextSplitter,
  RecursiveCharacterTextSplitter,
  SentenceTransformersTokenTextSplitter,
  TokenTextSplitter,
- MarkdownHeaderTextSplitter,
- SemanticChunker)
+ MarkdownHeaderTextSplitter)
  from langchain_community.document_loaders import TextLoader, FireCrawlLoader
  from langchain_chroma import Chroma
  from ..utils.extra import load_env_file
- from langchain.chains import create_history_aware_retriever, create_retrieval_chain
- from langchain.chains.combine_documents import create_stuff_documents_chain
+ # from langchain.chains import create_history_aware_retriever, create_retrieval_chain
+ # from langchain.chains.combine_documents import create_stuff_documents_chain
  from langchain_core.messages import HumanMessage, SystemMessage
  from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
- from langchain.retrievers import ContextualCompressionRetriever
+ # from langchain.retrievers import ContextualCompressionRetriever
  from langchain_community.document_compressors import FlashrankRerank

  load_env_file()
@@ -317,10 +316,6 @@ class TextProcessor:
  chunk_size=chunk_size,
  chunk_overlap=chunk_overlap
  ),
- 'semantic_chunker': SemanticChunker(
- chunk_size=chunk_size,
- chunk_overlap=chunk_overlap
- )
  }

  if text_splitter_type not in splitters:
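The import move tracks LangChain's split of the text splitters into the standalone `langchain_text_splitters` package, which does not ship `SemanticChunker`; that class also takes an embeddings model rather than `chunk_size`/`chunk_overlap`, so the removed splitter entry could not have worked as written. If semantic chunking is still wanted, a hedged sketch of the usual route (assumes `langchain_experimental` and `langchain_openai` are installed and an OpenAI key is configured):

```python
from langchain_experimental.text_splitter import SemanticChunker
from langchain_openai import OpenAIEmbeddings

# SemanticChunker splits on embedding-distance breakpoints, so it is
# configured with an embeddings model, not chunk_size/chunk_overlap.
chunker = SemanticChunker(
    OpenAIEmbeddings(),
    breakpoint_threshold_type="percentile",  # the default strategy
)
docs = chunker.create_documents(
    ["First topic sentence. More on the same topic. Now a very different topic."]
)
print(len(docs))
```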
{mb_rag-1.1.61 → mb_rag-1.1.67}/mb_rag/rag/embeddings.py
@@ -584,32 +579,32 @@ class embedding_generator:
  retriever = self.retriever
  return retriever.get_relevant_documents(query)

- def load_flashrank_compression_retriever(self, base_retriever=None, model_name: str = "flashrank/flashrank-base", top_n: int = 5):
- """
- Load a ContextualCompressionRetriever using FlashrankRerank.
-
- Args:
- base_retriever: Existing retriever (if None, uses self.retriever)
- model_name (str): Flashrank model identifier (default: "flashrank/flashrank-base")
- top_n (int): Number of top documents to return after reranking
-
- Returns:
- ContextualCompressionRetriever: A compression-based retriever using Flashrank
- """
- if base_retriever is None:
- base_retriever = self.retriever
- if base_retriever is None:
- raise ValueError("Base retriever is required.")
-
- compressor = FlashrankRerank(model=model_name, top_n=top_n)
- self.compression_retriever = ContextualCompressionRetriever(
- base_compressor=compressor,
- base_retriever=base_retriever
- )
-
- if self.logger:
- self.logger.info("Loaded Flashrank compression retriever.")
- return self.compression_retriever
+ # def load_flashrank_compression_retriever(self, base_retriever=None, model_name: str = "flashrank/flashrank-base", top_n: int = 5):
+ # """
+ # Load a ContextualCompressionRetriever using FlashrankRerank.
+
+ # Args:
+ # base_retriever: Existing retriever (if None, uses self.retriever)
+ # model_name (str): Flashrank model identifier (default: "flashrank/flashrank-base")
+ # top_n (int): Number of top documents to return after reranking
+
+ # Returns:
+ # ContextualCompressionRetriever: A compression-based retriever using Flashrank
+ # """
+ # if base_retriever is None:
+ # base_retriever = self.retriever
+ # if base_retriever is None:
+ # raise ValueError("Base retriever is required.")
+
+ # compressor = FlashrankRerank(model=model_name, top_n=top_n)
+ # self.compression_retriever = ContextualCompressionRetriever(
+ # base_compressor=compressor,
+ # base_retriever=base_retriever
+ # )
+
+ # if self.logger:
+ # self.logger.info("Loaded Flashrank compression retriever.")
+ # return self.compression_retriever

  def compression_invoke(self, query: str):
  """
@@ -627,58 +622,58 @@
  print("Compression retriever loaded.")
  return self.compression_retriever.invoke(query)

- def generate_rag_chain(self, context_prompt: str = None, retriever=None, llm=None):
- """
- Generate RAG chain for conversation.
-
- Args:
- context_prompt (str): Optional context prompt
- retriever: Optional retriever instance
- llm: Optional language model instance
-
- Returns:
- Any: Generated RAG chain
-
- Example:
- ```python
- rag_chain = gen.generate_rag_chain(retriever=retriever)
- ```
- """
- if context_prompt is None:
- context_prompt = ("You are an assistant for question-answering tasks. "
- "Use the following pieces of retrieved context to answer the question. "
- "If you don't know the answer, just say that you don't know. "
- "Use three sentences maximum and keep the answer concise.\n\n{context}")
-
- contextualize_q_system_prompt = ("Given a chat history and the latest user question "
- "which might reference context in the chat history, "
- "formulate a standalone question which can be understood, "
- "just reformulate it if needed and otherwise return it as is.")
-
- contextualize_q_prompt = ChatPromptTemplate.from_messages([
- ("system", contextualize_q_system_prompt),
- MessagesPlaceholder("chat_history"),
- ("human", "{input}"),
- ])
-
- if retriever is None:
- retriever = self.retriever
- if llm is None:
- if not ModelProvider.check_package("langchain_openai"):
- raise ImportError("OpenAI package not found. Please install: pip install langchain-openai")
- from langchain_openai import ChatOpenAI
- llm = ChatOpenAI(model="gpt-4o", temperature=0.8)
-
- history_aware_retriever = create_history_aware_retriever(llm, retriever,
- contextualize_q_prompt)
- qa_prompt = ChatPromptTemplate.from_messages([
- ("system", context_prompt),
- MessagesPlaceholder("chat_history"),
- ("human", "{input}"),
- ])
- question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)
- rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)
- return rag_chain
+ # def generate_rag_chain(self, context_prompt: str = None, retriever=None, llm=None):
+ # """
+ # Generate RAG chain for conversation.
+
+ # Args:
+ # context_prompt (str): Optional context prompt
+ # retriever: Optional retriever instance
+ # llm: Optional language model instance
+
+ # Returns:
+ # Any: Generated RAG chain
+
+ # Example:
+ # ```python
+ # rag_chain = gen.generate_rag_chain(retriever=retriever)
+ # ```
+ # """
+ # if context_prompt is None:
+ # context_prompt = ("You are an assistant for question-answering tasks. "
+ # "Use the following pieces of retrieved context to answer the question. "
+ # "If you don't know the answer, just say that you don't know. "
+ # "Use three sentences maximum and keep the answer concise.\n\n{context}")
+
+ # contextualize_q_system_prompt = ("Given a chat history and the latest user question "
+ # "which might reference context in the chat history, "
+ # "formulate a standalone question which can be understood, "
+ # "just reformulate it if needed and otherwise return it as is.")
+
+ # contextualize_q_prompt = ChatPromptTemplate.from_messages([
+ # ("system", contextualize_q_system_prompt),
+ # MessagesPlaceholder("chat_history"),
+ # ("human", "{input}"),
+ # ])
+
+ # if retriever is None:
+ # retriever = self.retriever
+ # if llm is None:
+ # if not ModelProvider.check_package("langchain_openai"):
+ # raise ImportError("OpenAI package not found. Please install: pip install langchain-openai")
+ # from langchain_openai import ChatOpenAI
+ # llm = ChatOpenAI(model="gpt-4o", temperature=0.8)
+
+ # history_aware_retriever = create_history_aware_retriever(llm, retriever,
+ # contextualize_q_prompt)
+ # qa_prompt = ChatPromptTemplate.from_messages([
+ # ("system", context_prompt),
+ # MessagesPlaceholder("chat_history"),
+ # ("human", "{input}"),
+ # ])
+ # question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)
+ # rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)
+ # return rag_chain

  def conversation_chain(self, query: str, rag_chain, file: str = None):
  """
mb_rag-1.1.67/mb_rag/utils/llm_wrapper.py (new file)
@@ -0,0 +1,31 @@
+ ## simple llm wrapper to replace invoke with invoke_query/own model query
+
+ __all__ = ["LLMWrapper"]
+
+ class LLMWrapper:
+ """A simple wrapper for the language model to standardize the invoke method.
+ """
+
+ def __init__(self, llm):
+ self.llm = llm
+
+ def __getattr__(self, name):
+ """Get all attributes from llm module. (invoke_query, invoke_query_threads, etc.)"""
+ return getattr(self.llm, name)
+
+ def invoke(self, use_threads=False, **kwargs) -> str:
+ """
+ Invoke the language model with a list of messages.
+ Using invoke_query method of the underlying model.
+ Check ModelFactory for more details.
+
+ Args:
+ use_threads (bool): Whether to use threading for invocation. Defaults to False.
+ **kwargs: Keyword arguments for the model invocation.
+
+ Returns:
+ str: The generated response.
+ """
+ if use_threads:
+ return self.llm.invoke_query_threads(**kwargs)
+ return self.llm.invoke_query(**kwargs)
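Because `__getattr__` delegates everything else to the wrapped object, `LLMWrapper` only changes what `invoke` means. A small usage sketch with a stand-in model (`DummyModel` is illustrative; in mb_rag the wrapped object would come from `ModelFactory` and expose `invoke_query`/`invoke_query_threads`):

```python
from mb_rag.utils.llm_wrapper import LLMWrapper

class DummyModel:
    # Stand-in for a ModelFactory model; only the method names matter here.
    def invoke_query(self, question: str) -> str:
        return f"answer to: {question}"

    def invoke_query_threads(self, question: str) -> str:
        return f"threaded answer to: {question}"

llm = LLMWrapper(DummyModel())
print(llm.invoke(question="What is RAG?"))                   # routes to invoke_query
print(llm.invoke(use_threads=True, question="What is RAG?")) # routes to invoke_query_threads
print(llm.invoke_query(question="direct"))                   # other attributes pass through
```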
mb_rag-1.1.67/mb_rag/utils/viewer.py (new file)
@@ -0,0 +1,8 @@
+ from IPython.display import Image, display
+
+ __all__ = ["display_graph_png"]
+
+
+ def display_graph_png(graph):
+ """Display a graph using IPython's display capabilities."""
+ display(Image(graph.get_graph().draw_mermaid_png()))
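`graph.get_graph().draw_mermaid_png()` matches the LangGraph compiled-graph API, so this helper appears aimed at rendering a compiled `StateGraph` in a notebook. A hedged sketch (assumes `langgraph` is installed and the code runs in a Jupyter cell):

```python
from typing import TypedDict
from langgraph.graph import END, START, StateGraph
from mb_rag.utils.viewer import display_graph_png

class State(TypedDict):
    text: str

def step(state: State) -> State:
    return {"text": state["text"].upper()}

builder = StateGraph(State)
builder.add_node("step", step)
builder.add_edge(START, "step")
builder.add_edge("step", END)
app = builder.compile()

display_graph_png(app)  # renders the Mermaid PNG inline in the notebook
```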
{mb_rag-1.1.61 → mb_rag-1.1.67}/mb_rag/version.py
@@ -1,5 +1,5 @@
  MAJOR_VERSION = 1
  MINOR_VERSION = 1
- PATCH_VERSION = 61
+ PATCH_VERSION = 67
  version = '{}.{}.{}'.format(MAJOR_VERSION, MINOR_VERSION, PATCH_VERSION)
  __all__ = ['MAJOR_VERSION', 'MINOR_VERSION', 'PATCH_VERSION', 'version']
{mb_rag-1.1.61 → mb_rag-1.1.67}/mb_rag.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: mb_rag
- Version: 1.1.61
+ Version: 1.1.67
  Summary: RAG function file
  Author: ['Malav Bateriwala']
  Requires-Python: >=3.8
{mb_rag-1.1.61 → mb_rag-1.1.67}/mb_rag.egg-info/SOURCES.txt
@@ -3,6 +3,7 @@ pyproject.toml
  setup.py
  mb_rag/__init__.py
  mb_rag/basic.py
+ mb_rag/prompts_bank.py
  mb_rag/version.py
  mb_rag.egg-info/PKG-INFO
  mb_rag.egg-info/SOURCES.txt
@@ -12,7 +13,6 @@ mb_rag.egg-info/top_level.txt
  mb_rag/chatbot/__init__.py
  mb_rag/chatbot/chains.py
  mb_rag/chatbot/conversation.py
- mb_rag/chatbot/prompts.py
  mb_rag/rag/__init__.py
  mb_rag/rag/embeddings.py
  mb_rag/utils/__init__.py
@@ -20,4 +20,6 @@ mb_rag/utils/all_data_extract.py
  mb_rag/utils/bounding_box.py
  mb_rag/utils/document_extract.py
  mb_rag/utils/extra.py
- mb_rag/utils/pdf_extract.py
+ mb_rag/utils/llm_wrapper.py
+ mb_rag/utils/pdf_extract.py
+ mb_rag/utils/viewer.py
mb_rag-1.1.61/mb_rag/chatbot/prompts.py (deleted)
@@ -1,59 +0,0 @@
- ## file for storing basic prompts template
- from langchain.prompts import ChatPromptTemplate
-
- __all__ = ["prompts", "invoke_prompt"]
-
- class prompts:
- """
- Class to get different prompts example for chatbot and templates
- """
-
- def get_code_prompts(self):
- """
- Get code prompts
- Returns:
- str: Code prompt
- """
- list_code_prompts = {'coding_python ': """You are a Python developer.
- Human: {}"""}
-
- def get_text_prompts(self):
- """
- Get text prompts
- Returns:
- str: Text prompt
- """
- list_text_prompts = {
- 'multiple_placeholders': """You are a helpful assistant.
- Human: Tell me a more about {adjective1} and its relation to {adjective2}.
- Assistant:"""
- }
-
- def get_image_prompts(self):
- """
- Get image prompts
- Returns:
- str: Image prompt
- """
- list_image_prompts = {'map_function': "*map(lambda x: image_url, baseframes_list)"} # for passing multiple images from a video or a list of images
-
- def get_assistant_prompts(self):
- """
- Get assistant prompts
- Returns:
- str: Assistant prompt
- """
- list_assistant_prompts = {}
-
- def invoke_prompt(template: str, input_dict : dict = None):
- """
- Invoke a prompt
- Args:
- template (str): Template for the prompt
- input_dict (dict): Input dictionary for the prompt
- Returns:
- str: Prompt
- """
- prompt_multiple = ChatPromptTemplate.from_template(template)
- prompt = prompt_multiple.invoke(input_dict)
- return prompt