mb-rag 1.1.61__tar.gz → 1.1.67__tar.gz
This diff shows the changes between publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.
- {mb_rag-1.1.61 → mb_rag-1.1.67}/PKG-INFO +1 -1
- {mb_rag-1.1.61 → mb_rag-1.1.67}/mb_rag/basic.py +2 -0
- mb_rag-1.1.67/mb_rag/prompts_bank.py +77 -0
- {mb_rag-1.1.61 → mb_rag-1.1.67}/mb_rag/rag/embeddings.py +83 -88
- mb_rag-1.1.67/mb_rag/utils/llm_wrapper.py +31 -0
- mb_rag-1.1.67/mb_rag/utils/viewer.py +8 -0
- {mb_rag-1.1.61 → mb_rag-1.1.67}/mb_rag/version.py +1 -1
- {mb_rag-1.1.61 → mb_rag-1.1.67}/mb_rag.egg-info/PKG-INFO +1 -1
- {mb_rag-1.1.61 → mb_rag-1.1.67}/mb_rag.egg-info/SOURCES.txt +4 -2
- mb_rag-1.1.61/mb_rag/chatbot/prompts.py +0 -59
- {mb_rag-1.1.61 → mb_rag-1.1.67}/README.md +0 -0
- {mb_rag-1.1.61 → mb_rag-1.1.67}/mb_rag/__init__.py +0 -0
- {mb_rag-1.1.61 → mb_rag-1.1.67}/mb_rag/chatbot/__init__.py +0 -0
- {mb_rag-1.1.61 → mb_rag-1.1.67}/mb_rag/chatbot/chains.py +0 -0
- {mb_rag-1.1.61 → mb_rag-1.1.67}/mb_rag/chatbot/conversation.py +0 -0
- {mb_rag-1.1.61 → mb_rag-1.1.67}/mb_rag/rag/__init__.py +0 -0
- {mb_rag-1.1.61 → mb_rag-1.1.67}/mb_rag/utils/__init__.py +0 -0
- {mb_rag-1.1.61 → mb_rag-1.1.67}/mb_rag/utils/all_data_extract.py +0 -0
- {mb_rag-1.1.61 → mb_rag-1.1.67}/mb_rag/utils/bounding_box.py +0 -0
- {mb_rag-1.1.61 → mb_rag-1.1.67}/mb_rag/utils/document_extract.py +0 -0
- {mb_rag-1.1.61 → mb_rag-1.1.67}/mb_rag/utils/extra.py +0 -0
- {mb_rag-1.1.61 → mb_rag-1.1.67}/mb_rag/utils/pdf_extract.py +0 -0
- {mb_rag-1.1.61 → mb_rag-1.1.67}/mb_rag.egg-info/dependency_links.txt +0 -0
- {mb_rag-1.1.61 → mb_rag-1.1.67}/mb_rag.egg-info/requires.txt +0 -0
- {mb_rag-1.1.61 → mb_rag-1.1.67}/mb_rag.egg-info/top_level.txt +0 -0
- {mb_rag-1.1.61 → mb_rag-1.1.67}/pyproject.toml +0 -0
- {mb_rag-1.1.61 → mb_rag-1.1.67}/setup.cfg +0 -0
- {mb_rag-1.1.61 → mb_rag-1.1.67}/setup.py +0 -0
{mb_rag-1.1.61 → mb_rag-1.1.67}/mb_rag/basic.py
@@ -357,6 +357,8 @@ class ModelFactory:
         Returns:
             str: Output from the model
         """
+        if not isinstance(images, list):
+            images = [images]
         base64_images = [self._image_to_base64(image) for image in images]
         image_prompt_create = [{"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{base64_images[i]}"}} for i in range(len(images))]
         prompt_new = [{"type": "text", "text": prompt}, *image_prompt_create]
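The two added lines normalize the `images` argument so callers can pass either a single image path or a list. A minimal sketch of the same pattern; the standalone `image_to_base64` helper and `build_image_prompt` name are illustrative stand-ins for the `ModelFactory` method, not part of the package:

```python
import base64

def image_to_base64(path: str) -> str:
    # Stand-in for ModelFactory._image_to_base64: read the file and base64-encode it.
    with open(path, "rb") as f:
        return base64.b64encode(f.read()).decode("utf-8")

def build_image_prompt(prompt: str, images) -> list:
    # Same normalization as the patched method: a bare string becomes a one-item list.
    if not isinstance(images, list):
        images = [images]
    base64_images = [image_to_base64(image) for image in images]
    image_parts = [
        {"type": "image_url",
         "image_url": {"url": f"data:image/jpeg;base64,{b64}"}}
        for b64 in base64_images
    ]
    return [{"type": "text", "text": prompt}, *image_parts]

# Both call styles now produce the same structure:
# build_image_prompt("Describe this image", "photo.jpg")
# build_image_prompt("Compare these images", ["a.jpg", "b.jpg"])
```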
mb_rag-1.1.67/mb_rag/prompts_bank.py (new file)
@@ -0,0 +1,77 @@
+from langchain_core.prompts.chat import ChatPromptTemplate
+
+__all__ = ["PromptManager"]
+
+class PromptManager:
+    """
+    Central class for storing and invoking prompt templates.
+
+    Example:
+        pm = PromptManager()
+        prompt_text = pm.render_prompt("greeting")
+        print(prompt_text)
+
+        pm = PromptManager()
+        prompt_text = pm.render_prompt("todo_task", {"task": "Plan a deep learning project for image recognition"})
+        print(prompt_text)
+    """
+
+    def __init__(self):
+        self.templates = {
+            "coding_python": """You are a Python developer.
+Human: {question}
+Assistant:""",
+
+            "greeting": """You are a friendly assistant.
+Human: Hello!
+Assistant: Hi there! How can I assist you today?""",
+
+            "goodbye": """You are a friendly assistant.
+Human: Goodbye!
+Assistant: Goodbye! Have a great day!""",
+
+            "todo_task": """You are a helpful assistant.
+Human: Please create a to-do list for the following task: {task}
+Assistant:""",
+
+            "map_function": "*map(lambda x: image_url, baseframes_list)",
+
+            "SQL_AGENT_SYS_PROMPT": """You are an expert SQL agent. Your task is to generate and execute SQL queries based on user requests.
+RULES:
+- THINK step by step before answering.
+- Use the provided database schema to inform your queries.
+- When you need to retrieve data, generate a SQL query and execute it using the provided tools.
+- Read-only mode: Do not attempt to modify the database.
+- NO INSERT/UPDATE/DELETE/ALTER/DROP/CREATE/REPLACE/TRUNCATE statements allowed.
+- LIMIT your results to 10 rows. Unless specified otherwise.
+- If you encounter an error while executing a query, analyze the error message and adjust your query accordingly.
+- Prefer using explicit column names instead of SELECT * for better performance.
+- Always ensure your SQL syntax is correct. """
+        }
+
+    def get_template(self, name: str) -> str:
+        """
+        Get a prompt template by name.
+        Args:
+            name (str): The key name of the prompt.
+        Returns:
+            str: The prompt template string.
+        """
+        template = self.templates.get(name)
+        if not template:
+            raise ValueError(f"Prompt '{name}' not found. Available prompts: {list(self.templates.keys())}")
+        return template
+
+    def render_prompt(self, name: str, context: dict = None) -> str:
+        """
+        Fill and return a rendered prompt string.
+        Args:
+            name (str): The key name of the prompt.
+            context (dict): Variables to fill into the template.
+        Returns:
+            str: The final rendered prompt text.
+        """
+        template = self.get_template(name)
+        chat_prompt = ChatPromptTemplate.from_template(template)
+        rendered = chat_prompt.invoke(context or {})
+        return rendered.to_string()
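A usage sketch for the new `PromptManager`, mirroring its docstring example. One caveat worth knowing: `render_prompt` routes through `ChatPromptTemplate.from_template(...).invoke(...)`, which treats the whole template as a single human message, so the string from `to_string()` carries LangChain's `Human:` role prefix in front of the template text:

```python
from mb_rag.prompts_bank import PromptManager

pm = PromptManager()

# Static template: nothing to fill in.
print(pm.render_prompt("greeting"))

# Template with a {task} placeholder, filled from the context dict.
print(pm.render_prompt("todo_task",
                       {"task": "Plan a deep learning project for image recognition"}))

# Unknown names raise ValueError listing the available keys.
try:
    pm.render_prompt("does_not_exist")
except ValueError as err:
    print(err)
```

Templates whose placeholders are not supplied (e.g. `coding_python` without a `question` key) raise a `KeyError` from LangChain's template formatting rather than a `ValueError`.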
{mb_rag-1.1.61 → mb_rag-1.1.67}/mb_rag/rag/embeddings.py
@@ -57,21 +57,20 @@ import os
 import shutil
 import importlib.util
 from typing import List, Dict, Optional, Union, Any
-from
+from langchain_text_splitters import (
     CharacterTextSplitter,
     RecursiveCharacterTextSplitter,
     SentenceTransformersTokenTextSplitter,
     TokenTextSplitter,
-    MarkdownHeaderTextSplitter
-    SemanticChunker)
+    MarkdownHeaderTextSplitter)
 from langchain_community.document_loaders import TextLoader, FireCrawlLoader
 from langchain_chroma import Chroma
 from ..utils.extra import load_env_file
-from langchain.chains import create_history_aware_retriever, create_retrieval_chain
-from langchain.chains.combine_documents import create_stuff_documents_chain
+# from langchain.chains import create_history_aware_retriever, create_retrieval_chain
+# from langchain.chains.combine_documents import create_stuff_documents_chain
 from langchain_core.messages import HumanMessage, SystemMessage
 from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
-from langchain.retrievers import ContextualCompressionRetriever
+# from langchain.retrievers import ContextualCompressionRetriever
 from langchain_community.document_compressors import FlashrankRerank
 
 load_env_file()
{mb_rag-1.1.61 → mb_rag-1.1.67}/mb_rag/rag/embeddings.py
@@ -317,10 +316,6 @@ class TextProcessor:
                 chunk_size=chunk_size,
                 chunk_overlap=chunk_overlap
             ),
-            'semantic_chunker': SemanticChunker(
-                chunk_size=chunk_size,
-                chunk_overlap=chunk_overlap
-            )
         }
 
         if text_splitter_type not in splitters:
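Dropping `semantic_chunker` looks like a bug fix: `SemanticChunker` lives in `langchain_experimental`, not `langchain_text_splitters`, and takes an embeddings model rather than `chunk_size`/`chunk_overlap`, so the old import and entry presumably could not have worked as written. What remains is a plain registry lookup over the standard splitters. A minimal sketch of that pattern; the key names `"character"`/`"recursive"` here are illustrative, not necessarily the package's actual keys:

```python
from langchain_text_splitters import (
    CharacterTextSplitter,
    RecursiveCharacterTextSplitter,
)

def get_splitter(text_splitter_type: str, chunk_size: int = 1000, chunk_overlap: int = 100):
    # Registry lookup, as in TextProcessor: build the named splitter or fail loudly.
    splitters = {
        "character": CharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap),
        "recursive": RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap),
    }
    if text_splitter_type not in splitters:
        raise ValueError(f"Unknown text splitter type: {text_splitter_type}")
    return splitters[text_splitter_type]

chunks = get_splitter("recursive", chunk_size=200, chunk_overlap=20).split_text(
    "Retrieval-augmented generation splits documents into chunks before embedding. " * 20
)
print(len(chunks), "chunks")
```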
{mb_rag-1.1.61 → mb_rag-1.1.67}/mb_rag/rag/embeddings.py
@@ -584,32 +579,32 @@ class embedding_generator:
         retriever = self.retriever
         return retriever.get_relevant_documents(query)
 
-    def load_flashrank_compression_retriever(self, base_retriever=None, model_name: str = "flashrank/flashrank-base", top_n: int = 5):
-        """
-        Load a ContextualCompressionRetriever using FlashrankRerank.
-
-        Args:
-            base_retriever: Existing retriever (if None, uses self.retriever)
-            model_name (str): Flashrank model identifier (default: "flashrank/flashrank-base")
-            top_n (int): Number of top documents to return after reranking
-
-        Returns:
-            ContextualCompressionRetriever: A compression-based retriever using Flashrank
-        """
-        if base_retriever is None:
-            base_retriever = self.retriever
-        if base_retriever is None:
-            raise ValueError("Base retriever is required.")
-
-        compressor = FlashrankRerank(model=model_name, top_n=top_n)
-        self.compression_retriever = ContextualCompressionRetriever(
-            base_compressor=compressor,
-            base_retriever=base_retriever
-        )
-
-        if self.logger:
-            self.logger.info("Loaded Flashrank compression retriever.")
-        return self.compression_retriever
+    # def load_flashrank_compression_retriever(self, base_retriever=None, model_name: str = "flashrank/flashrank-base", top_n: int = 5):
+    #     """
+    #     Load a ContextualCompressionRetriever using FlashrankRerank.
+
+    #     Args:
+    #         base_retriever: Existing retriever (if None, uses self.retriever)
+    #         model_name (str): Flashrank model identifier (default: "flashrank/flashrank-base")
+    #         top_n (int): Number of top documents to return after reranking
+
+    #     Returns:
+    #         ContextualCompressionRetriever: A compression-based retriever using Flashrank
+    #     """
+    #     if base_retriever is None:
+    #         base_retriever = self.retriever
+    #     if base_retriever is None:
+    #         raise ValueError("Base retriever is required.")
+
+    #     compressor = FlashrankRerank(model=model_name, top_n=top_n)
+    #     self.compression_retriever = ContextualCompressionRetriever(
+    #         base_compressor=compressor,
+    #         base_retriever=base_retriever
+    #     )
+
+    #     if self.logger:
+    #         self.logger.info("Loaded Flashrank compression retriever.")
+    #     return self.compression_retriever
 
     def compression_invoke(self, query: str):
         """
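`load_flashrank_compression_retriever` is now commented out, but `FlashrankRerank` is still imported at the top of `embeddings.py`. A hedged sketch of wiring the same reranking retriever by hand, following the commented-out body (the `model_name` default is taken from the old signature; `base_retriever` is assumed to be any existing LangChain retriever):

```python
from langchain.retrievers import ContextualCompressionRetriever
from langchain_community.document_compressors import FlashrankRerank

def build_compression_retriever(base_retriever,
                                model_name: str = "flashrank/flashrank-base",
                                top_n: int = 5):
    # Mirrors the commented-out method: rerank the base retriever's hits
    # with Flashrank and keep only the top_n documents.
    if base_retriever is None:
        raise ValueError("Base retriever is required.")
    compressor = FlashrankRerank(model=model_name, top_n=top_n)
    return ContextualCompressionRetriever(
        base_compressor=compressor,
        base_retriever=base_retriever,
    )

# Usage, assuming vector_store is a Chroma instance built by embedding_generator:
# docs = build_compression_retriever(vector_store.as_retriever()).invoke("my query")
```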
{mb_rag-1.1.61 → mb_rag-1.1.67}/mb_rag/rag/embeddings.py
@@ -627,58 +622,58 @@ class embedding_generator:
         print("Compression retriever loaded.")
         return self.compression_retriever.invoke(query)
 
-    def generate_rag_chain(self, context_prompt: str = None, retriever=None, llm=None):
-        """
-        Generate RAG chain for conversation.
-
-        Args:
-            context_prompt (str): Optional context prompt
-            retriever: Optional retriever instance
-            llm: Optional language model instance
-
-        Returns:
-            Any: Generated RAG chain
-
-        Example:
-            ```python
-            rag_chain = gen.generate_rag_chain(retriever=retriever)
-            ```
-        """
-        if context_prompt is None:
-            context_prompt = ("You are an assistant for question-answering tasks. "
-                              "Use the following pieces of retrieved context to answer the question. "
-                              "If you don't know the answer, just say that you don't know. "
-                              "Use three sentences maximum and keep the answer concise.\n\n{context}")
-
-        contextualize_q_system_prompt = ("Given a chat history and the latest user question "
-                                         "which might reference context in the chat history, "
-                                         "formulate a standalone question which can be understood, "
-                                         "just reformulate it if needed and otherwise return it as is.")
-
-        contextualize_q_prompt = ChatPromptTemplate.from_messages([
-            ("system", contextualize_q_system_prompt),
-            MessagesPlaceholder("chat_history"),
-            ("human", "{input}"),
-        ])
-
-        if retriever is None:
-            retriever = self.retriever
-        if llm is None:
-            if not ModelProvider.check_package("langchain_openai"):
-                raise ImportError("OpenAI package not found. Please install: pip install langchain-openai")
-            from langchain_openai import ChatOpenAI
-            llm = ChatOpenAI(model="gpt-4o", temperature=0.8)
-
-        history_aware_retriever = create_history_aware_retriever(llm, retriever,
-                                                                 contextualize_q_prompt)
-        qa_prompt = ChatPromptTemplate.from_messages([
-            ("system", context_prompt),
-            MessagesPlaceholder("chat_history"),
-            ("human", "{input}"),
-        ])
-        question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)
-        rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)
-        return rag_chain
+    # def generate_rag_chain(self, context_prompt: str = None, retriever=None, llm=None):
+    #     """
+    #     Generate RAG chain for conversation.
+
+    #     Args:
+    #         context_prompt (str): Optional context prompt
+    #         retriever: Optional retriever instance
+    #         llm: Optional language model instance
+
+    #     Returns:
+    #         Any: Generated RAG chain
+
+    #     Example:
+    #         ```python
+    #         rag_chain = gen.generate_rag_chain(retriever=retriever)
+    #         ```
+    #     """
+    #     if context_prompt is None:
+    #         context_prompt = ("You are an assistant for question-answering tasks. "
+    #                           "Use the following pieces of retrieved context to answer the question. "
+    #                           "If you don't know the answer, just say that you don't know. "
+    #                           "Use three sentences maximum and keep the answer concise.\n\n{context}")
+
+    #     contextualize_q_system_prompt = ("Given a chat history and the latest user question "
+    #                                      "which might reference context in the chat history, "
+    #                                      "formulate a standalone question which can be understood, "
+    #                                      "just reformulate it if needed and otherwise return it as is.")
+
+    #     contextualize_q_prompt = ChatPromptTemplate.from_messages([
+    #         ("system", contextualize_q_system_prompt),
+    #         MessagesPlaceholder("chat_history"),
+    #         ("human", "{input}"),
+    #     ])
+
+    #     if retriever is None:
+    #         retriever = self.retriever
+    #     if llm is None:
+    #         if not ModelProvider.check_package("langchain_openai"):
+    #             raise ImportError("OpenAI package not found. Please install: pip install langchain-openai")
+    #         from langchain_openai import ChatOpenAI
+    #         llm = ChatOpenAI(model="gpt-4o", temperature=0.8)
+
+    #     history_aware_retriever = create_history_aware_retriever(llm, retriever,
+    #                                                              contextualize_q_prompt)
+    #     qa_prompt = ChatPromptTemplate.from_messages([
+    #         ("system", context_prompt),
+    #         MessagesPlaceholder("chat_history"),
+    #         ("human", "{input}"),
+    #     ])
+    #     question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)
+    #     rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)
+    #     return rag_chain
 
     def conversation_chain(self, query: str, rag_chain, file: str = None):
         """
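`generate_rag_chain` is likewise commented out in 1.1.67. For reference, a chain built the way the commented body shows (`create_retrieval_chain` over a history-aware retriever) is invoked with an `input` string plus a `chat_history` list and returns a dict with an `answer` key. A hedged sketch of one conversational turn, assuming `rag_chain` was constructed as in the old code:

```python
from langchain_core.messages import AIMessage, HumanMessage

# rag_chain is assumed to be the output of the (now commented-out) generate_rag_chain.
chat_history = []
question = "What does the document say about chunking?"

result = rag_chain.invoke({"input": question, "chat_history": chat_history})
print(result["answer"])

# Carry the exchange forward so the next turn can be contextualized.
chat_history.extend([HumanMessage(content=question),
                     AIMessage(content=result["answer"])])
```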
mb_rag-1.1.67/mb_rag/utils/llm_wrapper.py (new file)
@@ -0,0 +1,31 @@
+## simple llm wrapper to replace invoke with invoke_query/own model query
+
+__all__ = ["LLMWrapper"]
+
+class LLMWrapper:
+    """A simple wrapper for the language model to standardize the invoke method.
+    """
+
+    def __init__(self, llm):
+        self.llm = llm
+
+    def __getattr__(self, name):
+        """Get all attributes from llm module. (invoke_query, invoke_query_threads, etc.)"""
+        return getattr(self.llm, name)
+
+    def invoke(self, use_threads=False, **kwargs) -> str:
+        """
+        Invoke the language model with a list of messages.
+        Using invoke_query method of the underlying model.
+        Check ModelFactory for more details.
+
+        Args:
+            use_threads (bool): Whether to use threading for invocation. Defaults to False.
+            **kwargs: Keyword arguments for the model invocation.
+
+        Returns:
+            str: The generated response.
+        """
+        if use_threads:
+            return self.llm.invoke_query_threads(**kwargs)
+        return self.llm.invoke_query(**kwargs)
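A usage sketch for the new `LLMWrapper`: it makes any object exposing `invoke_query`/`invoke_query_threads` (the `ModelFactory` interface) answer to the standard `invoke` name, while `__getattr__` passes everything else through. The dummy model and its `question` keyword below are illustrative, not part of the package:

```python
from mb_rag.utils.llm_wrapper import LLMWrapper

class DummyModel:
    # Minimal stand-in for a ModelFactory model.
    def invoke_query(self, question: str) -> str:
        return f"sync answer to: {question}"

    def invoke_query_threads(self, question: str) -> str:
        return f"threaded answer to: {question}"

llm = LLMWrapper(DummyModel())
print(llm.invoke(question="What is RAG?"))                    # delegates to invoke_query
print(llm.invoke(use_threads=True, question="What is RAG?"))  # delegates to invoke_query_threads
print(llm.invoke_query(question="direct call"))               # __getattr__ passthrough
```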
{mb_rag-1.1.61 → mb_rag-1.1.67}/mb_rag.egg-info/SOURCES.txt
@@ -3,6 +3,7 @@ pyproject.toml
 setup.py
 mb_rag/__init__.py
 mb_rag/basic.py
+mb_rag/prompts_bank.py
 mb_rag/version.py
 mb_rag.egg-info/PKG-INFO
 mb_rag.egg-info/SOURCES.txt
@@ -12,7 +13,6 @@ mb_rag.egg-info/top_level.txt
 mb_rag/chatbot/__init__.py
 mb_rag/chatbot/chains.py
 mb_rag/chatbot/conversation.py
-mb_rag/chatbot/prompts.py
 mb_rag/rag/__init__.py
 mb_rag/rag/embeddings.py
 mb_rag/utils/__init__.py
@@ -20,4 +20,6 @@ mb_rag/utils/all_data_extract.py
 mb_rag/utils/bounding_box.py
 mb_rag/utils/document_extract.py
 mb_rag/utils/extra.py
-mb_rag/utils/
+mb_rag/utils/llm_wrapper.py
+mb_rag/utils/pdf_extract.py
+mb_rag/utils/viewer.py
mb_rag-1.1.61/mb_rag/chatbot/prompts.py (removed)
@@ -1,59 +0,0 @@
-## file for storing basic prompts template
-from langchain.prompts import ChatPromptTemplate
-
-__all__ = ["prompts", "invoke_prompt"]
-
-class prompts:
-    """
-    Class to get different prompts example for chatbot and templates
-    """
-
-    def get_code_prompts(self):
-        """
-        Get code prompts
-        Returns:
-            str: Code prompt
-        """
-        list_code_prompts = {'coding_python ': """You are a Python developer.
-Human: {}"""}
-
-    def get_text_prompts(self):
-        """
-        Get text prompts
-        Returns:
-            str: Text prompt
-        """
-        list_text_prompts = {
-            'multiple_placeholders': """You are a helpful assistant.
-Human: Tell me a more about {adjective1} and its relation to {adjective2}.
-Assistant:"""
-        }
-
-    def get_image_prompts(self):
-        """
-        Get image prompts
-        Returns:
-            str: Image prompt
-        """
-        list_image_prompts = {'map_function': "*map(lambda x: image_url, baseframes_list)"} # for passing multiple images from a video or a list of images
-
-    def get_assistant_prompts(self):
-        """
-        Get assistant prompts
-        Returns:
-            str: Assistant prompt
-        """
-        list_assistant_prompts = {}
-
-def invoke_prompt(template: str, input_dict : dict = None):
-    """
-    Invoke a prompt
-    Args:
-        template (str): Template for the prompt
-        input_dict (dict): Input dictionary for the prompt
-    Returns:
-        str: Prompt
-    """
-    prompt_multiple = ChatPromptTemplate.from_template(template)
-    prompt = prompt_multiple.invoke(input_dict)
-    return prompt
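The removed `invoke_prompt` helper maps directly onto the new `PromptManager.render_prompt` (both run the template through `ChatPromptTemplate.from_template(...).invoke(...)`). A hedged migration sketch; note that the old `'coding_python '` key had a trailing space and an anonymous `{}` placeholder, which the new `coding_python` template replaces with a named `{question}` variable:

```python
# Before (mb_rag.chatbot.prompts, removed in 1.1.67):
#     from mb_rag.chatbot.prompts import invoke_prompt
#     prompt = invoke_prompt(template, {"question": "..."})

# After (mb_rag.prompts_bank):
from mb_rag.prompts_bank import PromptManager

pm = PromptManager()
text = pm.render_prompt("coding_python",
                        {"question": "Write a function that reverses a string."})
print(text)
```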