mb-rag 1.1.66.tar.gz → 1.1.67.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27)
  1. {mb_rag-1.1.66 → mb_rag-1.1.67}/PKG-INFO +1 -1
  2. {mb_rag-1.1.66 → mb_rag-1.1.67}/mb_rag/rag/embeddings.py +83 -88
  3. {mb_rag-1.1.66 → mb_rag-1.1.67}/mb_rag/version.py +1 -1
  4. {mb_rag-1.1.66 → mb_rag-1.1.67}/mb_rag.egg-info/PKG-INFO +1 -1
  5. {mb_rag-1.1.66 → mb_rag-1.1.67}/README.md +0 -0
  6. {mb_rag-1.1.66 → mb_rag-1.1.67}/mb_rag/__init__.py +0 -0
  7. {mb_rag-1.1.66 → mb_rag-1.1.67}/mb_rag/basic.py +0 -0
  8. {mb_rag-1.1.66 → mb_rag-1.1.67}/mb_rag/chatbot/__init__.py +0 -0
  9. {mb_rag-1.1.66 → mb_rag-1.1.67}/mb_rag/chatbot/chains.py +0 -0
  10. {mb_rag-1.1.66 → mb_rag-1.1.67}/mb_rag/chatbot/conversation.py +0 -0
  11. {mb_rag-1.1.66 → mb_rag-1.1.67}/mb_rag/prompts_bank.py +0 -0
  12. {mb_rag-1.1.66 → mb_rag-1.1.67}/mb_rag/rag/__init__.py +0 -0
  13. {mb_rag-1.1.66 → mb_rag-1.1.67}/mb_rag/utils/__init__.py +0 -0
  14. {mb_rag-1.1.66 → mb_rag-1.1.67}/mb_rag/utils/all_data_extract.py +0 -0
  15. {mb_rag-1.1.66 → mb_rag-1.1.67}/mb_rag/utils/bounding_box.py +0 -0
  16. {mb_rag-1.1.66 → mb_rag-1.1.67}/mb_rag/utils/document_extract.py +0 -0
  17. {mb_rag-1.1.66 → mb_rag-1.1.67}/mb_rag/utils/extra.py +0 -0
  18. {mb_rag-1.1.66 → mb_rag-1.1.67}/mb_rag/utils/llm_wrapper.py +0 -0
  19. {mb_rag-1.1.66 → mb_rag-1.1.67}/mb_rag/utils/pdf_extract.py +0 -0
  20. {mb_rag-1.1.66 → mb_rag-1.1.67}/mb_rag/utils/viewer.py +0 -0
  21. {mb_rag-1.1.66 → mb_rag-1.1.67}/mb_rag.egg-info/SOURCES.txt +0 -0
  22. {mb_rag-1.1.66 → mb_rag-1.1.67}/mb_rag.egg-info/dependency_links.txt +0 -0
  23. {mb_rag-1.1.66 → mb_rag-1.1.67}/mb_rag.egg-info/requires.txt +0 -0
  24. {mb_rag-1.1.66 → mb_rag-1.1.67}/mb_rag.egg-info/top_level.txt +0 -0
  25. {mb_rag-1.1.66 → mb_rag-1.1.67}/pyproject.toml +0 -0
  26. {mb_rag-1.1.66 → mb_rag-1.1.67}/setup.cfg +0 -0
  27. {mb_rag-1.1.66 → mb_rag-1.1.67}/setup.py +0 -0
{mb_rag-1.1.66 → mb_rag-1.1.67}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mb_rag
-Version: 1.1.66
+Version: 1.1.67
 Summary: RAG function file
 Author: ['Malav Bateriwala']
 Requires-Python: >=3.8
{mb_rag-1.1.66 → mb_rag-1.1.67}/mb_rag/rag/embeddings.py
@@ -57,21 +57,20 @@ import os
 import shutil
 import importlib.util
 from typing import List, Dict, Optional, Union, Any
-from langchain.text_splitter import (
+from langchain_text_splitters import (
     CharacterTextSplitter,
     RecursiveCharacterTextSplitter,
     SentenceTransformersTokenTextSplitter,
     TokenTextSplitter,
-    MarkdownHeaderTextSplitter,
-    SemanticChunker)
+    MarkdownHeaderTextSplitter)
 from langchain_community.document_loaders import TextLoader, FireCrawlLoader
 from langchain_chroma import Chroma
 from ..utils.extra import load_env_file
-from langchain.chains import create_history_aware_retriever, create_retrieval_chain
-from langchain.chains.combine_documents import create_stuff_documents_chain
+# from langchain.chains import create_history_aware_retriever, create_retrieval_chain
+# from langchain.chains.combine_documents import create_stuff_documents_chain
 from langchain_core.messages import HumanMessage, SystemMessage
 from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
-from langchain.retrievers import ContextualCompressionRetriever
+# from langchain.retrievers import ContextualCompressionRetriever
 from langchain_community.document_compressors import FlashrankRerank
 
 load_env_file()
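The text-splitter imports move from the deprecated `langchain.text_splitter` module to the standalone `langchain-text-splitters` package. A minimal sketch of the new import path, assuming `langchain-text-splitters` is installed (the sample text is illustrative):

```python
# Splitters now live in the standalone langchain-text-splitters package.
from langchain_text_splitters import RecursiveCharacterTextSplitter

splitter = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=20)

# split_text returns a list of string chunks, each at most ~chunk_size characters.
chunks = splitter.split_text("Retrieval-augmented generation pairs a retriever with an LLM. " * 20)
print(len(chunks), repr(chunks[0][:60]))
```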
@@ -317,10 +316,6 @@ class TextProcessor:
                 chunk_size=chunk_size,
                 chunk_overlap=chunk_overlap
             ),
-            'semantic_chunker': SemanticChunker(
-                chunk_size=chunk_size,
-                chunk_overlap=chunk_overlap
-            )
         }
 
         if text_splitter_type not in splitters:
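The `semantic_chunker` entry is dropped together with the `SemanticChunker` import above. `SemanticChunker` actually ships in `langchain_experimental.text_splitter` rather than `langchain_text_splitters`, and it is constructed from an embeddings model instead of `chunk_size`/`chunk_overlap`, so the removed entry could not have worked as written. A hedged sketch of the correct wiring, assuming `langchain-experimental` and `langchain-openai` are installed (the embeddings backend is interchangeable):

```python
# SemanticChunker splits where the embedding distance between sentences spikes,
# so it takes an embeddings model rather than chunk_size/chunk_overlap.
from langchain_experimental.text_splitter import SemanticChunker
from langchain_openai import OpenAIEmbeddings  # any Embeddings implementation works

chunker = SemanticChunker(
    OpenAIEmbeddings(),
    breakpoint_threshold_type="percentile",  # split at large semantic jumps
)
docs = chunker.create_documents(
    ["First topic sentence. Still the first topic. A completely new topic starts here."]
)
```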
@@ -584,32 +579,32 @@ class embedding_generator:
             retriever = self.retriever
         return retriever.get_relevant_documents(query)
 
-    def load_flashrank_compression_retriever(self, base_retriever=None, model_name: str = "flashrank/flashrank-base", top_n: int = 5):
-        """
-        Load a ContextualCompressionRetriever using FlashrankRerank.
-
-        Args:
-            base_retriever: Existing retriever (if None, uses self.retriever)
-            model_name (str): Flashrank model identifier (default: "flashrank/flashrank-base")
-            top_n (int): Number of top documents to return after reranking
-
-        Returns:
-            ContextualCompressionRetriever: A compression-based retriever using Flashrank
-        """
-        if base_retriever is None:
-            base_retriever = self.retriever
-        if base_retriever is None:
-            raise ValueError("Base retriever is required.")
-
-        compressor = FlashrankRerank(model=model_name, top_n=top_n)
-        self.compression_retriever = ContextualCompressionRetriever(
-            base_compressor=compressor,
-            base_retriever=base_retriever
-        )
-
-        if self.logger:
-            self.logger.info("Loaded Flashrank compression retriever.")
-        return self.compression_retriever
+    # def load_flashrank_compression_retriever(self, base_retriever=None, model_name: str = "flashrank/flashrank-base", top_n: int = 5):
+    #     """
+    #     Load a ContextualCompressionRetriever using FlashrankRerank.
+
+    #     Args:
+    #         base_retriever: Existing retriever (if None, uses self.retriever)
+    #         model_name (str): Flashrank model identifier (default: "flashrank/flashrank-base")
+    #         top_n (int): Number of top documents to return after reranking
+
+    #     Returns:
+    #         ContextualCompressionRetriever: A compression-based retriever using Flashrank
+    #     """
+    #     if base_retriever is None:
+    #         base_retriever = self.retriever
+    #     if base_retriever is None:
+    #         raise ValueError("Base retriever is required.")
+
+    #     compressor = FlashrankRerank(model=model_name, top_n=top_n)
+    #     self.compression_retriever = ContextualCompressionRetriever(
+    #         base_compressor=compressor,
+    #         base_retriever=base_retriever
+    #     )
+
+    #     if self.logger:
+    #         self.logger.info("Loaded Flashrank compression retriever.")
+    #     return self.compression_retriever
 
     def compression_invoke(self, query: str):
         """
@@ -627,58 +622,58 @@ class embedding_generator:
             print("Compression retriever loaded.")
         return self.compression_retriever.invoke(query)
 
-    def generate_rag_chain(self, context_prompt: str = None, retriever=None, llm=None):
-        """
-        Generate RAG chain for conversation.
-
-        Args:
-            context_prompt (str): Optional context prompt
-            retriever: Optional retriever instance
-            llm: Optional language model instance
-
-        Returns:
-            Any: Generated RAG chain
-
-        Example:
-            ```python
-            rag_chain = gen.generate_rag_chain(retriever=retriever)
-            ```
-        """
-        if context_prompt is None:
-            context_prompt = ("You are an assistant for question-answering tasks. "
-                              "Use the following pieces of retrieved context to answer the question. "
-                              "If you don't know the answer, just say that you don't know. "
-                              "Use three sentences maximum and keep the answer concise.\n\n{context}")
-
-        contextualize_q_system_prompt = ("Given a chat history and the latest user question "
-                                         "which might reference context in the chat history, "
-                                         "formulate a standalone question which can be understood, "
-                                         "just reformulate it if needed and otherwise return it as is.")
-
-        contextualize_q_prompt = ChatPromptTemplate.from_messages([
-            ("system", contextualize_q_system_prompt),
-            MessagesPlaceholder("chat_history"),
-            ("human", "{input}"),
-        ])
-
-        if retriever is None:
-            retriever = self.retriever
-        if llm is None:
-            if not ModelProvider.check_package("langchain_openai"):
-                raise ImportError("OpenAI package not found. Please install: pip install langchain-openai")
-            from langchain_openai import ChatOpenAI
-            llm = ChatOpenAI(model="gpt-4o", temperature=0.8)
-
-        history_aware_retriever = create_history_aware_retriever(llm, retriever,
-                                                                 contextualize_q_prompt)
-        qa_prompt = ChatPromptTemplate.from_messages([
-            ("system", context_prompt),
-            MessagesPlaceholder("chat_history"),
-            ("human", "{input}"),
-        ])
-        question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)
-        rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)
-        return rag_chain
+    # def generate_rag_chain(self, context_prompt: str = None, retriever=None, llm=None):
+    #     """
+    #     Generate RAG chain for conversation.
+
+    #     Args:
+    #         context_prompt (str): Optional context prompt
+    #         retriever: Optional retriever instance
+    #         llm: Optional language model instance
+
+    #     Returns:
+    #         Any: Generated RAG chain
+
+    #     Example:
+    #         ```python
+    #         rag_chain = gen.generate_rag_chain(retriever=retriever)
+    #         ```
+    #     """
+    #     if context_prompt is None:
+    #         context_prompt = ("You are an assistant for question-answering tasks. "
+    #                           "Use the following pieces of retrieved context to answer the question. "
+    #                           "If you don't know the answer, just say that you don't know. "
+    #                           "Use three sentences maximum and keep the answer concise.\n\n{context}")
+
+    #     contextualize_q_system_prompt = ("Given a chat history and the latest user question "
+    #                                      "which might reference context in the chat history, "
+    #                                      "formulate a standalone question which can be understood, "
+    #                                      "just reformulate it if needed and otherwise return it as is.")
+
+    #     contextualize_q_prompt = ChatPromptTemplate.from_messages([
+    #         ("system", contextualize_q_system_prompt),
+    #         MessagesPlaceholder("chat_history"),
+    #         ("human", "{input}"),
+    #     ])
+
+    #     if retriever is None:
+    #         retriever = self.retriever
+    #     if llm is None:
+    #         if not ModelProvider.check_package("langchain_openai"):
+    #             raise ImportError("OpenAI package not found. Please install: pip install langchain-openai")
+    #         from langchain_openai import ChatOpenAI
+    #         llm = ChatOpenAI(model="gpt-4o", temperature=0.8)
+
+    #     history_aware_retriever = create_history_aware_retriever(llm, retriever,
+    #                                                              contextualize_q_prompt)
+    #     qa_prompt = ChatPromptTemplate.from_messages([
+    #         ("system", context_prompt),
+    #         MessagesPlaceholder("chat_history"),
+    #         ("human", "{input}"),
+    #     ])
+    #     question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)
+    #     rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)
+    #     return rag_chain
 
     def conversation_chain(self, query: str, rag_chain, file: str = None):
         """
{mb_rag-1.1.66 → mb_rag-1.1.67}/mb_rag/version.py
@@ -1,5 +1,5 @@
 MAJOR_VERSION = 1
 MINOR_VERSION = 1
-PATCH_VERSION = 66
+PATCH_VERSION = 67
 version = '{}.{}.{}'.format(MAJOR_VERSION, MINOR_VERSION, PATCH_VERSION)
 __all__ = ['MAJOR_VERSION', 'MINOR_VERSION', 'PATCH_VERSION', 'version']
{mb_rag-1.1.66 → mb_rag-1.1.67}/mb_rag.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mb_rag
-Version: 1.1.66
+Version: 1.1.67
 Summary: RAG function file
 Author: ['Malav Bateriwala']
 Requires-Python: >=3.8