mb-rag 1.1.46__tar.gz → 1.1.47__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mb-rag might be problematic. Click here for more details.

Files changed (24)
  1. {mb_rag-1.1.46 → mb_rag-1.1.47}/PKG-INFO +1 -1
  2. {mb_rag-1.1.46 → mb_rag-1.1.47}/mb_rag/rag/embeddings.py +58 -1
  3. {mb_rag-1.1.46 → mb_rag-1.1.47}/mb_rag/version.py +1 -1
  4. {mb_rag-1.1.46 → mb_rag-1.1.47}/mb_rag.egg-info/PKG-INFO +1 -1
  5. {mb_rag-1.1.46 → mb_rag-1.1.47}/README.md +0 -0
  6. {mb_rag-1.1.46 → mb_rag-1.1.47}/mb_rag/__init__.py +0 -0
  7. {mb_rag-1.1.46 → mb_rag-1.1.47}/mb_rag/chatbot/__init__.py +0 -0
  8. {mb_rag-1.1.46 → mb_rag-1.1.47}/mb_rag/chatbot/basic.py +0 -0
  9. {mb_rag-1.1.46 → mb_rag-1.1.47}/mb_rag/chatbot/chains.py +0 -0
  10. {mb_rag-1.1.46 → mb_rag-1.1.47}/mb_rag/chatbot/prompts.py +0 -0
  11. {mb_rag-1.1.46 → mb_rag-1.1.47}/mb_rag/rag/__init__.py +0 -0
  12. {mb_rag-1.1.46 → mb_rag-1.1.47}/mb_rag/utils/__init__.py +0 -0
  13. {mb_rag-1.1.46 → mb_rag-1.1.47}/mb_rag/utils/all_data_extract.py +0 -0
  14. {mb_rag-1.1.46 → mb_rag-1.1.47}/mb_rag/utils/bounding_box.py +0 -0
  15. {mb_rag-1.1.46 → mb_rag-1.1.47}/mb_rag/utils/document_extract.py +0 -0
  16. {mb_rag-1.1.46 → mb_rag-1.1.47}/mb_rag/utils/extra.py +0 -0
  17. {mb_rag-1.1.46 → mb_rag-1.1.47}/mb_rag/utils/pdf_extract.py +0 -0
  18. {mb_rag-1.1.46 → mb_rag-1.1.47}/mb_rag.egg-info/SOURCES.txt +0 -0
  19. {mb_rag-1.1.46 → mb_rag-1.1.47}/mb_rag.egg-info/dependency_links.txt +0 -0
  20. {mb_rag-1.1.46 → mb_rag-1.1.47}/mb_rag.egg-info/requires.txt +0 -0
  21. {mb_rag-1.1.46 → mb_rag-1.1.47}/mb_rag.egg-info/top_level.txt +0 -0
  22. {mb_rag-1.1.46 → mb_rag-1.1.47}/pyproject.toml +0 -0
  23. {mb_rag-1.1.46 → mb_rag-1.1.47}/setup.cfg +0 -0
  24. {mb_rag-1.1.46 → mb_rag-1.1.47}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mb_rag
3
- Version: 1.1.46
3
+ Version: 1.1.47
4
4
  Summary: RAG function file
5
5
  Author: ['Malav Bateriwala']
6
6
  Requires-Python: >=3.8
@@ -61,7 +61,9 @@ from langchain.text_splitter import (
61
61
  CharacterTextSplitter,
62
62
  RecursiveCharacterTextSplitter,
63
63
  SentenceTransformersTokenTextSplitter,
64
- TokenTextSplitter)
64
+ TokenTextSplitter,
65
+ MarkdownHeaderTextSplitter,
66
+ SemanticChunker)
65
67
  from langchain_community.document_loaders import TextLoader, FireCrawlLoader
66
68
  from langchain_chroma import Chroma
67
69
  from ..utils.extra import load_env_file
@@ -69,6 +71,8 @@ from langchain.chains import create_history_aware_retriever, create_retrieval_ch
69
71
  from langchain.chains.combine_documents import create_stuff_documents_chain
70
72
  from langchain_core.messages import HumanMessage, SystemMessage
71
73
  from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
74
+ from langchain.retrievers import ContextualCompressionRetriever
75
+ from langchain_community.document_compressors import FlashrankRerank
72
76
 
73
77
  load_env_file()
74
78
 
@@ -308,6 +312,14 @@ class TextProcessor:
308
312
  'token': TokenTextSplitter(
309
313
  chunk_size=chunk_size,
310
314
  chunk_overlap=chunk_overlap
315
+ ),
316
+ 'markdown_header': MarkdownHeaderTextSplitter(
317
+ chunk_size=chunk_size,
318
+ chunk_overlap=chunk_overlap
319
+ ),
320
+ 'semantic_chunker': SemanticChunker(
321
+ chunk_size=chunk_size,
322
+ chunk_overlap=chunk_overlap
311
323
  )
312
324
  }
313
325
 
@@ -323,6 +335,7 @@ class TextProcessor:
323
335
  print(f"Text data splitted into {len(docs)} chunks")
324
336
  return docs
325
337
 
338
+
326
339
  class embedding_generator:
327
340
  """
328
341
  Main class for generating embeddings and managing RAG operations.
@@ -374,6 +387,7 @@ class embedding_generator:
374
387
  self.vector_store = self.load_vectorstore(**(vector_store_kwargs or {}))
375
388
  self.collection_name = collection_name
376
389
  self.text_processor = TextProcessor(logger)
390
+ self.compression_retriever = None
377
391
 
378
392
  def check_file(self, file_path: str) -> bool:
379
393
  """Check if file exists."""
@@ -570,6 +584,49 @@ class embedding_generator:
570
584
  retriever = self.retriever
571
585
  return retriever.get_relevant_documents(query)
572
586
 
587
+ def load_flashrank_compression_retriever(self, base_retriever=None, model_name: str = "flashrank/flashrank-base", top_n: int = 5):
588
+ """
589
+ Load a ContextualCompressionRetriever using FlashrankRerank.
590
+
591
+ Args:
592
+ base_retriever: Existing retriever (if None, uses self.retriever)
593
+ model_name (str): Flashrank model identifier (default: "flashrank/flashrank-base")
594
+ top_n (int): Number of top documents to return after reranking
595
+
596
+ Returns:
597
+ ContextualCompressionRetriever: A compression-based retriever using Flashrank
598
+ """
599
+ if base_retriever is None:
600
+ base_retriever = self.retriever
601
+ if base_retriever is None:
602
+ raise ValueError("Base retriever is required.")
603
+
604
+ compressor = FlashrankRerank(model=model_name, top_n=top_n)
605
+ self.compression_retriever = ContextualCompressionRetriever(
606
+ base_compressor=compressor,
607
+ base_retriever=base_retriever
608
+ )
609
+
610
+ if self.logger:
611
+ self.logger.info("Loaded Flashrank compression retriever.")
612
+ return self.compression_retriever
613
+
614
+ def compression_invoke(self, query: str):
615
+ """
616
+ Invoke compression retriever. Only one compression retriever (Reranker) added right now.
617
+
618
+ Args:
619
+ query (str): Query string
620
+
621
+ Returns:
622
+ Any: Query results
623
+ """
624
+
625
+ if self.compression_retriever is None:
626
+ self.compression_retriever = self.load_flashrank_compression_retriever(base_retriever=self.retriever)
627
+ print("Compression retriever loaded.")
628
+ return self.compression_retriever.invoke(query)
629
+
573
630
  def generate_rag_chain(self, context_prompt: str = None, retriever=None, llm=None):
574
631
  """
575
632
  Generate RAG chain for conversation.
@@ -1,5 +1,5 @@
1
1
  MAJOR_VERSION = 1
2
2
  MINOR_VERSION = 1
3
- PATCH_VERSION = 46
3
+ PATCH_VERSION = 47
4
4
  version = '{}.{}.{}'.format(MAJOR_VERSION, MINOR_VERSION, PATCH_VERSION)
5
5
  __all__ = ['MAJOR_VERSION', 'MINOR_VERSION', 'PATCH_VERSION', 'version']
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mb_rag
3
- Version: 1.1.46
3
+ Version: 1.1.47
4
4
  Summary: RAG function file
5
5
  Author: ['Malav Bateriwala']
6
6
  Requires-Python: >=3.8
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes