mb-rag 1.1.43__py3-none-any.whl → 1.1.45__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mb-rag might be problematic. Click here for more details.

mb_rag/chatbot/basic.py CHANGED
@@ -119,7 +119,7 @@ class ModelFactory:
119
119
  return ChatAnthropic(**kwargs)
120
120
 
121
121
  @classmethod
122
- def create_google(cls, model_name: str = "gemini-1.5-flash", **kwargs) -> Any:
122
+ def create_google(cls, model_name: str = "gemini-2.0-flash", **kwargs) -> Any:
123
123
  """
124
124
  Create Google chatbot model
125
125
  Args:
mb_rag/rag/embeddings.py CHANGED
@@ -180,6 +180,23 @@ class ModelProvider:
180
180
  kwargs["model"] = model_name
181
181
  return GoogleGenerativeAIEmbeddings(**kwargs)
182
182
 
183
@staticmethod
def get_rag_qwen(model_name: str = "qwen", **kwargs):
    """
    Load a Qwen embedding model through LangChain's HuggingFaceEmbeddings.

    Args:
        model_name (str): Either a Hugging Face repo id / local path
            (anything containing "/", e.g. "Qwen/Qwen3-Embedding-0.6B"),
            which is used as given, or any other string (such as the
            default alias "qwen"), which selects
            "Qwen/Qwen3-Embedding-0.6B".
        **kwargs: Additional keyword arguments forwarded unchanged to
            HuggingFaceEmbeddings.

    Returns:
        HuggingFaceEmbeddings: The initialized embeddings object.
    """
    # NOTE(review): newer LangChain releases expose this class from a
    # separate integration package; confirm this import path against the
    # LangChain version the project pins.
    from langchain.embeddings import HuggingFaceEmbeddings

    default_repo = "Qwen/Qwen3-Embedding-0.6B"
    # Only treat the argument as a model id when it looks like one.
    # load_embedding_model() forwards its generic `model_type` here (whose
    # default is an OpenAI model name), so bare names must keep resolving
    # to the default Qwen checkpoint.
    if isinstance(model_name, str) and "/" in model_name:
        repo_id = model_name
    else:
        repo_id = default_repo

    return HuggingFaceEmbeddings(model_name=repo_id, **kwargs)
199
+
183
200
  def load_embedding_model(model_name: str = 'openai', model_type: str = "text-embedding-ada-002", **kwargs):
184
201
  """
185
202
  Load a RAG model based on provider and type.
@@ -206,6 +223,8 @@ def load_embedding_model(model_name: str = 'openai', model_type: str = "text-emb
206
223
  return ModelProvider.get_rag_google(model_type, **kwargs)
207
224
  elif model_name == 'anthropic':
208
225
  return ModelProvider.get_rag_anthropic(model_type, **kwargs)
226
+ elif model_name == 'qwen':
227
+ return ModelProvider.get_rag_qwen(model_type, **kwargs)
209
228
  else:
210
229
  raise ValueError(f"Invalid model name: {model_name}")
211
230
  except ImportError as e:
@@ -0,0 +1,65 @@
1
+ ## Docling data extract
2
+
3
+ from typing import List
4
+ from mb_rag.utils.extra import check_package
5
+
6
+ __all__ = ['DocumentExtractor']
7
+
8
class DocumentExtractor:
    """
    Extract the content of a document with Docling and save it to disk.

    The extraction result is exported as Markdown, plain text or HTML and
    written to a file; the raw extraction result is handed back to the caller.
    """

    # Supported ``data_store_type`` values mapped to
    # (output file extension, name of the export method on ``result.document``).
    _EXPORTERS = {
        "markdown": ("md", "export_to_markdown"),
        "txt": ("txt", "export_to_text"),
        "html": ("html", "export_to_html"),
    }

    def __init__(self):
        """
        Verify that Docling is available and keep a reference to its entry class.

        Raises:
            ImportError: If ``check_package("docling")`` reports the package
                as missing.
        """
        if not check_package("docling"):
            raise ImportError("Docling package not found. Please install it using: pip install docling")
        # NOTE(review): Docling's documented entry point is
        # ``docling.document_converter.DocumentConverter`` with ``.convert()``;
        # confirm that a top-level ``Docling`` class exposing ``.extract()``
        # exists in the Docling version this package targets.
        from docling import Docling
        self.Docling = Docling

    def _extract_data(self, file_path: str, **kwargs):
        """
        Run Docling on ``file_path`` and return whatever ``extract()`` yields.

        Args:
            file_path (str): Path to the document.
            **kwargs: Extra keyword arguments passed to the Docling constructor.

        Returns:
            The extraction result. ``get_data`` relies on it exposing a
            ``document`` attribute with ``export_to_markdown``,
            ``export_to_text`` and ``export_to_html`` methods.

        Raises:
            Exception: Wrapping any error raised during extraction; the
                original error is kept as ``__cause__``.
        """
        try:
            docling = self.Docling(file_path, **kwargs)
            return docling.extract()
        except Exception as e:
            # Chain the original error so its traceback is not lost.
            raise Exception(f"Error extracting data from {file_path}: {str(e)}") from e

    def get_data(self, file_path: str, save_path: str = None, data_store_type: str = "markdown", **kwargs):
        """
        Extract a document, write the exported text to disk and return the result.

        Args:
            file_path (str): Path to the document.
            save_path (str): Where to write the exported text. If None, the
                file is written to ``docling_{file_name}.{ext}`` in the current
                working directory, where ``ext`` follows ``data_store_type``.
            data_store_type (str): "markdown", "txt" or "html". Any other
                value prints a warning and falls back to "txt".
            **kwargs: Additional arguments for Docling.

        Returns:
            The extraction result produced by ``_extract_data`` (not the
            exported text).
        """
        import os

        data = self._extract_data(file_path, **kwargs)

        if data_store_type not in self._EXPORTERS:
            print("Invalid data store type. Defaulting to text (txt)")
            # Actually switch to the text exporter; otherwise there would be
            # nothing to write for an unrecognised type.
            data_store_type = "txt"
        data_type, export_method = self._EXPORTERS[data_store_type]

        if save_path is None:
            # basename() honours the platform's path separators; the name is
            # still cut at the first dot, as before.
            stem = os.path.basename(file_path).split('.')[0]
            save_path = f"docling_{stem}.{data_type}"
        print(f"Saving extracted data to {save_path}")

        data_with_type = getattr(data.document, export_method)()
        # Explicit UTF-8 so non-ASCII document text does not depend on the
        # platform's default encoding.
        with open(save_path, 'w', encoding='utf-8') as f:
            f.write(data_with_type)
        return data
mb_rag/version.py CHANGED
@@ -1,5 +1,5 @@
1
1
  MAJOR_VERSION = 1
2
2
  MINOR_VERSION = 1
3
- PATCH_VERSION = 43
3
+ PATCH_VERSION = 45
4
4
  version = '{}.{}.{}'.format(MAJOR_VERSION, MINOR_VERSION, PATCH_VERSION)
5
5
  __all__ = ['MAJOR_VERSION', 'MINOR_VERSION', 'PATCH_VERSION', 'version']
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mb_rag
3
- Version: 1.1.43
3
+ Version: 1.1.45
4
4
  Summary: RAG function file
5
5
  Author: ['Malav Bateriwala']
6
6
  Requires-Python: >=3.8
@@ -1,17 +1,18 @@
1
1
  mb_rag/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- mb_rag/version.py,sha256=F0q66lXUV7IiL9pvn0fGJZImdHtQZkDF6FEwisuxspA,207
2
+ mb_rag/version.py,sha256=9g4JnrnLgsbs9ZJE0iG3ErX8u7puBHMVjLiS08_wP_0,207
3
3
  mb_rag/chatbot/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
- mb_rag/chatbot/basic.py,sha256=jsoPp0b-JhYRYW15WRv73h4OoZzwT2LTtptEOHnSQJo,23825
4
+ mb_rag/chatbot/basic.py,sha256=8tXU_3Yiqv0J-2Bnpw8p9sQaOlZHzX-Xenjs9GmWqes,23825
5
5
  mb_rag/chatbot/chains.py,sha256=vDbLX5R29sWN1pcFqJ5fyxJEgMCM81JAikunAEvMC9A,7223
6
6
  mb_rag/chatbot/prompts.py,sha256=n1PyiLbU-5fkslRv6aVOzt0dDlwya_cEdQ7kRnRhMuY,1749
7
7
  mb_rag/rag/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
- mb_rag/rag/embeddings.py,sha256=H6dDkZi4Ez9NZQrzaYzMlQ66ILLZiOIM-k1LNFfcjSM,27603
8
+ mb_rag/rag/embeddings.py,sha256=CI1tJnIUyGsZhFaqCCZ5xmKKJqdAT1ZAMRReUXLLt2k,28274
9
9
  mb_rag/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
+ mb_rag/utils/all_data_extract.py,sha256=TL6O4vNc7mPW-OGK-LhXQQIkSr0o3_7BqNAD-YpTQMU,2532
10
11
  mb_rag/utils/bounding_box.py,sha256=G0hdDam8QmYtD9lfwMeDHGm-TTo6KZg-yK5ESFL9zaM,8366
11
12
  mb_rag/utils/document_extract.py,sha256=vZiFB1RYm1BIEaNA0MveJ5Zp-KEi0ngKjW8xEdtPqXA,12558
12
13
  mb_rag/utils/extra.py,sha256=spbFrGgdruNyYQ5PzgvpSIa6Nm0rn9bb4qc8W9g582o,2492
13
14
  mb_rag/utils/pdf_extract.py,sha256=cVeMyhnAU4XZxjIZHKMYhrktTjUNOjhx2r_LZKReOZE,15598
14
- mb_rag-1.1.43.dist-info/METADATA,sha256=xzXHksEUFKJAEm48bYtz1U0uu7evE4lrnoYUX3zVje8,234
15
- mb_rag-1.1.43.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
16
- mb_rag-1.1.43.dist-info/top_level.txt,sha256=FIK1eAa5uYnurgXZquBG-s3PIy-HDTC5yJBW4lTH_pM,7
17
- mb_rag-1.1.43.dist-info/RECORD,,
15
+ mb_rag-1.1.45.dist-info/METADATA,sha256=o7mzyY2MJfPaopqUvup1i4ptZeHnMja0jphc4y7jylM,234
16
+ mb_rag-1.1.45.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
17
+ mb_rag-1.1.45.dist-info/top_level.txt,sha256=FIK1eAa5uYnurgXZquBG-s3PIy-HDTC5yJBW4lTH_pM,7
18
+ mb_rag-1.1.45.dist-info/RECORD,,