mb-rag 1.1.46__py3-none-any.whl → 1.1.56.post0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release. This version of mb-rag might be problematic.

mb_rag/basic.py ADDED
@@ -0,0 +1,306 @@
+ ## file for loading all models for chat/rag
+
+ import base64
+ import os
+ from typing import Any, Optional
+
+ from langchain_core.messages import HumanMessage
+
+ from .utils.all_data_extract import DocumentExtractor
+ from .utils.extra import check_package
+
+ __all__ = [
+     'ModelFactory',
+ ]
+
+ class ModelFactory:
+     """Factory class for creating different types of chatbot models"""
+
+     def __init__(self, model_type: str = 'openai', model_name: str = "gpt-4o", **kwargs) -> None:
+         """
+         Create a chatbot model of the given type and store it on self.model.
+         Args:
+             model_type (str): Type of model to create. Default is 'openai'. Options are openai, anthropic, google, ollama, groq, deepseek, qwen, hugging_face
+             model_name (str): Name of the model
+             **kwargs: Additional arguments forwarded to the provider constructor
+         """
+         creators = {
+             'openai': self.create_openai,
+             'anthropic': self.create_anthropic,
+             'google': self.create_google,
+             'ollama': self.create_ollama,
+             'groq': self.create_groq,
+             'deepseek': self.create_deepseek,
+             'qwen': self.create_qwen,
+             'hugging_face': self.create_hugging_face
+         }
+
+         self.model_type = model_type
+         self.model_name = model_name
+         model_data = creators.get(model_type)
+         if not model_data:
+             raise ValueError(f"Unsupported model type: {model_type}")
+
+         try:
+             self.model = model_data(model_name, **kwargs)
+         except Exception as e:
+             raise ValueError(f"Error creating {model_type} model: {e}") from e
+
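A minimal usage sketch of the factory (assuming the relevant provider package is installed and its API key, e.g. OPENAI_API_KEY, is set in the environment):

```python
from mb_rag.basic import ModelFactory

# Extra kwargs are forwarded to the underlying LangChain constructor.
bot = ModelFactory(model_type='openai', model_name='gpt-4o', temperature=0.2)
print(bot.invoke_query("Summarize retrieval-augmented generation in one sentence."))
```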
+     @classmethod
+     def create_openai(cls, model_name: str = "gpt-4o", **kwargs) -> Any:
+         """
+         Create OpenAI chatbot model
+         Args:
+             model_name (str): Name of the model
+             **kwargs: Additional arguments
+         Returns:
+             ChatOpenAI: Chatbot model
+         """
+         if not check_package("openai"):
+             raise ImportError("OpenAI package not found. Please install it using: pip install openai langchain-openai")
+
+         from langchain_openai import ChatOpenAI
+         kwargs["model_name"] = model_name
+         return ChatOpenAI(**kwargs)
+
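Because the creators are classmethods, they can also be called directly when only the bare LangChain model is needed, without instantiating the factory (a sketch, assuming langchain-openai is installed):

```python
llm = ModelFactory.create_openai(model_name="gpt-4o-mini", temperature=0)
print(llm.invoke("Hello").content)  # ChatOpenAI returns an AIMessage
```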
+     @classmethod
+     def create_anthropic(cls, model_name: str = "claude-3-opus-20240229", **kwargs) -> Any:
+         """
+         Create Anthropic chatbot model
+         Args:
+             model_name (str): Name of the model
+             **kwargs: Additional arguments
+         Returns:
+             ChatAnthropic: Chatbot model
+         """
+         if not check_package("anthropic"):
+             raise ImportError("Anthropic package not found. Please install it using: pip install anthropic langchain-anthropic")
+
+         from langchain_anthropic import ChatAnthropic
+         kwargs["model_name"] = model_name
+         return ChatAnthropic(**kwargs)
+
+     @classmethod
+     def create_google(cls, model_name: str = "gemini-2.0-flash", **kwargs) -> Any:
+         """
+         Create Google chatbot model
+         Args:
+             model_name (str): Name of the model
+             **kwargs: Additional arguments
+         Returns:
+             ChatGoogleGenerativeAI: Chatbot model
+         """
+         if not check_package("langchain_google_genai"):
+             raise ImportError("langchain_google_genai package not found. Please install it using: pip install langchain-google-genai")
+
+         from langchain_google_genai import ChatGoogleGenerativeAI
+         kwargs["model"] = model_name
+         return ChatGoogleGenerativeAI(**kwargs)
+
+     @classmethod
+     def create_ollama(cls, model_name: str = "llama3", **kwargs) -> Any:
+         """
+         Create Ollama chatbot model
+         Args:
+             model_name (str): Name of the model
+             **kwargs: Additional arguments
+         Returns:
+             ChatOllama: Chatbot model
+         """
+         if not check_package("langchain_ollama"):
+             raise ImportError("langchain_ollama package not found. Please install it using: pip install langchain-ollama")
+
+         from langchain_ollama import ChatOllama
+
+         # os.system prints the output of 'ollama ps' itself and returns an exit
+         # code, so show the output directly instead of interpolating the return value.
+         print("Currently running Ollama models:")
+         os.system('ollama ps')
+         kwargs["model"] = model_name
+         return ChatOllama(**kwargs)
+
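Ollama talks to a locally running server, so a sketch like the following assumes `ollama serve` is up and the model has already been pulled (e.g. `ollama pull llama3`):

```python
bot = ModelFactory(model_type='ollama', model_name='llama3')
print(bot.invoke_query("What is retrieval-augmented generation?"))
```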
+     @classmethod
+     def create_groq(cls, model_name: str = "llama-3.3-70b-versatile", **kwargs) -> Any:
+         """
+         Create Groq chatbot model
+         Args:
+             model_name (str): Name of the model
+             **kwargs: Additional arguments. Options include: temperature, groq_api_key
+         Returns:
+             ChatGroq: Chatbot model
+         """
+         if not check_package("langchain_groq"):
+             raise ImportError("Langchain Groq package not found. Please install it using: pip install langchain-groq")
+
+         from langchain_groq import ChatGroq
+         kwargs["model"] = model_name
+         return ChatGroq(**kwargs)
+
+     @classmethod
+     def create_deepseek(cls, model_name: str = "deepseek-chat", **kwargs) -> Any:
+         """
+         Create DeepSeek chatbot model
+         Args:
+             model_name (str): Name of the model
+             **kwargs: Additional arguments
+         Returns:
+             ChatDeepSeek: Chatbot model
+         """
+         if not check_package("langchain_deepseek"):
+             raise ImportError("Langchain DeepSeek package not found. Please install it using: pip install langchain-deepseek")
+
+         from langchain_deepseek import ChatDeepSeek
+         kwargs["model"] = model_name
+         return ChatDeepSeek(**kwargs)
+
+     @classmethod
+     def create_qwen(cls, model_name: str = "qwen", **kwargs) -> Any:
+         """
+         Create Qwen chatbot model (served via Tongyi)
+         Args:
+             model_name (str): Name of the model
+             **kwargs: Additional arguments
+         Returns:
+             ChatTongyi: Chatbot model
+         """
+         if not check_package("langchain_community"):
+             raise ImportError("langchain_community package not found. Please install it using: pip install langchain-community")
+
+         from langchain_community.chat_models.tongyi import ChatTongyi
+         kwargs["model"] = model_name
+         return ChatTongyi(streaming=True, **kwargs)
+
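Since **kwargs passes straight through to the provider constructor, provider-specific options can be set at construction time. A sketch with Groq (assuming GROQ_API_KEY is set in the environment):

```python
bot = ModelFactory(
    model_type='groq',
    model_name='llama-3.3-70b-versatile',
    temperature=0.1,  # forwarded to ChatGroq
)
print(bot.invoke_query("Name three vector databases."))
```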
+     @classmethod
+     def create_hugging_face(cls, model_name: str = "Qwen/Qwen2.5-VL-7B-Instruct", model_function: str = "image-text-to-text",
+                             device: str = 'cpu', **kwargs) -> Any:
+         """
+         Create and load a Hugging Face model.
+         Args:
+             model_name (str): Name of the model
+             model_function (str): Pipeline task. Default is image-text-to-text.
+             device (str): Device to use ('cpu' or 'cuda'). Default is cpu; falls back to cpu when CUDA is unavailable.
+             **kwargs: Additional arguments
+         Returns:
+             HuggingFacePipeline: Chatbot model
+         """
+         if not check_package("transformers"):
+             raise ImportError("Transformers package not found. Please install it using: pip install transformers")
+         if not check_package("langchain_huggingface"):
+             raise ImportError("langchain_huggingface package not found. Please install it using: pip install langchain-huggingface")
+         if not check_package("torch"):
+             raise ImportError("Torch package not found. Please install it using: pip install torch")
+
+         from langchain_huggingface import HuggingFacePipeline
+         from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, AutoModelForImageTextToText, AutoProcessor
+         import torch
+
+         # Keep device as a plain string so the comparisons below work;
+         # fall back to CPU when CUDA is requested but not available.
+         if device == "cuda" and not torch.cuda.is_available():
+             device = "cpu"
+
+         temperature = kwargs.pop("temperature", 0.7)
+         max_length = kwargs.pop("max_length", 1024)
+         dtype = torch.float16 if device == "cuda" else torch.float32
+
+         if model_function == "image-text-to-text":
+             # Multimodal checkpoints ship a processor rather than a plain tokenizer.
+             processor = AutoProcessor.from_pretrained(model_name, trust_remote_code=True)
+             model = AutoModelForImageTextToText.from_pretrained(
+                 model_name,
+                 torch_dtype=dtype,
+                 device_map=device,
+                 trust_remote_code=True,
+                 **kwargs
+             )
+             pipe = pipeline(
+                 model_function,
+                 model=model,
+                 processor=processor,
+                 max_length=max_length,
+                 temperature=temperature
+             )
+         else:
+             tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+             model = AutoModelForCausalLM.from_pretrained(
+                 model_name,
+                 torch_dtype=dtype,
+                 device_map=device,
+                 trust_remote_code=True,
+                 **kwargs
+             )
+             pipe = pipeline(
+                 model_function,
+                 model=model,
+                 tokenizer=tokenizer,
+                 max_length=max_length,
+                 temperature=temperature
+             )
+
+         # Wrap the transformers pipeline for LangChain
+         return HuggingFacePipeline(pipeline=pipe)
+
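A sketch of the text-generation path (assuming transformers, torch, and langchain-huggingface are installed and the checkpoint fits in memory; the wrapped pipeline returns plain strings rather than chat messages):

```python
llm = ModelFactory.create_hugging_face(
    model_name="Qwen/Qwen2.5-0.5B-Instruct",  # small text model for illustration
    model_function="text-generation",
    device="cpu",
)
print(llm.invoke("Explain embeddings in one line."))
```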
+     def _reset_model(self):
+         """Reset the model"""
+         self.model = self.model.reset()
+
+     def invoke_query(self, query: str, file_path: Optional[str] = None, get_content_only: bool = True,
+                      images: Optional[list] = None, pydantic_model=None) -> Any:
+         """
+         Invoke the model
+         Args:
+             query (str): Query to send to the model
+             file_path (str): Path to a text file whose contents are appended to the query. Default is None
+             get_content_only (bool): Whether to return only the response content
+             images (list): List of image paths to send to the model
+             pydantic_model: Pydantic model for structured output
+         Returns:
+             Any: Response from the model
+         """
+         if file_path:
+             loader = DocumentExtractor()
+             data = loader.get_data(file_path)
+             query = query + "\n\n" + data
+
+         structured_model = None
+         if pydantic_model is not None:
+             try:
+                 structured_model = self.model.with_structured_output(pydantic_model)
+             except Exception as e:
+                 raise ValueError(f"Error with pydantic_model: {e}") from e
+         if structured_model is None:
+             structured_model = self.model
+         else:
+             print("Using structured model with pydantic schema, so get_content_only is set to False.")
+             get_content_only = False  # the structured model returns a pydantic object, not a message
+         if images:
+             message = self._model_invoke_images(images=images, prompt=query)
+             res = structured_model.invoke([message])
+         else:
+             res = structured_model.invoke(query)
+         if get_content_only:
+             try:
+                 return res.content
+             except AttributeError:
+                 return res
+         return res
+
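A sketch of structured output (assuming the underlying LangChain model supports with_structured_output; the CityInfo schema here is hypothetical):

```python
from pydantic import BaseModel

class CityInfo(BaseModel):
    name: str
    country: str
    population: int

bot = ModelFactory(model_type='openai', model_name='gpt-4o')
info = bot.invoke_query("Give me basic facts about Tokyo.", pydantic_model=CityInfo)
print(info.population)  # a CityInfo instance, not a plain string
```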
+     def _image_to_base64(self, image):
+         """Read an image file and return its base64-encoded contents."""
+         with open(image, "rb") as f:
+             return base64.b64encode(f.read()).decode('utf-8')
+
+     def _model_invoke_images(self, images: list, prompt: str) -> HumanMessage:
+         """
+         Build a multimodal message containing the prompt and the images
+         Args:
+             images (list): List of image file paths
+             prompt (str): Prompt text
+         Returns:
+             HumanMessage: Message combining the prompt and base64-encoded images
+         """
+         base64_images = [self._image_to_base64(image) for image in images]
+         image_prompt_create = [{"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{img}"}} for img in base64_images]
+         prompt_new = [{"type": "text", "text": prompt}, *image_prompt_create]
+
+         message = HumanMessage(content=prompt_new)
+         return message
+
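Image queries go through the same entry point; the helper base64-encodes each file into an image_url content part. A sketch, assuming a vision-capable model and a local JPEG (photo.jpg is hypothetical):

```python
bot = ModelFactory(model_type='openai', model_name='gpt-4o')
answer = bot.invoke_query(
    "What is shown in this picture?",
    images=["./photo.jpg"],
)
print(answer)
```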
+     def _get_llm_metadata(self):
+         """
+         Print basic metadata about the LLM. Not every provider exposes every
+         attribute, so missing attributes are reported as None instead of raising.
+         """
+         print("Model Name: ", self.model)
+         print("Model Temperature: ", getattr(self.model, "temperature", None))
+         print("Model Max Tokens: ", getattr(self.model, "max_output_tokens", None))
+         print("Model Top P: ", getattr(self.model, "top_p", None))
+         print("Model Top K: ", getattr(self.model, "top_k", None))
+         print("Model Input Schema:", getattr(self.model, "input_schema", None))
+