mb-rag 1.1.57.post1.tar.gz → 1.1.59.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mb-rag has been flagged as possibly problematic by the registry.
Files changed (25)
  1. {mb_rag-1.1.57.post1 → mb_rag-1.1.59}/PKG-INFO +11 -11
  2. {mb_rag-1.1.57.post1 → mb_rag-1.1.59}/README.md +269 -269
  3. {mb_rag-1.1.57.post1 → mb_rag-1.1.59}/mb_rag/basic.py +376 -306
  4. {mb_rag-1.1.57.post1 → mb_rag-1.1.59}/mb_rag/chatbot/chains.py +206 -206
  5. {mb_rag-1.1.57.post1 → mb_rag-1.1.59}/mb_rag/chatbot/conversation.py +185 -185
  6. {mb_rag-1.1.57.post1 → mb_rag-1.1.59}/mb_rag/chatbot/prompts.py +58 -58
  7. {mb_rag-1.1.57.post1 → mb_rag-1.1.59}/mb_rag/rag/embeddings.py +810 -810
  8. {mb_rag-1.1.57.post1 → mb_rag-1.1.59}/mb_rag/utils/all_data_extract.py +64 -64
  9. {mb_rag-1.1.57.post1 → mb_rag-1.1.59}/mb_rag/utils/bounding_box.py +231 -231
  10. {mb_rag-1.1.57.post1 → mb_rag-1.1.59}/mb_rag/utils/document_extract.py +354 -354
  11. {mb_rag-1.1.57.post1 → mb_rag-1.1.59}/mb_rag/utils/extra.py +73 -73
  12. {mb_rag-1.1.57.post1 → mb_rag-1.1.59}/mb_rag/utils/pdf_extract.py +428 -428
  13. {mb_rag-1.1.57.post1 → mb_rag-1.1.59}/mb_rag/version.py +1 -1
  14. {mb_rag-1.1.57.post1 → mb_rag-1.1.59}/mb_rag.egg-info/PKG-INFO +11 -11
  15. {mb_rag-1.1.57.post1 → mb_rag-1.1.59}/pyproject.toml +3 -3
  16. {mb_rag-1.1.57.post1 → mb_rag-1.1.59}/setup.cfg +4 -4
  17. {mb_rag-1.1.57.post1 → mb_rag-1.1.59}/setup.py +26 -26
  18. {mb_rag-1.1.57.post1 → mb_rag-1.1.59}/mb_rag/__init__.py +0 -0
  19. {mb_rag-1.1.57.post1 → mb_rag-1.1.59}/mb_rag/chatbot/__init__.py +0 -0
  20. {mb_rag-1.1.57.post1 → mb_rag-1.1.59}/mb_rag/rag/__init__.py +0 -0
  21. {mb_rag-1.1.57.post1 → mb_rag-1.1.59}/mb_rag/utils/__init__.py +0 -0
  22. {mb_rag-1.1.57.post1 → mb_rag-1.1.59}/mb_rag.egg-info/SOURCES.txt +0 -0
  23. {mb_rag-1.1.57.post1 → mb_rag-1.1.59}/mb_rag.egg-info/dependency_links.txt +0 -0
  24. {mb_rag-1.1.57.post1 → mb_rag-1.1.59}/mb_rag.egg-info/requires.txt +0 -0
  25. {mb_rag-1.1.57.post1 → mb_rag-1.1.59}/mb_rag.egg-info/top_level.txt +0 -0
@@ -1,11 +1,11 @@
- Metadata-Version: 2.4
- Name: mb_rag
- Version: 1.1.57.post1
- Summary: RAG function file
- Author: ['Malav Bateriwala']
- Requires-Python: >=3.8
- Requires-Dist: mb_base
- Dynamic: author
- Dynamic: requires-dist
- Dynamic: requires-python
- Dynamic: summary
+ Metadata-Version: 2.4
+ Name: mb_rag
+ Version: 1.1.59
+ Summary: RAG function file
+ Author: ['Malav Bateriwala']
+ Requires-Python: >=3.8
+ Requires-Dist: mb_base
+ Dynamic: author
+ Dynamic: requires-dist
+ Dynamic: requires-python
+ Dynamic: summary
@@ -1,270 +1,270 @@
README.md is rewritten wholesale in this release, but the removed and added text render identically (a whitespace or line-ending change), so the file is shown once below.

# MB-RAG: Modular Building Blocks for Retrieval-Augmented Generation

MB-RAG is a flexible Python package that provides modular building blocks for creating RAG (Retrieval-Augmented Generation) applications. It integrates multiple LLM providers, embedding models, and utility functions to help you build powerful AI applications.

## Features

- **Multiple LLM Support**:
  - OpenAI (GPT-4, GPT-3.5)
  - Anthropic (Claude)
  - Google (Gemini)
  - Ollama (local models)
  - Groq

- **RAG Capabilities**:
  - Text splitting and chunking
  - Multiple embedding models
  - Vector store integration
  - Conversation history management
  - Context-aware retrieval

- **Image Processing**:
  - Bounding box generation with Gemini Vision
  - Custom image annotations
  - Multiple output formats
  - Batch processing capabilities

## Installation

Basic installation:
```bash
pip install mb_rag
```

## Quick Start

### Basic Chat Examples

See `example_llm.ipynb` for more details.

```python
from mb_rag.basic import ModelFactory

# 1. Simple query with ModelFactory
model = ModelFactory(model_type="openai", model_name="gpt-4o")
response = model.invoke_query("What is artificial intelligence?")
print(response)

# 2. Image analysis
model = ModelFactory(model_type="openai", model_name="gpt-4o")
response = model.invoke_query(
    "What's in these images?",
    images=["image1.jpg", "image2.jpg"]
)
print(response)

# Other models
# Anthropic Claude
claude_model = ModelFactory(
    model_type="anthropic",
    model_name="claude-3-opus-20240229"
)
response = claude_model.invoke_query("Explain quantum computing")

# Google Gemini
gemini_model = ModelFactory(
    model_type="google",
    model_name="gemini-1.5-pro-latest"
)
response = gemini_model.invoke_query("Describe the solar system")

# Local Ollama
ollama_model = ModelFactory(
    model_type="ollama",
    model_name="llama3.1"
)
response = ollama_model.invoke_query("What is the meaning of life?")

# See example_conversation.ipynb for more details.
from mb_rag.chatbot.conversation import ConversationModel

# 3. Conversation with context: if file_path/message_list is not
# provided, a new conversation is created.
conversation = ConversationModel(
    llm=ModelFactory(model_type="openai", model_name="gpt-4o"),
    file_path=None,
    message_list=None
)
conversation.initialize_conversation()

# Continue the conversation
response = conversation.add_message("How is it different from deep learning?")
print(response)

# Access conversation history
print("\nAll messages:")
for message in conversation.all_messages_content:
    print(message)

# Save the conversation
conversation.save_conversation("chat_history.txt")
```
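Groq appears in the supported-model list above but not in these examples. Assuming it follows the same `ModelFactory` pattern, a minimal sketch (the `model_type="groq"` string and the model name are assumptions, not confirmed by the package):

```python
# Hypothetical Groq usage via the same factory pattern; the type string
# and model name below are illustrative guesses, not documented values.
groq_model = ModelFactory(
    model_type="groq",
    model_name="llama-3.1-70b-versatile"
)
response = groq_model.invoke_query("Summarize the history of the transistor")
```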
### Embeddings and RAG Example

```python
from mb_rag.rag.embeddings import embedding_generator

# Initialize the embedding generator
em_gen = embedding_generator(
    model="openai",
    model_type="text-embedding-3-small",
    vector_store_type="chroma"
)

# Generate embeddings from text files
em_gen.generate_text_embeddings(
    text_data_path=['./data.txt'],
    chunk_size=500,
    chunk_overlap=5,
    folder_save_path='./embeddings'
)

# Load embeddings and create a retriever
em_loading = em_gen.load_embeddings('./embeddings')
em_retriever = em_gen.load_retriever(
    './embeddings',
    search_params=[{"k": 2, "score_threshold": 0.1}]
)

# Generate a RAG chain for conversation
rag_chain = em_gen.generate_rag_chain(retriever=em_retriever)

# Have a conversation with context
response = em_gen.conversation_chain(
    "What is this document about?",
    rag_chain,
    file='conversation_history.txt'  # Optional: save the conversation
)

# Query specific information
results = em_gen.query_embeddings(
    "What are the key points discussed?",
    em_retriever
)

# Add new data to existing embeddings
em_gen.add_data(
    './embeddings',
    ['new_data.txt'],
    chunk_size=500
)

# Web scraping and embedding
db = em_gen.firecrawl_web(
    website="https://github.com",
    mode="scrape",
    file_to_save='./web_embeddings'
)
```
### Image Processing with Bounding Boxes

```python
from mb_rag.utils.bounding_box import BoundingBoxProcessor, BoundingBoxConfig

# Initialize the processor with a configuration
config = BoundingBoxConfig(
    model_name="gemini-1.5-pro-latest",
    api_key="your-api-key"  # Or use the GOOGLE_API_KEY environment variable
)
processor = BoundingBoxProcessor(config)

# Generate bounding boxes
boxes = processor.generate_bounding_boxes(
    "image.jpg",
    prompt="Return bounding boxes of objects"
)

# Add boxes to the image with custom styling
processed_img = processor.add_bounding_boxes(
    "image.jpg",
    boxes,
    color=(0, 255, 0),  # Green
    thickness=2,
    font_scale=0.5,
    show=True  # Display the result
)

# Save the processed image
processor.save_image(processed_img, "output.jpg")

# Complete processing pipeline
result = processor.process_image(
    "image.jpg",
    output_path="result.jpg",
    show=True
)

# Batch processing
import os

def batch_process_images(processor, image_paths, output_dir, **kwargs):
    """Process multiple images with the same settings."""
    os.makedirs(output_dir, exist_ok=True)

    results = []
    for img_path in image_paths:
        try:
            output_path = os.path.join(
                output_dir,
                f"processed_{os.path.basename(img_path)}"
            )
            processor.process_image(
                img_path,
                output_path=output_path,
                **kwargs
            )
            results.append((img_path, output_path, True))
        except Exception as e:
            results.append((img_path, None, False))
            print(f"Error processing {img_path}: {e}")
    return results

# Example batch run
images = ["image1.jpg", "image2.jpg", "image3.jpg"]
results = batch_process_images(
    processor,
    images,
    "./batch_output",
    show=False
)
```
## Package Structure

```
mb_rag/
├── rag/
│   └── embeddings.py            # RAG and embedding functionality
├── chatbot/
│   ├── conversation.py          # Conversation functionality
│   └── chains.py                # LangChain integration
├── agents/
│   ├── run_agent.py             # Agent execution
│   └── web_browser_agent.py     # Web browsing capabilities (WebAgent built on langgraph)
├── utils/
│   ├── bounding_box.py          # Image processing utilities
│   └── extra.py                 # Additional utilities
└── basic.py                     # Basic chatbot implementations
```

## Dependencies

Core dependencies:
- langchain-core
- langchain-community
- langchain
- python-dotenv

Optional dependencies by feature (see the install sketch after this list):
- Language Models: langchain-openai, langchain-anthropic, langchain-google-genai, langchain-ollama
- Image Processing: Pillow, opencv-python, google-generativeai
- Vector Stores: chromadb
- Web Tools: firecrawl

See `requirements.txt` for a complete list.
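The README does not document pip extras, so the conservative way to enable a feature is to install its optional packages directly alongside mb_rag. A sketch for an OpenAI-plus-Chroma RAG setup, using package names from the list above:

```bash
# Enable the OpenAI language-model and Chroma vector-store features.
# mb_rag itself only declares mb_base as a hard dependency, so the
# feature packages are installed explicitly here.
pip install mb_rag langchain-openai chromadb
```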
## Environment Setup

Create a `.env` file in your project root:

```env
OPENAI_API_KEY=your_openai_key
ANTHROPIC_API_KEY=your_anthropic_key
GOOGLE_API_KEY=your_google_key
```
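`python-dotenv` is listed as a core dependency, so these variables can be loaded at startup. A minimal sketch, assuming the `.env` file above sits in the working directory:

```python
import os

from dotenv import load_dotenv

# Read .env from the current working directory into os.environ.
load_dotenv()

# The provider integrations read these variables themselves; checking
# one here only verifies the file was found.
assert os.getenv("OPENAI_API_KEY"), "OPENAI_API_KEY is not set"
```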