mb-rag 1.0.134__tar.gz → 1.0.136__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mb-rag might be problematic. Click here for more details.
- {mb_rag-1.0.134 → mb_rag-1.0.136}/PKG-INFO +1 -1
- mb_rag-1.0.136/README.md +267 -0
- {mb_rag-1.0.134 → mb_rag-1.0.136}/mb_rag/version.py +1 -1
- {mb_rag-1.0.134 → mb_rag-1.0.136}/mb_rag.egg-info/PKG-INFO +1 -1
- mb_rag-1.0.134/README.md +0 -167
- {mb_rag-1.0.134 → mb_rag-1.0.136}/mb_rag/__init__.py +0 -0
- {mb_rag-1.0.134 → mb_rag-1.0.136}/mb_rag/chatbot/__init__.py +0 -0
- {mb_rag-1.0.134 → mb_rag-1.0.136}/mb_rag/chatbot/basic.py +0 -0
- {mb_rag-1.0.134 → mb_rag-1.0.136}/mb_rag/chatbot/chains.py +0 -0
- {mb_rag-1.0.134 → mb_rag-1.0.136}/mb_rag/chatbot/prompts.py +0 -0
- {mb_rag-1.0.134 → mb_rag-1.0.136}/mb_rag/rag/__init__.py +0 -0
- {mb_rag-1.0.134 → mb_rag-1.0.136}/mb_rag/rag/embeddings.py +0 -0
- {mb_rag-1.0.134 → mb_rag-1.0.136}/mb_rag/utils/__init__.py +0 -0
- {mb_rag-1.0.134 → mb_rag-1.0.136}/mb_rag/utils/bounding_box.py +0 -0
- {mb_rag-1.0.134 → mb_rag-1.0.136}/mb_rag/utils/extra.py +0 -0
- {mb_rag-1.0.134 → mb_rag-1.0.136}/mb_rag.egg-info/SOURCES.txt +0 -0
- {mb_rag-1.0.134 → mb_rag-1.0.136}/mb_rag.egg-info/dependency_links.txt +0 -0
- {mb_rag-1.0.134 → mb_rag-1.0.136}/mb_rag.egg-info/requires.txt +0 -0
- {mb_rag-1.0.134 → mb_rag-1.0.136}/mb_rag.egg-info/top_level.txt +0 -0
- {mb_rag-1.0.134 → mb_rag-1.0.136}/pyproject.toml +0 -0
- {mb_rag-1.0.134 → mb_rag-1.0.136}/setup.cfg +0 -0
- {mb_rag-1.0.134 → mb_rag-1.0.136}/setup.py +0 -0
mb_rag-1.0.136/README.md
ADDED
|
@@ -0,0 +1,267 @@
|
|
|
1
|
+
# MB-RAG: Modular Building Blocks for Retrieval-Augmented Generation
|
|
2
|
+
|
|
3
|
+
MB-RAG is a flexible Python package that provides modular building blocks for creating RAG (Retrieval-Augmented Generation) applications. It integrates multiple LLM providers, embedding models, and utility functions to help you build powerful AI applications.
|
|
4
|
+
|
|
5
|
+
## Features
|
|
6
|
+
|
|
7
|
+
- **Multiple LLM Support**:
|
|
8
|
+
- OpenAI (GPT-4, GPT-3.5)
|
|
9
|
+
- Anthropic (Claude)
|
|
10
|
+
- Google (Gemini)
|
|
11
|
+
- Ollama (Local models)
|
|
12
|
+
|
|
13
|
+
- **RAG Capabilities**:
|
|
14
|
+
- Text splitting and chunking
|
|
15
|
+
- Multiple embedding models
|
|
16
|
+
- Vector store integration
|
|
17
|
+
- Conversation history management
|
|
18
|
+
- Context-aware retrieval
|
|
19
|
+
|
|
20
|
+
- **Image Processing**:
|
|
21
|
+
- Bounding box generation with Gemini Vision
|
|
22
|
+
- Custom image annotations
|
|
23
|
+
- Multiple output formats
|
|
24
|
+
- Batch processing capabilities
|
|
25
|
+
|
|
26
|
+
## Installation
|
|
27
|
+
|
|
28
|
+
1. Basic Installation:
|
|
29
|
+
```bash
|
|
30
|
+
pip install mb_rag
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
## Quick Start
|
|
34
|
+
|
|
35
|
+
### Basic Chat Examples
|
|
36
|
+
```python
|
|
37
|
+
from mb_rag.chatbot.basic import ModelFactory, ConversationModel
|
|
38
|
+
|
|
39
|
+
# 1. Simple Query with ModelFactory
|
|
40
|
+
model = ModelFactory(model_type="openai", model_name="gpt-4")
|
|
41
|
+
response = model.invoke_query("What is artificial intelligence?")
|
|
42
|
+
print(response)
|
|
43
|
+
|
|
44
|
+
# 2. Image Analysis
|
|
45
|
+
model = ModelFactory(model_type="openai", model_name="gpt-4-vision-preview")
|
|
46
|
+
response = model.invoke_query(
|
|
47
|
+
"What's in these images?",
|
|
48
|
+
images=["image1.jpg", "image2.jpg"]
|
|
49
|
+
)
|
|
50
|
+
print(response)
|
|
51
|
+
|
|
52
|
+
# 3. Conversation with Context
|
|
53
|
+
conversation = ConversationModel(
|
|
54
|
+
model_name="gpt-4",
|
|
55
|
+
model_type="openai"
|
|
56
|
+
)
|
|
57
|
+
|
|
58
|
+
# Initialize conversation with context
|
|
59
|
+
conversation.initialize_conversation(
|
|
60
|
+
question="What is machine learning?",
|
|
61
|
+
context="You are an AI expert. Provide clear, concise explanations."
|
|
62
|
+
)
|
|
63
|
+
|
|
64
|
+
# Continue the conversation
|
|
65
|
+
response = conversation.add_message("How is it different from deep learning?")
|
|
66
|
+
print(response)
|
|
67
|
+
|
|
68
|
+
# Access conversation history
|
|
69
|
+
print("\nAll messages:")
|
|
70
|
+
for message in conversation.all_messages_content:
|
|
71
|
+
print(message)
|
|
72
|
+
|
|
73
|
+
# Save conversation
|
|
74
|
+
conversation.save_conversation("chat_history.txt")
|
|
75
|
+
|
|
76
|
+
# 4. Using Different Models
|
|
77
|
+
# Anthropic Claude
|
|
78
|
+
claude_model = ModelFactory(
|
|
79
|
+
model_type="anthropic",
|
|
80
|
+
model_name="claude-3-opus-20240229"
|
|
81
|
+
)
|
|
82
|
+
response = claude_model.invoke_query("Explain quantum computing")
|
|
83
|
+
|
|
84
|
+
# Google Gemini
|
|
85
|
+
gemini_model = ModelFactory(
|
|
86
|
+
model_type="google",
|
|
87
|
+
model_name="gemini-1.5-pro-latest"
|
|
88
|
+
)
|
|
89
|
+
response = gemini_model.invoke_query("Describe the solar system")
|
|
90
|
+
|
|
91
|
+
# Local Ollama
|
|
92
|
+
ollama_model = ModelFactory(
|
|
93
|
+
model_type="ollama",
|
|
94
|
+
model_name="llama3.1"
|
|
95
|
+
)
|
|
96
|
+
response = ollama_model.invoke_query("What is the meaning of life?")
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
### Embeddings and RAG Example
|
|
100
|
+
```python
|
|
101
|
+
from mb_rag.rag.embeddings import embedding_generator
|
|
102
|
+
|
|
103
|
+
# Initialize embedding generator
|
|
104
|
+
em_gen = embedding_generator(
|
|
105
|
+
model="openai",
|
|
106
|
+
model_type="text-embedding-3-small",
|
|
107
|
+
vector_store_type="chroma"
|
|
108
|
+
)
|
|
109
|
+
|
|
110
|
+
# Generate embeddings from text files
|
|
111
|
+
em_gen.generate_text_embeddings(
|
|
112
|
+
text_data_path=['./data.txt'],
|
|
113
|
+
chunk_size=500,
|
|
114
|
+
chunk_overlap=5,
|
|
115
|
+
folder_save_path='./embeddings'
|
|
116
|
+
)
|
|
117
|
+
|
|
118
|
+
# Load embeddings and create retriever
|
|
119
|
+
em_loading = em_gen.load_embeddings('./embeddings')
|
|
120
|
+
em_retriever = em_gen.load_retriever(
|
|
121
|
+
'./embeddings',
|
|
122
|
+
search_params=[{"k": 2, "score_threshold": 0.1}]
|
|
123
|
+
)
|
|
124
|
+
|
|
125
|
+
# Generate RAG chain for conversation
|
|
126
|
+
rag_chain = em_gen.generate_rag_chain(retriever=em_retriever)
|
|
127
|
+
|
|
128
|
+
# Have a conversation with context
|
|
129
|
+
response = em_gen.conversation_chain(
|
|
130
|
+
"What is this document about?",
|
|
131
|
+
rag_chain,
|
|
132
|
+
file='conversation_history.txt' # Optional: Save conversation
|
|
133
|
+
)
|
|
134
|
+
|
|
135
|
+
# Query specific information
|
|
136
|
+
results = em_gen.query_embeddings(
|
|
137
|
+
"What are the key points discussed?",
|
|
138
|
+
em_retriever
|
|
139
|
+
)
|
|
140
|
+
|
|
141
|
+
# Add new data to existing embeddings
|
|
142
|
+
em_gen.add_data(
|
|
143
|
+
'./embeddings',
|
|
144
|
+
['new_data.txt'],
|
|
145
|
+
chunk_size=500
|
|
146
|
+
)
|
|
147
|
+
|
|
148
|
+
# Web scraping and embedding
|
|
149
|
+
db = em_gen.firecrawl_web(
|
|
150
|
+
website="https://example.com",
|
|
151
|
+
mode="scrape",
|
|
152
|
+
file_to_save='./web_embeddings'
|
|
153
|
+
)
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
### Image Processing with Bounding Boxes
|
|
157
|
+
```python
|
|
158
|
+
from mb_rag.utils.bounding_box import BoundingBoxProcessor, BoundingBoxConfig
|
|
159
|
+
|
|
160
|
+
# Initialize processor with configuration
|
|
161
|
+
config = BoundingBoxConfig(
|
|
162
|
+
model_name="gemini-1.5-pro-latest",
|
|
163
|
+
api_key="your-api-key" # Or use environment variable GOOGLE_API_KEY
|
|
164
|
+
)
|
|
165
|
+
processor = BoundingBoxProcessor(config)
|
|
166
|
+
|
|
167
|
+
# Generate bounding boxes
|
|
168
|
+
boxes = processor.generate_bounding_boxes(
|
|
169
|
+
"image.jpg",
|
|
170
|
+
prompt="Return bounding boxes of objects"
|
|
171
|
+
)
|
|
172
|
+
|
|
173
|
+
# Add boxes to image with custom styling
|
|
174
|
+
processed_img = processor.add_bounding_boxes(
|
|
175
|
+
"image.jpg",
|
|
176
|
+
boxes,
|
|
177
|
+
color=(0, 255, 0), # Green color
|
|
178
|
+
thickness=2,
|
|
179
|
+
font_scale=0.5,
|
|
180
|
+
show=True # Display result
|
|
181
|
+
)
|
|
182
|
+
|
|
183
|
+
# Save processed image
|
|
184
|
+
processor.save_image(processed_img, "output.jpg")
|
|
185
|
+
|
|
186
|
+
# Complete processing pipeline
|
|
187
|
+
result = processor.process_image(
|
|
188
|
+
"image.jpg",
|
|
189
|
+
output_path="result.jpg",
|
|
190
|
+
show=True
|
|
191
|
+
)
|
|
192
|
+
|
|
193
|
+
# Batch processing
|
|
194
|
+
def batch_process_images(processor, image_paths, output_dir, **kwargs):
|
|
195
|
+
"""Process multiple images with same settings."""
|
|
196
|
+
import os
|
|
197
|
+
os.makedirs(output_dir, exist_ok=True)
|
|
198
|
+
|
|
199
|
+
results = []
|
|
200
|
+
for img_path in image_paths:
|
|
201
|
+
try:
|
|
202
|
+
output_path = os.path.join(
|
|
203
|
+
output_dir,
|
|
204
|
+
f"processed_{os.path.basename(img_path)}"
|
|
205
|
+
)
|
|
206
|
+
result = processor.process_image(
|
|
207
|
+
img_path,
|
|
208
|
+
output_path=output_path,
|
|
209
|
+
**kwargs
|
|
210
|
+
)
|
|
211
|
+
results.append((img_path, output_path, True))
|
|
212
|
+
except Exception as e:
|
|
213
|
+
results.append((img_path, None, False))
|
|
214
|
+
print(f"Error processing {img_path}: {e}")
|
|
215
|
+
return results
|
|
216
|
+
|
|
217
|
+
# Example batch processing
|
|
218
|
+
images = ["image1.jpg", "image2.jpg", "image3.jpg"]
|
|
219
|
+
results = batch_process_images(
|
|
220
|
+
processor,
|
|
221
|
+
images,
|
|
222
|
+
"./batch_output",
|
|
223
|
+
show=False
|
|
224
|
+
)
|
|
225
|
+
```
|
|
226
|
+
|
|
227
|
+
## Package Structure
|
|
228
|
+
|
|
229
|
+
```
|
|
230
|
+
mb_rag/
|
|
231
|
+
├── rag/
|
|
232
|
+
│ └── embeddings.py # RAG and embedding functionality
|
|
233
|
+
├── chatbot/
|
|
234
|
+
│ ├── basic.py # Basic chatbot implementations
|
|
235
|
+
│ └── chains.py # LangChain integration
|
|
236
|
+
├── agents/
|
|
237
|
+
│ ├── run_agent.py # Agent execution
|
|
238
|
+
│ └── web_browser_agent.py # Web browsing capabilities
|
|
239
|
+
└── utils/
|
|
240
|
+
    ├── bounding_box.py # Image processing utilities
|
|
241
|
+
    └── extra.py # Additional utilities
|
|
242
|
+
```
|
|
243
|
+
|
|
244
|
+
## Dependencies
|
|
245
|
+
|
|
246
|
+
Core dependencies:
|
|
247
|
+
- langchain-core
|
|
248
|
+
- langchain-community
|
|
249
|
+
- langchain
|
|
250
|
+
- python-dotenv
|
|
251
|
+
|
|
252
|
+
Optional dependencies by feature:
|
|
253
|
+
- Language Models: langchain-openai, langchain-anthropic, langchain-google-genai, langchain-ollama
|
|
254
|
+
- Image Processing: Pillow, opencv-python, google-generativeai
|
|
255
|
+
- Vector Stores: chromadb
|
|
256
|
+
- Web Tools: firecrawl
|
|
257
|
+
|
|
258
|
+
See `requirements.txt` for a complete list.
|
|
259
|
+
|
|
260
|
+
## Environment Setup
|
|
261
|
+
|
|
262
|
+
Create a `.env` file in your project root:
|
|
263
|
+
```env
|
|
264
|
+
OPENAI_API_KEY=your_openai_key
|
|
265
|
+
ANTHROPIC_API_KEY=your_anthropic_key
|
|
266
|
+
GOOGLE_API_KEY=your_google_key
|
|
267
|
+
```
|
mb_rag-1.0.134/README.md
DELETED
|
@@ -1,167 +0,0 @@
|
|
|
1
|
-
# MB-RAG: Modular Building Blocks for Retrieval-Augmented Generation
|
|
2
|
-
|
|
3
|
-
MB-RAG is a flexible Python package that provides modular building blocks for creating RAG (Retrieval-Augmented Generation) applications. It integrates multiple LLM providers, embedding models, and utility functions to help you build powerful AI applications.
|
|
4
|
-
|
|
5
|
-
## Features
|
|
6
|
-
|
|
7
|
-
- **Multiple LLM Support**:
|
|
8
|
-
- OpenAI (GPT-4, GPT-3.5)
|
|
9
|
-
- Anthropic (Claude)
|
|
10
|
-
- Google (Gemini)
|
|
11
|
-
- Ollama (Local models)
|
|
12
|
-
|
|
13
|
-
- **RAG Capabilities**:
|
|
14
|
-
- Text splitting and chunking
|
|
15
|
-
- Multiple embedding models
|
|
16
|
-
- Vector store integration
|
|
17
|
-
- Conversation history management
|
|
18
|
-
- Context-aware retrieval
|
|
19
|
-
|
|
20
|
-
- **Web Tools**:
|
|
21
|
-
- Web browsing agent
|
|
22
|
-
- Content scraping
|
|
23
|
-
- Link extraction
|
|
24
|
-
- Search functionality
|
|
25
|
-
|
|
26
|
-
- **Image Processing**:
|
|
27
|
-
- Bounding box generation
|
|
28
|
-
- Image analysis with Gemini Vision
|
|
29
|
-
- OpenCV integration
|
|
30
|
-
|
|
31
|
-
## Installation
|
|
32
|
-
|
|
33
|
-
1. Basic Installation:
|
|
34
|
-
```bash
|
|
35
|
-
pip install mb_rag
|
|
36
|
-
```
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
## Quick Start
|
|
40
|
-
|
|
41
|
-
### Basic Model load and ask question
|
|
42
|
-
```
|
|
43
|
-
from mb_chatbot.basic import load_model,model_invoke
|
|
44
|
-
|
|
45
|
-
model = load_model(model_name="gpt-4o", model_type='openai')
|
|
46
|
-
response = model_invoke(model,question='What is AI?')
|
|
47
|
-
response = model_invoke(model,question='what is there in all the images?',images=['path1','path2']) ## running with images
|
|
48
|
-
```
|
|
49
|
-
|
|
50
|
-
### Basic RAG Example
|
|
51
|
-
```python
|
|
52
|
-
from mb_rag.rag.embeddings import embedding_generator
|
|
53
|
-
from mb_rag.chatbot.basic import get_chatbot_openai
|
|
54
|
-
|
|
55
|
-
# Initialize embeddings
|
|
56
|
-
embedder = embedding_generator(model='openai')
|
|
57
|
-
|
|
58
|
-
# Generate embeddings from text files
|
|
59
|
-
embedder.generate_text_embeddings(
|
|
60
|
-
text_data_path=['data.txt'],
|
|
61
|
-
folder_save_path='./embeddings'
|
|
62
|
-
)
|
|
63
|
-
|
|
64
|
-
# Load retriever
|
|
65
|
-
retriever = embedder.load_retriever('embeddings')
|
|
66
|
-
|
|
67
|
-
# Create RAG chain
|
|
68
|
-
chatbot = get_chatbot_openai()
|
|
69
|
-
rag_chain = embedder.generate_rag_chain(retriever=retriever, llm=chatbot)
|
|
70
|
-
|
|
71
|
-
# Query your documents
|
|
72
|
-
response = embedder.conversation_chain(
|
|
73
|
-
"What are the key points in the document?",
|
|
74
|
-
rag_chain
|
|
75
|
-
)
|
|
76
|
-
print(response)
|
|
77
|
-
```
|
|
78
|
-
|
|
79
|
-
### Web Browsing Example
|
|
80
|
-
```python
|
|
81
|
-
from mb_rag.agents.web_browser_agent import WebBrowserAgent
|
|
82
|
-
|
|
83
|
-
# Initialize web browser agent
|
|
84
|
-
agent = WebBrowserAgent()
|
|
85
|
-
|
|
86
|
-
# Browse and extract content
|
|
87
|
-
content = agent.browse("https://example.com")
|
|
88
|
-
links = agent._extract_links("https://example.com")
|
|
89
|
-
```
|
|
90
|
-
|
|
91
|
-
### Image Processing Example
|
|
92
|
-
```python
|
|
93
|
-
from mb_rag.utils.bounding_box import google_model, generate_bounding_box
|
|
94
|
-
|
|
95
|
-
# Initialize Gemini model
|
|
96
|
-
model = google_model()
|
|
97
|
-
|
|
98
|
-
# Generate bounding boxes
|
|
99
|
-
boxes = generate_bounding_box(
|
|
100
|
-
model,
|
|
101
|
-
"image.jpg",
|
|
102
|
-
"Return bounding boxes of objects"
|
|
103
|
-
)
|
|
104
|
-
```
|
|
105
|
-
|
|
106
|
-
## Package Structure
|
|
107
|
-
|
|
108
|
-
```
|
|
109
|
-
mb_rag/
|
|
110
|
-
├── rag/
|
|
111
|
-
│ └── embeddings.py # Core RAG functionality
|
|
112
|
-
├── chatbot/
|
|
113
|
-
│ ├── basic.py # Basic chatbot implementations
|
|
114
|
-
│ └── chains.py # LangChain integration
|
|
115
|
-
├── agents/
|
|
116
|
-
│ ├── run_agent.py # Agent execution
|
|
117
|
-
│ └── web_browser_agent.py # Web browsing capabilities
|
|
118
|
-
└── utils/
|
|
119
|
-
    ├── bounding_box.py # Image processing utilities
|
|
120
|
-
    └── extra.py # Additional utilities
|
|
121
|
-
```
|
|
122
|
-
|
|
123
|
-
## Dependencies
|
|
124
|
-
|
|
125
|
-
Core dependencies:
|
|
126
|
-
- langchain-core
|
|
127
|
-
- langchain-community
|
|
128
|
-
- langchain
|
|
129
|
-
- python-dotenv
|
|
130
|
-
|
|
131
|
-
Optional dependencies are organized by feature:
|
|
132
|
-
- Language Models (OpenAI, Anthropic, Google, Ollama)
|
|
133
|
-
- Web Tools (BeautifulSoup, Requests)
|
|
134
|
-
- Image Processing (Pillow, OpenCV)
|
|
135
|
-
- Vector Stores (Chroma, FAISS)
|
|
136
|
-
- Cloud Services (AWS, Google Cloud)
|
|
137
|
-
|
|
138
|
-
See `requirements.txt` for a complete list of optional dependencies.
|
|
139
|
-
|
|
140
|
-
## Environment Setup
|
|
141
|
-
|
|
142
|
-
Create a `.env` file in your project root:
|
|
143
|
-
```env
|
|
144
|
-
OPENAI_API_KEY=your_openai_key
|
|
145
|
-
ANTHROPIC_API_KEY=your_anthropic_key
|
|
146
|
-
GOOGLE_API_KEY=your_google_key
|
|
147
|
-
```
|
|
148
|
-
|
|
149
|
-
## Error Handling
|
|
150
|
-
|
|
151
|
-
The package includes comprehensive error checking:
|
|
152
|
-
- Dependency verification before operations
|
|
153
|
-
- Clear error messages with installation instructions
|
|
154
|
-
- Helpful debugging information
|
|
155
|
-
- Fallbacks when possible
|
|
156
|
-
|
|
157
|
-
## Contributing
|
|
158
|
-
|
|
159
|
-
1. Fork the repository
|
|
160
|
-
2. Create a feature branch
|
|
161
|
-
3. Make your changes
|
|
162
|
-
4. Run tests
|
|
163
|
-
5. Submit a pull request
|
|
164
|
-
|
|
165
|
-
## Acknowledgments
|
|
166
|
-
|
|
167
|
-
Built with [LangChain](https://github.com/langchain-ai/langchain) and other amazing open-source projects.
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|