crewplus 0.2.89__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- crewplus/__init__.py +10 -0
- crewplus/callbacks/__init__.py +1 -0
- crewplus/callbacks/async_langfuse_handler.py +166 -0
- crewplus/services/__init__.py +21 -0
- crewplus/services/azure_chat_model.py +145 -0
- crewplus/services/feedback.md +55 -0
- crewplus/services/feedback_manager.py +267 -0
- crewplus/services/gemini_chat_model.py +884 -0
- crewplus/services/init_services.py +57 -0
- crewplus/services/model_load_balancer.py +264 -0
- crewplus/services/schemas/feedback.py +61 -0
- crewplus/services/tracing_manager.py +182 -0
- crewplus/utils/__init__.py +4 -0
- crewplus/utils/schema_action.py +7 -0
- crewplus/utils/schema_document_updater.py +173 -0
- crewplus/utils/tracing_util.py +55 -0
- crewplus/vectorstores/milvus/__init__.py +5 -0
- crewplus/vectorstores/milvus/milvus_schema_manager.py +270 -0
- crewplus/vectorstores/milvus/schema_milvus.py +586 -0
- crewplus/vectorstores/milvus/vdb_service.py +917 -0
- crewplus-0.2.89.dist-info/METADATA +144 -0
- crewplus-0.2.89.dist-info/RECORD +29 -0
- crewplus-0.2.89.dist-info/WHEEL +4 -0
- crewplus-0.2.89.dist-info/entry_points.txt +4 -0
- crewplus-0.2.89.dist-info/licenses/LICENSE +21 -0
- docs/GeminiChatModel.md +247 -0
- docs/ModelLoadBalancer.md +134 -0
- docs/VDBService.md +238 -0
- docs/index.md +23 -0
docs/VDBService.md
ADDED
|
@@ -0,0 +1,238 @@
|
|
|
1
|
+
# VDBService Documentation
|
|
2
|
+
|
|
3
|
+
## 1. Introduction
|
|
4
|
+
|
|
5
|
+
The `VDBService` is a centralized service class designed to manage connections to vector databases (Milvus and Zilliz) and handle the instantiation of embedding models. It simplifies interactions with your vector store by reading all necessary configurations from a single `settings` object.
|
|
6
|
+
|
|
7
|
+
### Key Features:
|
|
8
|
+
- **Centralized Configuration**: Manages database connections and embedding model settings from a single Python dictionary.
|
|
9
|
+
- **Provider-Agnostic Client**: Supports both Milvus and Zilliz as vector store providers.
|
|
10
|
+
- **Resilient Connection**: Includes a built-in retry mechanism for the initial connection to the vector database.
|
|
11
|
+
- **Instance Caching**: Caches `Zilliz` vector store instances by collection name to prevent re-instantiation and improve performance.
|
|
12
|
+
- **Flexible Embedding Models**: Can retrieve embedding models either from the global `ModelLoadBalancer` or directly from the configuration settings.
|
|
13
|
+
|
|
14
|
+
## 2. Initialization
|
|
15
|
+
|
|
16
|
+
To use the `VDBService`, you must first prepare a `settings` dictionary containing the configuration for your vector store and embedding provider. You then pass this dictionary to the service's constructor.
|
|
17
|
+
|
|
18
|
+
If you plan to use embedding models from the global `ModelLoadBalancer`, you must initialize it first.
|
|
19
|
+
|
|
20
|
+
```python
|
|
21
|
+
from crewplus.vectorstores.milvus.vdb_service import VDBService
|
|
22
|
+
from crewplus.services.init_services import init_load_balancer
|
|
23
|
+
|
|
24
|
+
# 1. (Optional) Initialize the global model load balancer if you plan to use it.
|
|
25
|
+
# This should be done once when your application starts.
|
|
26
|
+
init_load_balancer(config_path="path/to/your/models_config.json")
|
|
27
|
+
|
|
28
|
+
# 2. Define the configuration for the VDBService
|
|
29
|
+
settings = {
|
|
30
|
+
"embedder": {
|
|
31
|
+
"provider": "azure-openai",
|
|
32
|
+
"config": {
|
|
33
|
+
"model": "text-embedding-3-small",
|
|
34
|
+
"api_version": "2023-05-15",
|
|
35
|
+
"api_key": "YOUR_AZURE_OPENAI_KEY",
|
|
36
|
+
"openai_base_url": "YOUR_AZURE_OPENAI_ENDPOINT",
|
|
37
|
+
"embedding_dims": 1536
|
|
38
|
+
}
|
|
39
|
+
},
|
|
40
|
+
"vector_store": {
|
|
41
|
+
"provider": "milvus",
|
|
42
|
+
"config": {
|
|
43
|
+
"host": "localhost",
|
|
44
|
+
"port": 19530,
|
|
45
|
+
"user": "root",
|
|
46
|
+
"password": "password",
|
|
47
|
+
"db_name": "default"
|
|
48
|
+
}
|
|
49
|
+
},
|
|
50
|
+
"index_params": {
|
|
51
|
+
"metric_type": "L2",
|
|
52
|
+
"index_type": "AUTOINDEX",
|
|
53
|
+
"params": {}
|
|
54
|
+
}
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
# 3. Initialize the VDBService with the settings
|
|
58
|
+
vdb_service = VDBService(settings=settings)
|
|
59
|
+
|
|
60
|
+
print("VDBService initialized successfully!")
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
**Alternative Initialization for Zilliz**
|
|
64
|
+
|
|
65
|
+
For a simpler Zilliz Cloud connection, you can initialize the service directly with your endpoint and token.
|
|
66
|
+
|
|
67
|
+
```python
|
|
68
|
+
# Initialize directly with Zilliz credentials
|
|
69
|
+
vdb_service_zilliz = VDBService(
|
|
70
|
+
endpoint="YOUR_ZILLIZ_ENDPOINT",
|
|
71
|
+
token="YOUR_ZILLIZ_TOKEN"
|
|
72
|
+
)
|
|
73
|
+
|
|
74
|
+
print("VDBService for Zilliz initialized successfully!")
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
## 3. Usage Examples
|
|
78
|
+
|
|
79
|
+
### Basic Usage: Get Vector Store with Default Embeddings
|
|
80
|
+
|
|
81
|
+
This example shows how to get a vector store instance using the default embedding model specified in the `embedder` section of your settings.
|
|
82
|
+
|
|
83
|
+
```python
|
|
84
|
+
# Get a vector store instance for the "my_documents" collection
|
|
85
|
+
# This will use the "azure-openai" embedder from the settings by default.
|
|
86
|
+
vector_store = vdb_service.get_vector_store(collection_name="my_documents")
|
|
87
|
+
|
|
88
|
+
# You can now use the vector_store object to add or search for documents
|
|
89
|
+
# vector_store.add_texts(["some text to embed"])
|
|
90
|
+
print(f"Successfully retrieved vector store for collection: {vector_store.collection_name}")
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
### Advanced Usage: Using an Embedding Model from the Model Load Balancer
|
|
94
|
+
|
|
95
|
+
In some cases, you may want to use a specific embedding model managed by the central `ModelLoadBalancer`. This example demonstrates how to retrieve that model first and then pass it to `get_vector_store`.
|
|
96
|
+
|
|
97
|
+
This requires the `ModelLoadBalancer` to have been initialized, as shown in the Initialization section above.
|
|
98
|
+
|
|
99
|
+
```python
|
|
100
|
+
# 1. Get a specific embedding model from the ModelLoadBalancer
|
|
101
|
+
# The service will call get_model_balancer() internally to get the initialized instance.
|
|
102
|
+
embedding_model = vdb_service.get_embeddings(
|
|
103
|
+
from_model_balancer=True,
|
|
104
|
+
provider="azure-openai-embeddings",
|
|
105
|
+
model_type="embedding-large" # Specify the model type configured in the balancer
|
|
106
|
+
)
|
|
107
|
+
|
|
108
|
+
print(f"Retrieved embedding model from balancer: {embedding_model}")
|
|
109
|
+
|
|
110
|
+
# 2. Get a vector store instance using the specified embedding model
|
|
111
|
+
vector_store_from_balancer = vdb_service.get_vector_store(
|
|
112
|
+
collection_name="balancer_collection",
|
|
113
|
+
embeddings=embedding_model # Pass the specific embedding model
|
|
114
|
+
)
|
|
115
|
+
|
|
116
|
+
print(f"Successfully retrieved vector store for collection: {vector_store_from_balancer.collection_name}")
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
### Getting the Raw Milvus Client
|
|
120
|
+
|
|
121
|
+
If you need to perform operations not exposed by the LangChain `Zilliz` wrapper, you can get direct access to the underlying `MilvusClient`.
|
|
122
|
+
|
|
123
|
+
```python
|
|
124
|
+
# Get the raw Milvus client to perform advanced operations
|
|
125
|
+
client = vdb_service.get_vector_client()
|
|
126
|
+
|
|
127
|
+
# For example, list all collections in the database
|
|
128
|
+
collections = client.list_collections()
|
|
129
|
+
print("Available collections:", collections)
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
### Adding and Deleting Documents by Source
|
|
133
|
+
|
|
134
|
+
This example shows a common workflow: adding documents with a specific `source` to a collection, and then using `delete_old_indexes` to remove them based on that source.
|
|
135
|
+
|
|
136
|
+
**Note:** The `delete_old_indexes` method in this example filters on the `source` metadata field, so the documents you add must include `source` in their metadata for deletion to take effect. Ensure your implementation matches the field you intend to use for filtering.
|
|
137
|
+
|
|
138
|
+
```python
|
|
139
|
+
from langchain_core.documents import Document
|
|
140
|
+
import time
|
|
141
|
+
|
|
142
|
+
# 1. Get the vector store instance
|
|
143
|
+
collection_name = "test_collection_for_delete"
|
|
144
|
+
vector_store = vdb_service.get_vector_store(collection_name=collection_name)
|
|
145
|
+
|
|
146
|
+
# 2. Prepare documents with 'source' in their metadata.
|
|
147
|
+
# The delete function looks for this specific metadata field.
|
|
148
|
+
docs_to_add = [
|
|
149
|
+
Document(
|
|
150
|
+
page_content="This is a test document about CrewPlus AI.",
|
|
151
|
+
metadata={"source": "http://example.com/crewplus-docs"}
|
|
152
|
+
),
|
|
153
|
+
Document(
|
|
154
|
+
page_content="This is another test document, about LangChain.",
|
|
155
|
+
metadata={"source": "http://example.com/langchain-docs"} # Different source
|
|
156
|
+
)
|
|
157
|
+
]
|
|
158
|
+
|
|
159
|
+
# 3. Add the documents to the collection
|
|
160
|
+
ids = vector_store.add_documents(docs_to_add)
|
|
161
|
+
print(f"Added {len(ids)} documents to collection '{collection_name}'.")
|
|
162
|
+
|
|
163
|
+
# In a real application, you might need a short delay for indexing to complete.
|
|
164
|
+
time.sleep(2)
|
|
165
|
+
|
|
166
|
+
# 4. Verify the documents were added
|
|
167
|
+
results = vector_store.similarity_search("CrewPlus", k=2)
|
|
168
|
+
print(f"Found {len(results)} related documents before deletion.")
|
|
169
|
+
assert len(results) > 0
|
|
170
|
+
|
|
171
|
+
# 5. Delete the documents using the same source
|
|
172
|
+
source_to_delete = "http://example.com/crewplus-docs"
|
|
173
|
+
vdb_service.delete_old_indexes(url=source_to_delete, vdb=vector_store)
|
|
174
|
+
print(f"Called delete_old_indexes for source: {source_to_delete}")
|
|
175
|
+
|
|
176
|
+
# Allow time for the deletion to be processed.
|
|
177
|
+
time.sleep(2)
|
|
178
|
+
|
|
179
|
+
# 6. Verify the documents were deleted
|
|
180
|
+
results_after_delete = vector_store.similarity_search("CrewPlus", k=2)
|
|
181
|
+
print(f"Found {len(results_after_delete)} related documents after deletion.")
|
|
182
|
+
assert len(results_after_delete) == 0
|
|
183
|
+
|
|
184
|
+
# 7. Clean up by dropping the collection
|
|
185
|
+
vdb_service.drop_collection(collection_name=collection_name)
|
|
186
|
+
print(f"Dropped collection '{collection_name}'.")
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
### Adding and Deleting Documents by Source ID
|
|
190
|
+
|
|
191
|
+
This example shows how to add documents with a `source_id` and then use `delete_old_indexes_by_id` to remove them.
|
|
192
|
+
|
|
193
|
+
```python
|
|
194
|
+
from langchain_core.documents import Document
|
|
195
|
+
import time
|
|
196
|
+
|
|
197
|
+
# 1. Get the vector store instance
|
|
198
|
+
collection_name = "test_collection_for_id_delete"
|
|
199
|
+
vector_store_for_id = vdb_service.get_vector_store(collection_name=collection_name)
|
|
200
|
+
|
|
201
|
+
# 2. Prepare documents with 'source_id' in their metadata.
|
|
202
|
+
docs_with_id = [
|
|
203
|
+
Document(
|
|
204
|
+
page_content="Document for agent A.",
|
|
205
|
+
metadata={"source_id": "agent-a-123"}
|
|
206
|
+
),
|
|
207
|
+
Document(
|
|
208
|
+
page_content="Another document for agent A.",
|
|
209
|
+
metadata={"source_id": "agent-a-123"}
|
|
210
|
+
)
|
|
211
|
+
]
|
|
212
|
+
|
|
213
|
+
# 3. Add the documents to the collection
|
|
214
|
+
ids = vector_store_for_id.add_documents(docs_with_id)
|
|
215
|
+
print(f"Added {len(ids)} documents to collection '{collection_name}'.")
|
|
216
|
+
|
|
217
|
+
time.sleep(2)
|
|
218
|
+
|
|
219
|
+
# 4. Verify the documents were added
|
|
220
|
+
results = vector_store_for_id.similarity_search("agent A", k=2)
|
|
221
|
+
print(f"Found {len(results)} related documents before deletion.")
|
|
222
|
+
assert len(results) == 2
|
|
223
|
+
|
|
224
|
+
# 5. Delete the documents using the source_id
|
|
225
|
+
id_to_delete = "agent-a-123"
|
|
226
|
+
vdb_service.delete_old_indexes_by_id(source_id=id_to_delete, vdb=vector_store_for_id)
|
|
227
|
+
print(f"Called delete_old_indexes_by_id for source_id: {id_to_delete}")
|
|
228
|
+
|
|
229
|
+
time.sleep(2)
|
|
230
|
+
|
|
231
|
+
# 6. Verify the documents were deleted
|
|
232
|
+
results_after_delete = vector_store_for_id.similarity_search("agent A", k=2)
|
|
233
|
+
print(f"Found {len(results_after_delete)} related documents after deletion.")
|
|
234
|
+
assert len(results_after_delete) == 0
|
|
235
|
+
|
|
236
|
+
# 7. Clean up by dropping the collection
|
|
237
|
+
vdb_service.drop_collection(collection_name=collection_name)
|
|
238
|
+
print(f"Dropped collection '{collection_name}'.")
|
docs/index.md
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# Welcome to CrewPlus
|
|
2
|
+
|
|
3
|
+
**CrewPlus** provides the foundational services and core components for building advanced AI applications. It is the heart of the CrewPlus ecosystem, designed for scalability, extensibility, and seamless integration.
|
|
4
|
+
|
|
5
|
+
## Overview
|
|
6
|
+
|
|
7
|
+
This repository, `crewplus-base`, contains the core `crewplus` Python package. It includes essential building blocks for interacting with large language models, managing vector databases, and handling application configuration. Whether you are building a simple chatbot or a complex multi-agent system, CrewPlus offers the robust foundation you need.
|
|
8
|
+
|
|
9
|
+
## The CrewPlus Ecosystem
|
|
10
|
+
|
|
11
|
+
CrewPlus is designed as a modular and extensible ecosystem of packages. This allows you to adopt only the components you need for your specific use case.
|
|
12
|
+
|
|
13
|
+
- **`crewplus` (This package):** The core package containing foundational services for chat, model load balancing, and vector stores.
|
|
14
|
+
- **`crewplus-agents`:** An extension for creating and managing autonomous AI agents.
|
|
15
|
+
- **`crewplus-ingestion`:** Provides robust pipelines for knowledge ingestion and data processing.
|
|
16
|
+
- **`crewplus-integrations`:** A collection of third-party integrations to connect CrewPlus with other services and platforms.
|
|
17
|
+
|
|
18
|
+
## Getting Started
|
|
19
|
+
|
|
20
|
+
To get started, check out our detailed user guides:
|
|
21
|
+
|
|
22
|
+
- **[GeminiChatModel Guide](./GeminiChatModel.md)**: A comprehensive guide to using the `GeminiChatModel` for text, image, and video understanding.
|
|
23
|
+
- **[ModelLoadBalancer Guide](./ModelLoadBalancer.md)**: A guide to using the `ModelLoadBalancer` for managing and accessing different language models.
|