crewplus 0.2.89__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- crewplus/__init__.py +10 -0
- crewplus/callbacks/__init__.py +1 -0
- crewplus/callbacks/async_langfuse_handler.py +166 -0
- crewplus/services/__init__.py +21 -0
- crewplus/services/azure_chat_model.py +145 -0
- crewplus/services/feedback.md +55 -0
- crewplus/services/feedback_manager.py +267 -0
- crewplus/services/gemini_chat_model.py +884 -0
- crewplus/services/init_services.py +57 -0
- crewplus/services/model_load_balancer.py +264 -0
- crewplus/services/schemas/feedback.py +61 -0
- crewplus/services/tracing_manager.py +182 -0
- crewplus/utils/__init__.py +4 -0
- crewplus/utils/schema_action.py +7 -0
- crewplus/utils/schema_document_updater.py +173 -0
- crewplus/utils/tracing_util.py +55 -0
- crewplus/vectorstores/milvus/__init__.py +5 -0
- crewplus/vectorstores/milvus/milvus_schema_manager.py +270 -0
- crewplus/vectorstores/milvus/schema_milvus.py +586 -0
- crewplus/vectorstores/milvus/vdb_service.py +917 -0
- crewplus-0.2.89.dist-info/METADATA +144 -0
- crewplus-0.2.89.dist-info/RECORD +29 -0
- crewplus-0.2.89.dist-info/WHEEL +4 -0
- crewplus-0.2.89.dist-info/entry_points.txt +4 -0
- crewplus-0.2.89.dist-info/licenses/LICENSE +21 -0
- docs/GeminiChatModel.md +247 -0
- docs/ModelLoadBalancer.md +134 -0
- docs/VDBService.md +238 -0
- docs/index.md +23 -0
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
from typing import List
|
|
2
|
+
from langchain_core.documents import Document
|
|
3
|
+
import random
|
|
4
|
+
|
|
5
|
+
class SchemaDocumentUpdater:
    """A utility class for updating and creating LangChain Documents with specific metadata schemas."""

    @staticmethod
    def update_document_metadata(document: Document, metadata: dict) -> Document:
        """
        Returns a copy of a LangChain Document with merged metadata.

        Args:
            document (Document): The document to update. The input document is
                left unmodified.
            metadata (dict): A dictionary containing the metadata to add or
                update; its entries override existing keys.

        Returns:
            Document: A new document carrying the merged metadata.
        """
        # Merge into a fresh dict: the previous implementation updated
        # document.metadata in place, silently mutating the caller's document.
        merged_metadata = {**document.metadata, **metadata}

        return Document(
            page_content=document.page_content,
            metadata=merged_metadata
        )

    @staticmethod
    def delete_document_metadata(document: Document, keys_to_delete: List[str]) -> Document:
        """
        Returns a copy of a LangChain Document with the specified metadata keys removed.

        Args:
            document (Document): The document to update. The input document is
                left unmodified.
            keys_to_delete (List[str]): A list of keys to delete from the metadata.
                Keys not present are ignored.

        Returns:
            Document: A new document with the specified metadata keys removed.
        """
        # Build a filtered copy instead of `del`-ing from the input's dict,
        # which mutated the original document as a side effect.
        drop = set(keys_to_delete)
        remaining = {k: v for k, v in document.metadata.items() if k not in drop}

        return Document(
            page_content=document.page_content,
            metadata=remaining
        )

    @staticmethod
    def add_sample_metadata(document: Document, type: str) -> Document:
        """
        Adds sample metadata to a document based on a specified type.

        The metadata schema is tailored for either "Reg Wheel" or "Robot" types;
        any other value falls through to the "Robot" schema.

        Args:
            document (Document): The document to which sample metadata will be added.
            type (str): The type of sample metadata to add ("Reg Wheel" or "Robot").

        Returns:
            Document: A new document with the sample metadata merged in.
        """
        if type == "Reg Wheel":
            meta = {
                "keywords": "Reg Wheel",
                "plant_metadata": {
                    "entity_id": "EQUIP_123",
                    "entity_type": "Machine",
                    "hierarchy_path": "/EnterpriseA/SITE_A/LINE_003/",
                    "entity_tags": ["nickname_for_EQUIP_123", "PB3"],
                    "parent_entity": None,
                    "linked_entities": []
                },
                "version_metadata": {
                    "version_id": "V2.0",
                    "version_tags": ["global"],
                    "version_date": "2024/05/23"
                },
                "other_metadata": {}
            }
        else:  # Robot
            meta = {
                "keywords": "Robot",
                "plant_metadata": {
                    "entity_id": "EQUIP_124",
                    "entity_type": "Robot",
                    "hierarchy_path": "/EnterpriseA/SITE_A/LINE_002/",
                    "entity_tags": ["nickname_for_EQUIP_124", "RB2"],
                    "parent_entity": None,
                    "linked_entities": []
                },
                "version_metadata": {
                    "version_id": "R1.0",
                    "version_tags": ["prototype"],
                    "version_date": "2024/05/23"
                },
                "other_metadata": {}
            }

        return SchemaDocumentUpdater.update_document_metadata(document, meta)

    @staticmethod
    def create_test_document(index: int, type: str) -> Document:
        """
        Creates a test document with sample content and metadata.

        The content is a randomly generated markdown maintenance table whose
        row options depend on the specified type ("Reg Wheel" or "Robot").

        Args:
            index (int): An index number to make the document unique.
            type (str): The type of test document to create ("Reg Wheel" or "Robot").

        Returns:
            A new test document.
        """
        meta = {
            "title": f"{type} Maintenance Record {index}",
            "source_url": f"http://example.com/{type.lower()}_maintenance_{index}",
            "file_type": "xlsx",
            "page": index
        }

        # Both branches generated the same table shape; only the row options
        # and the header separator width differ, so select those per type.
        if type == "Reg Wheel":
            separator = "|------------|-----------------------|------------|----------------------------|"
            tasks = ["Oil Change", "Belt Replacement", "Alignment Check", "General Inspection"]
            technicians = ["John Doe", "Jane Smith", "Jim Brown"]
            notes_pool = ["Changed oil and filter", "Replaced worn-out belt", "Checked and adjusted align", "No issues found"]
        else:  # Robot
            separator = "|------------|-----------------------|------------|-------------------------------------|"
            tasks = ["Sensor Calibration", "Actuator Testing", "Software Update", "Battery Replacement"]
            technicians = ["Bob", "Tim", "Alice"]
            notes_pool = ["Calibrated all sensors", "Tested and replaced faulty actuators", "Updated robot software to v2.1", "Replaced old battery with new one"]

        rows = ["| Date | Maintenance Performed | Technician | Notes |", separator]
        for _ in range(random.randint(10, 20)):
            day = random.randint(1, 28)
            maintenance_performed = random.choice(tasks)
            technician = random.choice(technicians)
            notes = random.choice(notes_pool)
            rows.append(f"| 2023-05-{day:02} | {maintenance_performed} | {technician} | {notes} |")
        page_content = "\n".join(rows)

        document = Document(page_content=page_content, metadata=meta)
        return SchemaDocumentUpdater.add_sample_metadata(document, type)

    @staticmethod
    def create_test_documents(doc_num: int) -> List[Document]:
        """
        Creates a list of test documents.

        Roughly two thirds of the documents are "Reg Wheel" and the remainder
        are "Robot"; indices run consecutively across the two groups.

        Args:
            doc_num (int): The total number of documents to create.

        Returns:
            List[Document]: A list of created test documents.
        """
        reg_wheel_docs_num = doc_num * 2 // 3
        robot_docs_num = doc_num - reg_wheel_docs_num

        documents = [SchemaDocumentUpdater.create_test_document(i + 1, "Reg Wheel") for i in range(reg_wheel_docs_num)]
        documents += [SchemaDocumentUpdater.create_test_document(i + 1 + reg_wheel_docs_num, "Robot") for i in range(robot_docs_num)]

        return documents
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
from typing import Dict, Any, Optional
|
|
2
|
+
from langchain_core.runnables import RunnableConfig
|
|
3
|
+
from langfuse.langchain import CallbackHandler as LangfuseCallbackHandler
|
|
4
|
+
from ..callbacks.async_langfuse_handler import AsyncLangfuseCallbackHandler
|
|
5
|
+
|
|
6
|
+
# Singleton holder for the Langfuse handler to avoid multiple instances per run.
# Both are created lazily by the accessor functions below; the async handler
# wraps the same sync handler instance.
_LANGFUSE_HANDLER: Optional[LangfuseCallbackHandler] = None
_ASYNC_LANGFUSE_HANDLER: Optional[AsyncLangfuseCallbackHandler] = None
|
|
9
|
+
|
|
10
|
+
def _get_langfuse_handler() -> LangfuseCallbackHandler:
    """Return the process-wide Langfuse callback handler, creating it on first use."""
    global _LANGFUSE_HANDLER
    handler = _LANGFUSE_HANDLER
    if handler is None:
        handler = LangfuseCallbackHandler()
        _LANGFUSE_HANDLER = handler
    return handler
|
|
15
|
+
|
|
16
|
+
def get_langfuse_handler() -> LangfuseCallbackHandler:
    """Public accessor for the singleton synchronous Langfuse callback handler."""
    return _get_langfuse_handler()
|
|
18
|
+
|
|
19
|
+
def get_async_langfuse_handler() -> "AsyncLangfuseCallbackHandler":
    """
    Return the singleton async Langfuse handler.

    The async handler is built around the synchronous Langfuse handler
    singleton, so both code paths report through the same underlying handler.
    """
    global _ASYNC_LANGFUSE_HANDLER
    if _ASYNC_LANGFUSE_HANDLER is not None:
        return _ASYNC_LANGFUSE_HANDLER

    _ASYNC_LANGFUSE_HANDLER = AsyncLangfuseCallbackHandler(
        sync_handler=get_langfuse_handler()
    )
    return _ASYNC_LANGFUSE_HANDLER
|
|
29
|
+
|
|
30
|
+
def prepare_trace_config(context: Dict[str, Any]) -> "RunnableConfig":
    """
    Prepares a minimal RunnableConfig for tracing, primarily for Langfuse.

    - Starts from a shallow copy of the config found under ``context["config"]``.
    - Extracts 'trace_metadata' from the config itself, falling back to the
      config's 'configurable' sub-dict.
    - Merges all trace_metadata fields into the config's 'metadata' key
      without mutating the caller's nested dicts.

    Args:
        context: Context dict whose 'config' key holds the current run config.

    Returns:
        A new config dict with trace metadata merged into 'metadata'.
    """
    # The full config is passed in the 'config' key of the context.
    # Shallow copy preserves the graph's existing state while keeping the
    # top-level dict distinct from the caller's.
    run_config = context.get("config", {}).copy()

    # trace_metadata may live at the top level or under 'configurable'.
    trace_metadata = run_config.get("trace_metadata", {})
    if not trace_metadata:
        trace_metadata = run_config.get("configurable", {}).get("trace_metadata", {})

    # If trace_metadata exists, merge all its fields into the main metadata key.
    if trace_metadata and isinstance(trace_metadata, dict):
        # Rebuild 'metadata' as a new dict: updating in place would mutate a
        # 'metadata' dict still shared with the caller via the shallow copy
        # (and would raise AttributeError if 'metadata' were present but None).
        merged = dict(run_config.get("metadata") or {})
        merged.update(trace_metadata)
        run_config["metadata"] = merged

    return run_config
|
|
55
|
+
|
|
@@ -0,0 +1,270 @@
|
|
|
1
|
+
from pymilvus import DataType, MilvusClient, AsyncMilvusClient
|
|
2
|
+
import json
|
|
3
|
+
import logging
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
class MilvusSchemaManager:
    """
    Manages Milvus collection schemas.

    This class provides functionalities to create and validate collection schemas
    and index parameters based on a JSON definition. It interacts with a
    MilvusClient (and optionally an AsyncMilvusClient) instance to perform
    these operations.

    The expected JSON layout, as read by the builder methods, is:
    ``{"node_types": {"Document": {"properties": {...}, "indexes": {...}}}}``.
    """
    def __init__(self, client: MilvusClient = None, async_client: AsyncMilvusClient = None, logger=None):
        """
        Initializes the MilvusSchemaManager.

        Args:
            client (MilvusClient): An instance of the Milvus client.
            async_client (AsyncMilvusClient): An instance of the async Milvus client.
            logger (logging.Logger, optional): A logger instance. If not provided,
                a default logger will be created.
                Defaults to None.
        """
        self.client = client
        self.async_client = async_client
        self.logger = logger or logging.getLogger(__name__)

    def bind_client(self, client: MilvusClient):
        """
        Binds a new MilvusClient instance to the manager.

        Args:
            client (MilvusClient): The Milvus client instance to use.
        """
        self.client = client

    def bind_async_client(self, async_client: AsyncMilvusClient):
        """
        Binds a new AsyncMilvusClient instance to the manager.

        Args:
            async_client (AsyncMilvusClient): The Milvus async client instance to use.
        """
        self.async_client = async_client

    def _add_array_field(self, schema, field_name, field_info):
        """
        Adds an ARRAY field to the schema based on field information.

        This is a helper method to handle the specific logic for creating ARRAY
        fields. Only VARCHAR (from "STRING"/"VARCHAR"/"TEXT") and INT64 element
        types are supported.

        Args:
            schema: The Milvus schema object to add the field to.
            field_name (str): The name of the field.
            field_info (dict): A dictionary containing information about the field,
                such as element type and max capacity.

        Raises:
            ValueError: If required information like 'element' or 'max_capacity'
                is missing from field_info, or if an unsupported element
                type is specified.
        """
        element_type_str = field_info.get("element")
        if not element_type_str:
            raise ValueError(f"Array field '{field_name}' must have 'element' type specified.")

        element_type = None
        if element_type_str in ["STRING", "VARCHAR", "TEXT"]:
            element_type = DataType.VARCHAR
        elif element_type_str == "INT64":
            element_type = DataType.INT64
        else:
            raise ValueError(f"Unsupported element type '{element_type_str}' for ARRAY field '{field_name}'.")

        max_capacity = field_info.get("max_capacity")
        if max_capacity is None:
            raise ValueError(f"Array field '{field_name}' must have 'max_capacity' specified.")

        # Arrays default to nullable, unlike scalar string fields below.
        nullable = field_info.get('nullable', True)

        field_args = {
            "field_name": field_name,
            "datatype": DataType.ARRAY,
            "element_type": element_type,
            "max_capacity": int(max_capacity),
            "nullable": nullable,
        }

        # VARCHAR elements additionally require a per-element max_length.
        if element_type == DataType.VARCHAR:
            max_length = field_info.get('max_length', 65535)
            field_args["max_length"] = int(max_length)

        schema.add_field(**field_args)

    def _build_collection_schema_from_dict(self, schema, schema_data):
        """
        Populates a Milvus schema object from parsed schema data.

        Iterates the 'Document' node type's properties and adds one field per
        entry, dispatching on the declared 'type' string.

        Args:
            schema: The Milvus schema object to populate.
            schema_data (dict): Parsed JSON schema definition.

        Returns:
            The populated schema object.

        Raises:
            ValueError: If an unknown field type is encountered.
        """
        fields = schema_data['node_types']['Document']['properties']
        for field_name, field_info in fields.items():
            field_type = field_info['type']
            if field_type == "STRING" or field_type == "VARCHAR" or field_type == "TEXT":
                max_length = field_info.get('max_length', 256) # Default max_length if not provided
                nullable = field_info.get('nullable', False) # Default nullable if not provided
                schema.add_field(field_name=field_name, datatype=DataType.VARCHAR, max_length=max_length, nullable=nullable)
            elif field_type == "JSON":
                nullable = field_info.get('nullable', True)
                schema.add_field(field_name=field_name, datatype=DataType.JSON, nullable=nullable)
            elif field_type == "INT64":
                is_primary = field_info.get('is_primary', False)
                auto_id = field_info.get('auto_id', False)
                nullable = field_info.get('nullable', False)
                schema.add_field(field_name=field_name, datatype=DataType.INT64, is_primary=is_primary, auto_id=auto_id, nullable=nullable)
            elif field_type == "FLOAT":
                nullable = field_info.get('nullable', True)
                schema.add_field(field_name=field_name, datatype=DataType.FLOAT, nullable=nullable)
            elif field_type == "ARRAY":
                self._add_array_field(schema, field_name, field_info)
            elif field_type == "FLOAT_VECTOR":
                dim = field_info.get('dim', 1536) # Default dimension if not provided
                schema.add_field(field_name=field_name, datatype=DataType.FLOAT_VECTOR, dim=dim)
            else:
                raise ValueError(f"Unknown field type: {field_type}")
        return schema

    def create_collection_schema(self, json_schema: str):
        """
        Creates a Milvus collection schema from a JSON string.

        Args:
            json_schema (str): A JSON string defining the schema.

        Returns:
            A Milvus schema object.

        Raises:
            ValueError: If an unknown field type is encountered in the schema.
        """
        schema_data = json.loads(json_schema)
        # NOTE(review): pymilvus documents this kwarg as 'enable_dynamic_field'
        # (singular); confirm 'enable_dynamic_fields' is actually honored here.
        schema = self.client.create_schema(auto_id=False, enable_dynamic_fields=True)
        return self._build_collection_schema_from_dict(schema, schema_data)

    async def acreate_collection_schema(self, json_schema: str):
        """
        Asynchronously creates a Milvus collection schema from a JSON string.
        """
        schema_data = json.loads(json_schema)
        # NOTE(review): create_schema is invoked on the AsyncMilvusClient class,
        # not the bound self.async_client instance — this works only if it is a
        # classmethod/staticmethod in the installed pymilvus version; confirm.
        schema = AsyncMilvusClient.create_schema(auto_id=False, enable_dynamic_fields=True)
        return self._build_collection_schema_from_dict(schema, schema_data)

    def _build_index_params_from_dict(self, index_params, schema_data):
        """
        Populates a Milvus index-params object from parsed schema data.

        Adds every index declared under the 'Document' node type's 'indexes'
        section, then an AUTOINDEX (named "vector", metric L2) for each
        FLOAT_VECTOR property.

        Args:
            index_params: The Milvus index-params object to populate.
            schema_data (dict): Parsed JSON schema definition.

        Returns:
            The populated index-params object.
        """
        fields = schema_data['node_types']['Document']['properties']

        if 'indexes' in schema_data['node_types']['Document']:
            indexes = schema_data['node_types']['Document']['indexes']
            for index_name, index_details in indexes.items():
                field_name = index_details['fieldname']
                index_type = index_details['type']
                params = index_details['params']
                index_params.add_index(
                    field_name=field_name,
                    index_type=index_type,
                    index_name=index_name,
                    params=params
                )

        for field_name, field_info in fields.items():
            if field_info['type'] == "FLOAT_VECTOR":
                index_params.add_index(
                    field_name=field_name,
                    index_name="vector",
                    index_type="AUTOINDEX",
                    metric_type="L2"
                )
        return index_params

    def create_index_params(self, json_schema: str):
        """
        Creates index parameters from a JSON schema string.

        This method defines indexes based on the 'indexes' section of the schema
        and automatically creates an 'AUTOINDEX' for any FLOAT_VECTOR fields.

        Args:
            json_schema (str): A JSON string defining the schema and indexes.

        Returns:
            Milvus index parameters object.
        """
        schema_data = json.loads(json_schema)
        index_params = self.client.prepare_index_params()
        return self._build_index_params_from_dict(index_params, schema_data)

    async def acreate_index_params(self, json_schema: str):
        """
        Asynchronously creates index parameters from a JSON schema string.
        """
        schema_data = json.loads(json_schema)
        # NOTE(review): called on the class rather than self.async_client — see
        # acreate_collection_schema; confirm prepare_index_params is a classmethod.
        index_params = AsyncMilvusClient.prepare_index_params()
        return self._build_index_params_from_dict(index_params, schema_data)

    def create_collection(self, collection_name: str, json_schema: str):
        """
        Creates a new collection in Milvus.

        This method orchestrates the creation of the schema and index parameters
        before creating the collection itself.

        Args:
            collection_name (str): The name for the new collection.
            json_schema (str): The JSON string defining the collection's schema
                and indexes.

        Raises:
            ValueError: If no synchronous client is bound.
        """
        if not self.client:
            raise ValueError("Synchronous client not provided for create_collection.")
        schema = self.create_collection_schema(json_schema)
        index_params = self.create_index_params(json_schema)

        self.client.create_collection(
            collection_name=collection_name,
            schema=schema,
            index_params=index_params,
            enable_dynamic_fields=True # we need to enable dynamic fields for schema updates
        )

    async def acreate_collection(self, collection_name: str, json_schema: str):
        """
        Asynchronously creates a new collection in Milvus.

        Raises:
            ValueError: If no asynchronous client is bound.
        """
        if not self.async_client:
            raise ValueError("Asynchronous client not provided for acreate_collection.")

        schema = await self.acreate_collection_schema(json_schema)
        index_params = await self.acreate_index_params(json_schema)

        await self.async_client.create_collection(
            collection_name=collection_name,
            schema=schema,
            index_params=index_params,
            enable_dynamic_fields=True
        )

    def validate_schema(self, json_schema: str) -> bool:
        """
        Validates the given schema by attempting to create a collection schema and index params.

        Args:
            json_schema (str): The schema JSON string to validate.

        Returns:
            bool: True if the schema is valid, False if any exceptions are caught.
        """
        try:
            self.create_collection_schema(json_schema)
            self.create_index_params(json_schema)
            return True
        # Deliberate catch-all: any failure (bad JSON, missing keys, unknown
        # types) means "invalid schema"; the cause is logged, not re-raised.
        except Exception as e:
            self.logger.error(f"Schema validation failed: {e}")
            return False
|
|
258
|
+
|
|
259
|
+
|
|
260
|
+
class ZillizSchemaManager(MilvusSchemaManager):
    """Deprecated alias of :class:`MilvusSchemaManager`.

    Retained for backward compatibility only; constructing it emits a
    DeprecationWarning and otherwise behaves exactly like the parent class.
    """

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        import warnings

        message = (
            "The ZillizSchemaManager class will be deprecated in the future. "
            "Please use the MilvusSchemaManager class instead."
        )
        # stacklevel=2 points the warning at the caller's constructor call site.
        warnings.warn(message, DeprecationWarning, stacklevel=2)
        super().__init__(*args, **kwargs)
|