crewplus 0.2.89__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,173 @@
1
+ from typing import List
2
+ from langchain_core.documents import Document
3
+ import random
4
+
5
class SchemaDocumentUpdater:
    """A utility class for updating and creating LangChain Documents with specific metadata schemas."""

    @staticmethod
    def update_document_metadata(document: Document, metadata: dict) -> Document:
        """
        Return a copy of a LangChain Document with merged metadata.

        Args:
            document (Document): The document to update.
            metadata (dict): Metadata keys/values to add or override.

        Returns:
            Document: A new document whose metadata is the original metadata
            merged with ``metadata`` (values from ``metadata`` win on key
            collisions). The input document is left untouched.
        """
        # Merge into a fresh dict: the previous implementation aliased
        # document.metadata and mutated it in place, silently altering the
        # caller's original document as a side effect.
        merged = {**document.metadata, **metadata}
        return Document(page_content=document.page_content, metadata=merged)

    @staticmethod
    def delete_document_metadata(document: Document, keys_to_delete: List[str]) -> Document:
        """
        Return a copy of a LangChain Document with selected metadata keys removed.

        Args:
            document (Document): The document to update.
            keys_to_delete (List[str]): Keys to drop from the metadata; keys
                that are absent are silently ignored.

        Returns:
            Document: A new document whose metadata excludes the given keys.
            The input document is left untouched.
        """
        drop = set(keys_to_delete)
        # Build a new dict instead of `del`-ing from document.metadata, which
        # would mutate the input document as a side effect.
        remaining = {k: v for k, v in document.metadata.items() if k not in drop}
        return Document(page_content=document.page_content, metadata=remaining)

    @staticmethod
    def add_sample_metadata(document: Document, type: str) -> Document:
        """
        Adds sample metadata to a document based on a specified type.

        The metadata schema is tailored for either "Reg Wheel" or "Robot";
        any value other than "Reg Wheel" falls through to the "Robot" schema.

        Args:
            document (Document): The document to which sample metadata will be added.
            type (str): The sample type ("Reg Wheel" or "Robot"). NOTE: the
                parameter name shadows the ``type`` builtin but is kept for
                backward compatibility with existing keyword callers.

        Returns:
            Document: A new document with the sample metadata merged in.
        """
        if type == "Reg Wheel":
            meta = {
                "keywords": "Reg Wheel",
                "plant_metadata": {
                    "entity_id": "EQUIP_123",
                    "entity_type": "Machine",
                    "hierarchy_path": "/EnterpriseA/SITE_A/LINE_003/",
                    "entity_tags": ["nickname_for_EQUIP_123", "PB3"],
                    "parent_entity": None,
                    "linked_entities": []
                },
                "version_metadata": {
                    "version_id": "V2.0",
                    "version_tags": ["global"],
                    "version_date": "2024/05/23"
                },
                "other_metadata": {}
            }
        else:  # Robot
            meta = {
                "keywords": "Robot",
                "plant_metadata": {
                    "entity_id": "EQUIP_124",
                    "entity_type": "Robot",
                    "hierarchy_path": "/EnterpriseA/SITE_A/LINE_002/",
                    "entity_tags": ["nickname_for_EQUIP_124", "RB2"],
                    "parent_entity": None,
                    "linked_entities": []
                },
                "version_metadata": {
                    "version_id": "R1.0",
                    "version_tags": ["prototype"],
                    "version_date": "2024/05/23"
                },
                "other_metadata": {}
            }

        return SchemaDocumentUpdater.update_document_metadata(document, meta)

    @staticmethod
    def create_test_document(index: int, type: str) -> Document:
        """
        Creates a test document with sample content and metadata.

        The content is a small markdown maintenance-log table whose rows are
        randomly generated, so repeated calls differ unless ``random`` is
        seeded by the caller.

        Args:
            index (int): An index number to make the document unique.
            type (str): The type of test document to create ("Reg Wheel" or
                "Robot"; anything other than "Reg Wheel" is treated as "Robot").

        Returns:
            Document: A new test document with sample metadata attached.
        """
        meta = {
            "title": f"{type} Maintenance Record {index}",
            "source_url": f"http://example.com/{type.lower()}_maintenance_{index}",
            "file_type": "xlsx",
            "page": index
        }

        # Per-type row vocabulary; the previous implementation duplicated the
        # whole generation loop in each branch.
        if type == "Reg Wheel":
            divider = "|------------|-----------------------|------------|----------------------------|"
            tasks = ["Oil Change", "Belt Replacement", "Alignment Check", "General Inspection"]
            technicians = ["John Doe", "Jane Smith", "Jim Brown"]
            notes_pool = ["Changed oil and filter", "Replaced worn-out belt", "Checked and adjusted align", "No issues found"]
        else:  # Robot
            divider = "|------------|-----------------------|------------|-------------------------------------|"
            tasks = ["Sensor Calibration", "Actuator Testing", "Software Update", "Battery Replacement"]
            technicians = ["Bob", "Tim", "Alice"]
            notes_pool = ["Calibrated all sensors", "Tested and replaced faulty actuators", "Updated robot software to v2.1", "Replaced old battery with new one"]

        lines = ["| Date | Maintenance Performed | Technician | Notes |", divider]
        for _ in range(random.randint(10, 20)):
            day = random.randint(1, 28)
            maintenance_performed = random.choice(tasks)
            technician = random.choice(technicians)
            notes = random.choice(notes_pool)
            lines.append(f"| 2023-05-{day:02} | {maintenance_performed} | {technician} | {notes} |")
        page_content = "\n".join(lines)

        document = Document(page_content=page_content, metadata=meta)
        return SchemaDocumentUpdater.add_sample_metadata(document, type)

    @staticmethod
    def create_test_documents(doc_num: int) -> List[Document]:
        """
        Creates a list of test documents.

        Roughly two thirds of the documents are "Reg Wheel" and the rest are
        "Robot"; indices run 1..doc_num across both groups.

        Args:
            doc_num (int): The total number of documents to create.

        Returns:
            List[Document]: A list of created test documents.
        """
        reg_wheel_docs_num = doc_num * 2 // 3
        robot_docs_num = doc_num - reg_wheel_docs_num

        documents = [SchemaDocumentUpdater.create_test_document(i + 1, "Reg Wheel") for i in range(reg_wheel_docs_num)]
        documents += [SchemaDocumentUpdater.create_test_document(i + 1 + reg_wheel_docs_num, "Robot") for i in range(robot_docs_num)]

        return documents
@@ -0,0 +1,55 @@
1
+ from typing import Dict, Any, Optional
2
+ from langchain_core.runnables import RunnableConfig
3
+ from langfuse.langchain import CallbackHandler as LangfuseCallbackHandler
4
+ from ..callbacks.async_langfuse_handler import AsyncLangfuseCallbackHandler
5
+
6
# Module-level singletons: one sync and one async Langfuse handler are shared
# process-wide so a single run never creates duplicate handler instances.
_LANGFUSE_HANDLER: Optional[LangfuseCallbackHandler] = None
_ASYNC_LANGFUSE_HANDLER: Optional[AsyncLangfuseCallbackHandler] = None


def _get_langfuse_handler() -> LangfuseCallbackHandler:
    """Lazily create and return the process-wide Langfuse callback handler."""
    global _LANGFUSE_HANDLER
    if _LANGFUSE_HANDLER is not None:
        return _LANGFUSE_HANDLER
    _LANGFUSE_HANDLER = LangfuseCallbackHandler()
    return _LANGFUSE_HANDLER
15
+
16
def get_langfuse_handler() -> LangfuseCallbackHandler:
    """Public accessor for the shared Langfuse callback handler singleton."""
    return _get_langfuse_handler()
18
+
19
def get_async_langfuse_handler() -> "AsyncLangfuseCallbackHandler":
    """
    Return the singleton async Langfuse handler.

    The async handler wraps the same underlying synchronous Langfuse handler
    singleton, so sync and async code paths report through one handler.
    """
    global _ASYNC_LANGFUSE_HANDLER
    if _ASYNC_LANGFUSE_HANDLER is None:
        _ASYNC_LANGFUSE_HANDLER = AsyncLangfuseCallbackHandler(
            sync_handler=get_langfuse_handler()
        )
    return _ASYNC_LANGFUSE_HANDLER
29
+
30
def prepare_trace_config(context: Dict[str, Any]) -> RunnableConfig:
    """
    Prepares a minimal RunnableConfig for tracing, primarily for Langfuse.

    - Starts from a shallow copy of the run config found under ``context["config"]``.
    - Looks for 'trace_metadata' either at the top level of that config or
      inside its 'configurable' dict, and merges its fields into the config's
      'metadata' key.

    NOTE(review): no Langfuse callback handler is attached here (the original
    docstring claimed one was); callers must add one themselves, e.g. via
    ``get_langfuse_handler()``.

    Args:
        context: Graph context; the full run config is expected under "config".

    Returns:
        RunnableConfig: The copied config with trace metadata merged into its
        "metadata" entry. The caller's original config is not modified.
    """
    # The full config is passed in the 'config' key of the context. Start from
    # a copy so top-level keys of the caller's config are preserved as-is.
    run_config = context.get("config", {}).copy()

    # trace_metadata may live at the top level or under 'configurable'.
    trace_metadata = run_config.get("trace_metadata", {})
    if not trace_metadata:
        trace_metadata = run_config.get("configurable", {}).get("trace_metadata", {})

    # If trace_metadata exists, merge all its fields into the main metadata key.
    if trace_metadata and isinstance(trace_metadata, dict):
        # Rebuild the metadata dict rather than calling .update() on it:
        # .copy() above is shallow, so updating the nested dict in place would
        # leak the trace metadata back into the caller's original config.
        run_config["metadata"] = {**run_config.get("metadata", {}), **trace_metadata}

    return run_config
55
+
@@ -0,0 +1,5 @@
1
# Public API of the vector-database subpackage: schema management for
# Milvus/Zilliz collections, a schema-aware Milvus vector store, and the
# VDB service facade.
from .milvus_schema_manager import MilvusSchemaManager, ZillizSchemaManager
from .schema_milvus import SchemaMilvus
from .vdb_service import VDBService

__all__ = ["MilvusSchemaManager", "ZillizSchemaManager", "VDBService", "SchemaMilvus"]
@@ -0,0 +1,270 @@
1
+ from pymilvus import DataType, MilvusClient, AsyncMilvusClient
2
+ import json
3
+ import logging
4
+ from typing import Any
5
+
6
class MilvusSchemaManager:
    """
    Manages Milvus/Zilliz collection schemas.

    This class provides functionalities to create and validate collection
    schemas and index parameters based on a JSON definition. It interacts
    with a MilvusClient (and optionally an AsyncMilvusClient) instance to
    perform these operations.
    """

    def __init__(self, client: MilvusClient = None, async_client: AsyncMilvusClient = None, logger=None):
        """
        Initializes the MilvusSchemaManager.

        Args:
            client (MilvusClient, optional): Synchronous Milvus client. May be
                None; synchronous operations will then raise ValueError.
            async_client (AsyncMilvusClient, optional): Asynchronous Milvus
                client. May be None; async collection creation will then raise.
            logger (logging.Logger, optional): A logger instance. If not
                provided, a default module logger is used.
        """
        self.client = client
        self.async_client = async_client
        self.logger = logger or logging.getLogger(__name__)

    def bind_client(self, client: MilvusClient):
        """
        Binds a new MilvusClient instance to the manager.

        Args:
            client (MilvusClient): The Milvus client instance to use.
        """
        self.client = client

    def bind_async_client(self, async_client: AsyncMilvusClient):
        """
        Binds a new AsyncMilvusClient instance to the manager.

        Args:
            async_client (AsyncMilvusClient): The Milvus async client instance to use.
        """
        self.async_client = async_client

    def _add_array_field(self, schema, field_name, field_info):
        """
        Adds an ARRAY field to the schema based on field information.

        Args:
            schema: The Milvus schema object to add the field to.
            field_name (str): The name of the field.
            field_info (dict): Field definition; must contain 'element' and
                'max_capacity', and may contain 'nullable' and 'max_length'.

        Raises:
            ValueError: If 'element' or 'max_capacity' is missing from
                field_info, or if an unsupported element type is specified.
        """
        element_type_str = field_info.get("element")
        if not element_type_str:
            raise ValueError(f"Array field '{field_name}' must have 'element' type specified.")

        if element_type_str in ["STRING", "VARCHAR", "TEXT"]:
            element_type = DataType.VARCHAR
        elif element_type_str == "INT64":
            element_type = DataType.INT64
        else:
            raise ValueError(f"Unsupported element type '{element_type_str}' for ARRAY field '{field_name}'.")

        max_capacity = field_info.get("max_capacity")
        if max_capacity is None:
            raise ValueError(f"Array field '{field_name}' must have 'max_capacity' specified.")

        field_args = {
            "field_name": field_name,
            "datatype": DataType.ARRAY,
            "element_type": element_type,
            "max_capacity": int(max_capacity),
            "nullable": field_info.get('nullable', True),
        }

        # VARCHAR elements additionally require a max_length.
        if element_type == DataType.VARCHAR:
            field_args["max_length"] = int(field_info.get('max_length', 65535))

        schema.add_field(**field_args)

    def _build_collection_schema_from_dict(self, schema, schema_data):
        """
        Populate a Milvus schema object from the parsed JSON schema dict.

        Reads field definitions from
        schema_data['node_types']['Document']['properties'] and adds one
        schema field per entry.

        Raises:
            ValueError: If a field has an unknown type.
        """
        fields = schema_data['node_types']['Document']['properties']
        for field_name, field_info in fields.items():
            field_type = field_info['type']
            if field_type in ("STRING", "VARCHAR", "TEXT"):
                max_length = field_info.get('max_length', 256)  # Default max_length if not provided
                nullable = field_info.get('nullable', False)  # Default nullable if not provided
                schema.add_field(field_name=field_name, datatype=DataType.VARCHAR, max_length=max_length, nullable=nullable)
            elif field_type == "JSON":
                nullable = field_info.get('nullable', True)
                schema.add_field(field_name=field_name, datatype=DataType.JSON, nullable=nullable)
            elif field_type == "INT64":
                is_primary = field_info.get('is_primary', False)
                auto_id = field_info.get('auto_id', False)
                nullable = field_info.get('nullable', False)
                schema.add_field(field_name=field_name, datatype=DataType.INT64, is_primary=is_primary, auto_id=auto_id, nullable=nullable)
            elif field_type == "FLOAT":
                nullable = field_info.get('nullable', True)
                schema.add_field(field_name=field_name, datatype=DataType.FLOAT, nullable=nullable)
            elif field_type == "ARRAY":
                self._add_array_field(schema, field_name, field_info)
            elif field_type == "FLOAT_VECTOR":
                dim = field_info.get('dim', 1536)  # Default dimension if not provided
                schema.add_field(field_name=field_name, datatype=DataType.FLOAT_VECTOR, dim=dim)
            else:
                raise ValueError(f"Unknown field type: {field_type}")
        return schema

    def create_collection_schema(self, json_schema: str):
        """
        Creates a Milvus collection schema from a JSON string.

        Args:
            json_schema (str): A JSON string defining the schema.

        Returns:
            A Milvus schema object.

        Raises:
            ValueError: If no synchronous client is bound, or if an unknown
                field type is encountered in the schema.
        """
        # Guard added for consistency with create_collection: fail with a
        # clear ValueError instead of an AttributeError when no client is set.
        if not self.client:
            raise ValueError("Synchronous client not provided for create_collection_schema.")
        schema_data = json.loads(json_schema)
        schema = self.client.create_schema(auto_id=False, enable_dynamic_fields=True)
        return self._build_collection_schema_from_dict(schema, schema_data)

    async def acreate_collection_schema(self, json_schema: str):
        """
        Asynchronously creates a Milvus collection schema from a JSON string.
        """
        schema_data = json.loads(json_schema)
        # NOTE(review): create_schema is invoked on the AsyncMilvusClient class
        # itself (no instance) — this relies on it being a classmethod in the
        # installed pymilvus version; confirm when upgrading pymilvus.
        schema = AsyncMilvusClient.create_schema(auto_id=False, enable_dynamic_fields=True)
        return self._build_collection_schema_from_dict(schema, schema_data)

    def _build_index_params_from_dict(self, index_params, schema_data):
        """
        Populate a Milvus index-params object from the parsed JSON schema dict.

        Adds every index declared under
        schema_data['node_types']['Document']['indexes'], then an AUTOINDEX
        (metric L2) named "vector" for every FLOAT_VECTOR field.
        """
        fields = schema_data['node_types']['Document']['properties']

        if 'indexes' in schema_data['node_types']['Document']:
            indexes = schema_data['node_types']['Document']['indexes']
            for index_name, index_details in indexes.items():
                index_params.add_index(
                    field_name=index_details['fieldname'],
                    index_type=index_details['type'],
                    index_name=index_name,
                    params=index_details['params']
                )

        # NOTE(review): a FLOAT_VECTOR field that also appears in 'indexes'
        # gets two index entries here — confirm this is intended.
        for field_name, field_info in fields.items():
            if field_info['type'] == "FLOAT_VECTOR":
                index_params.add_index(
                    field_name=field_name,
                    index_name="vector",
                    index_type="AUTOINDEX",
                    metric_type="L2"
                )
        return index_params

    def create_index_params(self, json_schema: str):
        """
        Creates index parameters from a JSON schema string.

        This method defines indexes based on the 'indexes' section of the
        schema and automatically creates an 'AUTOINDEX' for any FLOAT_VECTOR
        fields.

        Args:
            json_schema (str): A JSON string defining the schema and indexes.

        Returns:
            Milvus index parameters object.

        Raises:
            ValueError: If no synchronous client is bound.
        """
        # Guard added for consistency with create_collection.
        if not self.client:
            raise ValueError("Synchronous client not provided for create_index_params.")
        schema_data = json.loads(json_schema)
        index_params = self.client.prepare_index_params()
        return self._build_index_params_from_dict(index_params, schema_data)

    async def acreate_index_params(self, json_schema: str):
        """
        Asynchronously creates index parameters from a JSON schema string.
        """
        schema_data = json.loads(json_schema)
        # NOTE(review): prepare_index_params is invoked on the class itself —
        # relies on it being a classmethod in the installed pymilvus version.
        index_params = AsyncMilvusClient.prepare_index_params()
        return self._build_index_params_from_dict(index_params, schema_data)

    def create_collection(self, collection_name: str, json_schema: str):
        """
        Creates a new collection in Milvus.

        This method orchestrates the creation of the schema and index
        parameters before creating the collection itself.

        Args:
            collection_name (str): The name for the new collection.
            json_schema (str): The JSON string defining the collection's
                schema and indexes.

        Raises:
            ValueError: If no synchronous client is bound or the schema is invalid.
        """
        if not self.client:
            raise ValueError("Synchronous client not provided for create_collection.")
        schema = self.create_collection_schema(json_schema)
        index_params = self.create_index_params(json_schema)

        self.client.create_collection(
            collection_name=collection_name,
            schema=schema,
            index_params=index_params,
            # we need to enable dynamic fields for schema updates.
            # NOTE(review): pymilvus documents this flag as
            # 'enable_dynamic_field' (singular) on the schema — confirm the
            # plural kwarg passed here actually takes effect.
            enable_dynamic_fields=True
        )

    async def acreate_collection(self, collection_name: str, json_schema: str):
        """
        Asynchronously creates a new collection in Milvus.

        Raises:
            ValueError: If no asynchronous client is bound or the schema is invalid.
        """
        if not self.async_client:
            raise ValueError("Asynchronous client not provided for acreate_collection.")

        schema = await self.acreate_collection_schema(json_schema)
        index_params = await self.acreate_index_params(json_schema)

        await self.async_client.create_collection(
            collection_name=collection_name,
            schema=schema,
            index_params=index_params,
            enable_dynamic_fields=True
        )

    def validate_schema(self, json_schema: str) -> bool:
        """
        Validates the given schema by attempting to create a collection schema
        and index params.

        Args:
            json_schema (str): The schema JSON string to validate.

        Returns:
            bool: True if the schema is valid, False if any exception is
            raised (the error is logged, not propagated).
        """
        try:
            self.create_collection_schema(json_schema)
            self.create_index_params(json_schema)
            return True
        except Exception as e:
            self.logger.error(f"Schema validation failed: {e}")
            return False
258
+
259
+
260
class ZillizSchemaManager(MilvusSchemaManager):
    """Deprecated alias of MilvusSchemaManager, kept for backward compatibility."""

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        import warnings

        _deprecation_message = (
            "The ZillizSchemaManager class will be deprecated in the future. "
            "Please use the MilvusSchemaManager class instead."
        )
        # stacklevel=2 points the warning at the caller's instantiation site.
        warnings.warn(_deprecation_message, DeprecationWarning, stacklevel=2)
        super().__init__(*args, **kwargs)