mlrun 1.8.0rc4__py3-none-any.whl → 1.8.0rc6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mlrun might be problematic.

Files changed (69):
  1. mlrun/__init__.py +4 -3
  2. mlrun/alerts/alert.py +129 -2
  3. mlrun/artifacts/__init__.py +1 -1
  4. mlrun/artifacts/base.py +12 -1
  5. mlrun/artifacts/document.py +59 -38
  6. mlrun/common/model_monitoring/__init__.py +0 -2
  7. mlrun/common/model_monitoring/helpers.py +0 -28
  8. mlrun/common/schemas/__init__.py +1 -4
  9. mlrun/common/schemas/alert.py +3 -0
  10. mlrun/common/schemas/artifact.py +4 -0
  11. mlrun/common/schemas/client_spec.py +0 -1
  12. mlrun/common/schemas/model_monitoring/__init__.py +0 -6
  13. mlrun/common/schemas/model_monitoring/constants.py +11 -9
  14. mlrun/common/schemas/model_monitoring/model_endpoints.py +77 -149
  15. mlrun/common/schemas/notification.py +6 -0
  16. mlrun/config.py +0 -2
  17. mlrun/datastore/datastore_profile.py +57 -17
  18. mlrun/datastore/vectorstore.py +67 -59
  19. mlrun/db/base.py +22 -18
  20. mlrun/db/factory.py +0 -3
  21. mlrun/db/httpdb.py +122 -150
  22. mlrun/db/nopdb.py +33 -17
  23. mlrun/execution.py +43 -29
  24. mlrun/model.py +7 -0
  25. mlrun/model_monitoring/__init__.py +3 -2
  26. mlrun/model_monitoring/api.py +40 -43
  27. mlrun/model_monitoring/applications/_application_steps.py +4 -2
  28. mlrun/model_monitoring/applications/base.py +65 -6
  29. mlrun/model_monitoring/applications/context.py +64 -33
  30. mlrun/model_monitoring/applications/evidently_base.py +0 -1
  31. mlrun/model_monitoring/applications/histogram_data_drift.py +2 -6
  32. mlrun/model_monitoring/controller.py +43 -37
  33. mlrun/model_monitoring/db/__init__.py +0 -2
  34. mlrun/model_monitoring/db/tsdb/base.py +2 -1
  35. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +2 -1
  36. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +43 -0
  37. mlrun/model_monitoring/helpers.py +12 -66
  38. mlrun/model_monitoring/stream_processing.py +83 -270
  39. mlrun/model_monitoring/writer.py +1 -10
  40. mlrun/projects/project.py +87 -74
  41. mlrun/runtimes/nuclio/function.py +7 -6
  42. mlrun/runtimes/nuclio/serving.py +7 -1
  43. mlrun/serving/routers.py +158 -145
  44. mlrun/serving/server.py +6 -0
  45. mlrun/serving/states.py +2 -0
  46. mlrun/serving/v2_serving.py +69 -60
  47. mlrun/utils/helpers.py +14 -30
  48. mlrun/utils/notifications/notification/mail.py +36 -9
  49. mlrun/utils/notifications/notification_pusher.py +34 -13
  50. mlrun/utils/version/version.json +2 -2
  51. {mlrun-1.8.0rc4.dist-info → mlrun-1.8.0rc6.dist-info}/METADATA +5 -4
  52. {mlrun-1.8.0rc4.dist-info → mlrun-1.8.0rc6.dist-info}/RECORD +56 -69
  53. mlrun/common/schemas/model_monitoring/model_endpoint_v2.py +0 -149
  54. mlrun/model_monitoring/db/stores/__init__.py +0 -136
  55. mlrun/model_monitoring/db/stores/base/__init__.py +0 -15
  56. mlrun/model_monitoring/db/stores/base/store.py +0 -154
  57. mlrun/model_monitoring/db/stores/sqldb/__init__.py +0 -13
  58. mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +0 -46
  59. mlrun/model_monitoring/db/stores/sqldb/models/base.py +0 -93
  60. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +0 -47
  61. mlrun/model_monitoring/db/stores/sqldb/models/sqlite.py +0 -25
  62. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +0 -408
  63. mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +0 -13
  64. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +0 -464
  65. mlrun/model_monitoring/model_endpoint.py +0 -120
  66. {mlrun-1.8.0rc4.dist-info → mlrun-1.8.0rc6.dist-info}/LICENSE +0 -0
  67. {mlrun-1.8.0rc4.dist-info → mlrun-1.8.0rc6.dist-info}/WHEEL +0 -0
  68. {mlrun-1.8.0rc4.dist-info → mlrun-1.8.0rc6.dist-info}/entry_points.txt +0 -0
  69. {mlrun-1.8.0rc4.dist-info → mlrun-1.8.0rc6.dist-info}/top_level.txt +0 -0

mlrun/common/schemas/model_monitoring/model_endpoints.py CHANGED

@@ -11,27 +11,22 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
-import enum
+import abc
 import json
 from datetime import datetime
 from typing import Any, NamedTuple, Optional, TypeVar
 
-from pydantic.v1 import BaseModel, Extra, Field, constr, validator
+from pydantic.v1 import BaseModel, Field, constr
 
 # TODO: remove the unused import below after `mlrun.datastore` and `mlrun.utils` usage is removed.
 # At the moment `make lint` fails if this is removed.
-import mlrun.common.model_monitoring
-
-from ..object import ObjectKind, ObjectSpec, ObjectStatus
+from ..object import ObjectKind, ObjectMetadata, ObjectSpec, ObjectStatus
+from . import ModelEndpointSchema
 from .constants import (
     FQN_REGEX,
     MODEL_ENDPOINT_ID_PATTERN,
     PROJECT_PATTERN,
     EndpointType,
-    EventFieldType,
-    EventKeyMetrics,
-    EventLiveStats,
     ModelEndpointMonitoringMetricType,
     ModelMonitoringMode,
     ResultKindApp,
@@ -47,81 +42,6 @@ class ModelMonitoringStoreKinds:
     EVENTS = "events"
 
 
-class ModelEndpointMetadata(BaseModel):
-    project: constr(regex=PROJECT_PATTERN)
-    uid: constr(regex=MODEL_ENDPOINT_ID_PATTERN)
-    labels: Optional[dict] = {}
-
-    class Config:
-        extra = Extra.allow
-
-    @classmethod
-    def from_flat_dict(
-        cls, endpoint_dict: dict, json_parse_values: Optional[list] = None
-    ):
-        """Create a `ModelEndpointMetadata` object from an endpoint dictionary
-
-        :param endpoint_dict:     Model endpoint dictionary.
-        :param json_parse_values: List of dictionary keys with a JSON string value that will be parsed into a
-                                  dictionary using json.loads().
-        """
-        if json_parse_values is None:
-            json_parse_values = [EventFieldType.LABELS]
-
-        return _mapping_attributes(
-            model_class=cls,
-            flattened_dictionary=endpoint_dict,
-            json_parse_values=json_parse_values,
-        )
-
-
-class ModelEndpointSpec(ObjectSpec):
-    function_uri: Optional[str] = ""  # <project_name>/<function_name>:<tag>
-    model: Optional[str] = ""  # <model_name>:<version>
-    model_class: Optional[str] = ""
-    model_uri: Optional[str] = ""
-    feature_names: Optional[list[str]] = []
-    label_names: Optional[list[str]] = []
-    stream_path: Optional[str] = ""
-    algorithm: Optional[str] = ""
-    monitor_configuration: Optional[dict] = {}
-    active: Optional[bool] = True
-    monitoring_mode: Optional[ModelMonitoringMode] = ModelMonitoringMode.disabled.value
-
-    @classmethod
-    def from_flat_dict(
-        cls, endpoint_dict: dict, json_parse_values: Optional[list] = None
-    ):
-        """Create a `ModelEndpointSpec` object from an endpoint dictionary
-
-        :param endpoint_dict:     Model endpoint dictionary.
-        :param json_parse_values: List of dictionary keys with a JSON string value that will be parsed into a
-                                  dictionary using json.loads().
-        """
-        if json_parse_values is None:
-            json_parse_values = [
-                EventFieldType.FEATURE_NAMES,
-                EventFieldType.LABEL_NAMES,
-                EventFieldType.MONITOR_CONFIGURATION,
-            ]
-        return _mapping_attributes(
-            model_class=cls,
-            flattened_dictionary=endpoint_dict,
-            json_parse_values=json_parse_values,
-        )
-
-    @validator("model_uri")
-    @classmethod
-    def validate_model_uri(cls, model_uri):
-        """Validate that the model uri includes the required prefix"""
-        prefix, uri = mlrun.datastore.parse_store_uri(model_uri)
-        if prefix and prefix != mlrun.utils.helpers.StorePrefix.Model:
-            return mlrun.datastore.get_store_uri(
-                mlrun.utils.helpers.StorePrefix.Model, uri
-            )
-        return model_uri
-
-
 class Histogram(BaseModel):
     buckets: list[float]
     counts: list[int]
@@ -167,50 +87,24 @@ class Features(BaseModel):
     )
 
 
-class ModelEndpointStatus(ObjectStatus):
-    feature_stats: Optional[dict] = {}
-    current_stats: Optional[dict] = {}
-    first_request: Optional[str] = ""
-    last_request: Optional[str] = ""
-    error_count: Optional[int] = 0
-    drift_status: Optional[str] = ""
-    drift_measures: Optional[dict] = {}
-    metrics: Optional[dict[str, dict[str, Any]]] = {
-        EventKeyMetrics.GENERIC: {
-            EventLiveStats.LATENCY_AVG_1H: 0,
-            EventLiveStats.PREDICTIONS_PER_SECOND: 0,
-        }
-    }
-    features: Optional[list[Features]] = []
-    children: Optional[list[str]] = []
-    children_uids: Optional[list[str]] = []
-    endpoint_type: Optional[EndpointType] = EndpointType.NODE_EP
-    monitoring_feature_set_uri: Optional[str] = ""
-    state: Optional[str] = ""
-
-    class Config:
-        extra = Extra.allow
+class ModelEndpointParser(abc.ABC, BaseModel):
+    @classmethod
+    def json_parse_values(cls) -> list[str]:
+        return []
 
     @classmethod
     def from_flat_dict(
         cls, endpoint_dict: dict, json_parse_values: Optional[list] = None
-    ):
-        """Create a `ModelEndpointStatus` object from an endpoint dictionary
+    ) -> "ModelEndpointParser":
+        """Create a `ModelEndpointParser` object from an endpoint dictionary
 
         :param endpoint_dict:     Model endpoint dictionary.
         :param json_parse_values: List of dictionary keys with a JSON string value that will be parsed into a
                                   dictionary using json.loads().
         """
        if json_parse_values is None:
-            json_parse_values = [
-                EventFieldType.FEATURE_STATS,
-                EventFieldType.CURRENT_STATS,
-                EventFieldType.DRIFT_MEASURES,
-                EventFieldType.METRICS,
-                EventFieldType.CHILDREN,
-                EventFieldType.CHILDREN_UIDS,
-                EventFieldType.ENDPOINT_TYPE,
-            ]
+            json_parse_values = cls.json_parse_values()
+
         return _mapping_attributes(
             model_class=cls,
            flattened_dictionary=endpoint_dict,
@@ -218,16 +112,53 @@ class ModelEndpointStatus(ObjectStatus):
         )
 
 
+class ModelEndpointMetadata(ObjectMetadata, ModelEndpointParser):
+    project: constr(regex=PROJECT_PATTERN)
+    endpoint_type: EndpointType = EndpointType.NODE_EP
+    uid: Optional[constr(regex=MODEL_ENDPOINT_ID_PATTERN)]
+
+
+class ModelEndpointSpec(ObjectSpec, ModelEndpointParser):
+    model_uid: Optional[str] = ""
+    model_name: Optional[str] = ""
+    model_tag: Optional[str] = ""
+    model_class: Optional[str] = ""
+    function_name: Optional[str] = ""
+    function_tag: Optional[str] = ""
+    function_uid: Optional[str] = ""
+    feature_names: Optional[list[str]] = []
+    label_names: Optional[list[str]] = []
+    feature_stats: Optional[dict] = {}
+    function_uri: Optional[str] = ""  # <project_name>/<function_hash>
+    model_uri: Optional[str] = ""
+    children: Optional[list[str]] = []
+    children_uids: Optional[list[str]] = []
+    monitoring_feature_set_uri: Optional[str] = ""
+
+
+class ModelEndpointStatus(ObjectStatus, ModelEndpointParser):
+    state: Optional[str] = "unknown"  # will be updated according to the function state
+    first_request: Optional[datetime] = None
+    monitoring_mode: Optional[ModelMonitoringMode] = ModelMonitoringMode.disabled
+
+    # operative
+    last_request: Optional[datetime] = None
+    result_status: Optional[int] = -1
+    avg_latency: Optional[float] = None
+    error_count: Optional[int] = 0
+    current_stats: Optional[dict] = {}
+    current_stats_timestamp: Optional[datetime] = None
+    drift_measures: Optional[dict] = {}
+    drift_measures_timestamp: Optional[datetime] = None
+
+
 class ModelEndpoint(BaseModel):
     kind: ObjectKind = Field(ObjectKind.model_endpoint, const=True)
     metadata: ModelEndpointMetadata
-    spec: ModelEndpointSpec = ModelEndpointSpec()
-    status: ModelEndpointStatus = ModelEndpointStatus()
+    spec: ModelEndpointSpec
+    status: ModelEndpointStatus
 
-    class Config:
-        extra = Extra.allow
-
-    def flat_dict(self):
+    def flat_dict(self) -> dict[str, Any]:
         """Generate a flattened `ModelEndpoint` dictionary. The flattened dictionary result is important for storing
         the model endpoint object in the database.
 
@@ -235,35 +166,24 @@ class ModelEndpoint(BaseModel):
         """
         # Convert the ModelEndpoint object into a dictionary using BaseModel dict() function
         # In addition, remove the BaseModel kind as it is not required by the DB schema
-        model_endpoint_dictionary = self.dict(exclude={"kind"})
 
+        model_endpoint_dictionary = self.dict(exclude={"kind"})
+        exclude = {
+            "tag",
+            ModelEndpointSchema.FEATURE_STATS,
+            ModelEndpointSchema.CURRENT_STATS,
+            ModelEndpointSchema.DRIFT_MEASURES,
+            ModelEndpointSchema.FUNCTION_URI,
+            ModelEndpointSchema.MODEL_URI,
+        }
         # Initialize a flattened dictionary that will be filled with the model endpoint dictionary attributes
         flatten_dict = {}
         for k_object in model_endpoint_dictionary:
             for key in model_endpoint_dictionary[k_object]:
-                # Extract the value of the current field
-                current_value = model_endpoint_dictionary[k_object][key]
-
-                # If the value is not from type str or bool (e.g. dict), convert it into a JSON string
-                # for matching the database required format
-                if not isinstance(current_value, (str, bool, int)) or isinstance(
-                    current_value, enum.IntEnum
-                ):
-                    flatten_dict[key] = json.dumps(current_value)
-                else:
-                    flatten_dict[key] = current_value
-
-        if EventFieldType.METRICS not in flatten_dict:
-            # Initialize metrics dictionary
-            flatten_dict[EventFieldType.METRICS] = {
-                EventKeyMetrics.GENERIC: {
-                    EventLiveStats.LATENCY_AVG_1H: 0,
-                    EventLiveStats.PREDICTIONS_PER_SECOND: 0,
-                }
-            }
-
-        # Remove the features from the dictionary as this field will be filled only within the feature analysis process
-        flatten_dict.pop(EventFieldType.FEATURES, None)
+                if key not in exclude:
+                    # Extract the value of the current field
+                    flatten_dict[key] = model_endpoint_dictionary[k_object][key]
+
         return flatten_dict
 
     @classmethod
@@ -280,9 +200,17 @@ class ModelEndpoint(BaseModel):
             status=ModelEndpointStatus.from_flat_dict(endpoint_dict=endpoint_dict),
         )
 
+    def get(self, field, default=None):
+        return (
+            getattr(self.metadata, field, None)
+            or getattr(self.spec, field, None)
+            or getattr(self.status, field, None)
+            or default
+        )
+
 
 class ModelEndpointList(BaseModel):
-    endpoints: list[ModelEndpoint] = []
+    endpoints: list[ModelEndpoint]
 
 
 class ModelEndpointMonitoringMetric(BaseModel):
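
Note: rc6 replaces the three hand-rolled from_flat_dict implementations with a single ModelEndpointParser mixin; each section now only declares which flattened DB keys hold JSON strings. Below is a minimal standalone sketch of that pattern (the names Parser and Spec are illustrative, and the body stands in for mlrun's _mapping_attributes helper, which is not shown in this diff):

    import abc
    import json
    from typing import Optional

    from pydantic.v1 import BaseModel


    class Parser(abc.ABC, BaseModel):
        @classmethod
        def json_parse_values(cls) -> list[str]:
            return []

        @classmethod
        def from_flat_dict(cls, flat: dict, json_parse_values: Optional[list] = None) -> "Parser":
            # Keep only the keys this model declares, decoding JSON-string columns.
            json_parse_values = json_parse_values or cls.json_parse_values()
            fields = {
                key: json.loads(value) if key in json_parse_values else value
                for key, value in flat.items()
                if key in cls.__fields__
            }
            return cls(**fields)


    class Spec(Parser):
        feature_names: list[str] = []

        @classmethod
        def json_parse_values(cls) -> list[str]:
            return ["feature_names"]  # stored as a JSON string in the DB row


    assert Spec.from_flat_dict({"feature_names": '["f0", "f1"]'}).feature_names == ["f0", "f1"]

The new ModelEndpoint.get(field, default) helper resolves a field name against metadata, spec, and status in that order, so callers no longer need to know which section a flattened key lives in.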

mlrun/common/schemas/notification.py CHANGED

@@ -132,8 +132,14 @@ class SetNotificationRequest(pydantic.v1.BaseModel):
     notifications: list[Notification] = None
 
 
+class NotificationSummary(pydantic.v1.BaseModel):
+    failed: int = 0
+    succeeded: int = 0
+
+
 class NotificationState(pydantic.v1.BaseModel):
     kind: str
     err: Optional[
         str
     ]  # empty error means that the notifications were sent successfully
+    summary: NotificationSummary
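
Note: summary is a new required field on NotificationState, so payloads that previously carried only kind and err now also report per-kind send counts. A hypothetical instance (the values are illustrative, not taken from the diff):

    state = NotificationState(
        kind="slack",
        err="2 of 5 notifications failed",  # example error text
        summary=NotificationSummary(failed=2, succeeded=3),
    )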

mlrun/config.py CHANGED

@@ -607,8 +607,6 @@ default_config = {
        "default_http_sink_app": "http://nuclio-{project}-{application_name}.{namespace}.svc.cluster.local:8080",
        "parquet_batching_max_events": 10_000,
        "parquet_batching_timeout_secs": timedelta(minutes=1).total_seconds(),
-        # See mlrun.model_monitoring.db.stores.ObjectStoreFactory for available options
-        "endpoint_store_connection": "",
        # See mlrun.model_monitoring.db.tsdb.ObjectTSDBFactory for available options
        "tsdb_connection": "",
        # See mlrun.common.schemas.model_monitoring.constants.StreamKind for available options

mlrun/datastore/datastore_profile.py CHANGED

@@ -81,22 +81,62 @@ class DatastoreProfileBasic(DatastoreProfile):
     private: typing.Optional[str] = None
 
 
-class VectorStoreProfile(DatastoreProfile):
-    type: str = pydantic.Field("vector")
-    _private_attributes = ("kwargs_private",)
-    vector_store_class: str
-    kwargs_public: typing.Optional[dict] = None
-    kwargs_private: typing.Optional[dict] = None
-
-    def attributes(self, kwargs=None):
-        attributes = {}
-        if self.kwargs_public:
-            attributes = merge(attributes, self.kwargs_public)
-        if self.kwargs_private:
-            attributes = merge(attributes, self.kwargs_private)
-        if kwargs:
-            attributes = merge(attributes, kwargs)
-        return attributes
+class ConfigProfile(DatastoreProfile):
+    """
+    A profile class for managing configuration data with nested public and private attributes.
+    This class extends DatastoreProfile to handle configuration settings, separating them into
+    public and private dictionaries. Both dictionaries support nested structures, and the class
+    provides functionality to merge these attributes when needed.
+
+    Args:
+        public (Optional[dict]): Dictionary containing public configuration settings,
+            supporting nested structures
+        private (Optional[dict]): Dictionary containing private/sensitive configuration settings,
+            supporting nested structures
+
+    Example:
+        >>> public = {
+                "database": {
+                    "host": "localhost",
+                    "port": 5432
+                },
+                "api_version": "v1"
+            }
+        >>> private = {
+                "database": {
+                    "password": "secret123",
+                    "username": "admin"
+                },
+                "api_key": "xyz789"
+            }
+        >>> config = ConfigProfile("myconfig", public=public, private=private)
+
+        # When attributes() is called, it merges public and private:
+        # {
+        #     "database": {
+        #         "host": "localhost",
+        #         "port": 5432,
+        #         "password": "secret123",
+        #         "username": "admin"
+        #     },
+        #     "api_version": "v1",
+        #     "api_key": "xyz789"
+        # }
+    """
+
+    type = "config"
+    _private_attributes = "private"
+    public: typing.Optional[dict] = None
+    private: typing.Optional[dict] = None
+
+    def attributes(self):
+        res = {}
+        if self.public:
+            res = merge(res, self.public)
+        if self.private:
+            res = merge(res, self.private)
+        return res
 
 
 class DatastoreProfileKafkaTarget(DatastoreProfile):
@@ -494,7 +534,7 @@ class DatastoreProfile2Json(pydantic.v1.BaseModel):
             "gcs": DatastoreProfileGCS,
             "az": DatastoreProfileAzureBlob,
             "hdfs": DatastoreProfileHdfs,
-            "vector": VectorStoreProfile,
+            "config": ConfigProfile,
         }
         if datastore_type in ds_profile_factory:
            return ds_profile_factory[datastore_type].parse_obj(decoded_dict)
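
Note: ConfigProfile replaces VectorStoreProfile in the profile factory, and attributes() deep-merges the private dict into the public one (nested keys are combined rather than overwritten, per the docstring above). A short usage sketch, assuming the standard datastore-profile registration flow also applies to the new type:

    from mlrun.datastore.datastore_profile import ConfigProfile

    profile = ConfigProfile(
        name="myconfig",
        public={"database": {"host": "localhost", "port": 5432}},
        private={"database": {"password": "secret123"}},
    )
    # Nested dicts are merged, so both halves of "database" survive:
    assert profile.attributes()["database"] == {
        "host": "localhost",
        "port": 5432,
        "password": "secret123",
    }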

mlrun/datastore/vectorstore.py CHANGED

@@ -13,7 +13,7 @@
 # limitations under the License.
 
 import inspect
-from importlib import import_module
+from collections.abc import Iterable
 from typing import Union
 
 from mlrun.artifacts import DocumentArtifact
@@ -21,57 +21,27 @@ from mlrun.artifacts import DocumentArtifact
 
 class VectorStoreCollection:
     """
-    VectorStoreCollection is a class that manages a collection of vector stores, providing methods to add and delete
-    documents and artifacts, and to interact with an MLRun context.
-
-    Attributes:
-        _collection_impl (object): The underlying collection implementation.
-        _mlrun_context (Union[MlrunProject, MLClientCtx]): The MLRun context associated with the collection.
-        collection_name (str): The name of the collection.
-        id (str): The unique identifier of the collection, composed of the datastore profile and collection name.
-
-    Methods:
-        add_documents(documents: list["Document"], **kwargs):
-            Adds a list of documents to the collection and updates the MLRun artifacts associated with the documents
-            if an MLRun context is present.
-
-        add_artifacts(artifacts: list[DocumentArtifact], splitter=None, **kwargs):
-            Adds a list of DocumentArtifact objects to the collection, optionally using a splitter to convert
-            artifacts to documents.
-
-        remove_itself_from_artifact(artifact: DocumentArtifact):
-            Removes the current object from the given artifact's collection and updates the artifact.
-
-        delete_artifacts(artifacts: list[DocumentArtifact]):
-            Deletes a list of DocumentArtifact objects from the collection and updates the MLRun context.
-            Raises NotImplementedError if the delete operation is not supported for the collection implementation.
+    A wrapper class for vector store collections with MLRun integration.
+
+    This class wraps a vector store implementation (like Milvus, Chroma) and provides
+    integration with MLRun context for document and artifact management. It delegates
+    most operations to the underlying vector store while handling MLRun-specific
+    functionality.
+
+    The class implements attribute delegation through __getattr__ and __setattr__,
+    allowing direct access to the underlying vector store's methods and attributes
+    while maintaining MLRun integration.
     """
 
     def __init__(
         self,
-        vector_store_class: str,
         mlrun_context: Union["MlrunProject", "MLClientCtx"],  # noqa: F821
-        datastore_profile: str,
         collection_name: str,
-        **kwargs,
+        vector_store: "VectorStore",  # noqa: F821
     ):
-        # Import the vector store class dynamically
-        module_name, class_name = vector_store_class.rsplit(".", 1)
-        module = import_module(module_name)
-        vector_store_class = getattr(module, class_name)
-
-        signature = inspect.signature(vector_store_class)
-
-        # Create the vector store instance
-        if "collection_name" in signature.parameters.keys():
-            vector_store = vector_store_class(collection_name=collection_name, **kwargs)
-        else:
-            vector_store = vector_store_class(**kwargs)
-
         self._collection_impl = vector_store
         self._mlrun_context = mlrun_context
         self.collection_name = collection_name
-        self.id = datastore_profile + "/" + collection_name
 
     def __getattr__(self, name):
        # This method is called when an attribute is not found in the usual places
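
Note: construction is inverted in rc6. The caller now instantiates the vector store itself and passes the live object, rather than handing VectorStoreCollection a dotted class path to import and build. A hedged sketch of the new flow (the Chroma/OpenAIEmbeddings classes and the project handle are illustrative, not mandated by this diff):

    from langchain_community.vectorstores import Chroma
    from langchain_openai import OpenAIEmbeddings

    store = Chroma(collection_name="docs", embedding_function=OpenAIEmbeddings())
    collection = VectorStoreCollection(
        mlrun_context=project,  # an MlrunProject or MLClientCtx
        collection_name="docs",
        vector_store=store,
    )

Dropping the datastore_profile argument also removes the composite self.id; artifacts are now tagged with the plain collection name, as the collection_add/collection_remove changes below show.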
@@ -112,40 +82,74 @@ class VectorStoreCollection:
             )
             if mlrun_uri:
                 artifact = self._mlrun_context.get_store_resource(mlrun_uri)
-                artifact.collection_add(self.id)
+                artifact.collection_add(self.collection_name)
                 self._mlrun_context.update_artifact(artifact)
+
         return self._collection_impl.add_documents(documents, **kwargs)
 
     def add_artifacts(self, artifacts: list[DocumentArtifact], splitter=None, **kwargs):
         """
-        Add a list of DocumentArtifact objects to the collection.
+        Add a list of DocumentArtifact objects to the vector store collection.
+
+        Converts artifacts to LangChain documents, adds them to the vector store, and
+        updates the MLRun context. If documents are split, the IDs are handled appropriately.
 
         Args:
-            artifacts (list[DocumentArtifact]): A list of DocumentArtifact objects to be added.
-            splitter (optional): An optional splitter to be used when converting artifacts to documents.
-            **kwargs: Additional keyword arguments to be passed to the collection's add_documents method.
+            artifacts (list[DocumentArtifact]): List of DocumentArtifact objects to add
+            splitter (optional): Document splitter to break artifacts into smaller chunks.
+                If None, each artifact becomes a single document.
+            **kwargs: Additional arguments passed to the underlying add_documents method.
+                Special handling for 'ids' kwarg:
+                - If provided and document is split, IDs are generated as "{original_id}_{i}"
+                  where i starts from 1 (e.g., "doc1_1", "doc1_2", etc.)
+                - If provided and document isn't split, original IDs are used as-is
 
         Returns:
-            list: A list of IDs of the added documents.
+            list: List of IDs for all added documents. When no custom IDs are provided:
+                - Without splitting: Vector store generates IDs automatically
+                - With splitting: Vector store generates separate IDs for each chunk
+                When custom IDs are provided:
+                - Without splitting: Uses provided IDs directly
+                - With splitting: Generates sequential IDs as "{original_id}_{i}" for each chunk
         """
         all_ids = []
-        for artifact in artifacts:
+        user_ids = kwargs.pop("ids", None)
+
+        if user_ids:
+            if not isinstance(user_ids, Iterable):
+                raise ValueError("IDs must be an iterable collection")
+            if len(user_ids) != len(artifacts):
+                raise ValueError(
+                    "The number of IDs should match the number of artifacts"
+                )
+        for index, artifact in enumerate(artifacts):
             documents = artifact.to_langchain_documents(splitter)
-            artifact.collection_add(self.id)
-            self._mlrun_context.update_artifact(artifact)
+            artifact.collection_add(self.collection_name)
+            if self._mlrun_context:
+                self._mlrun_context.update_artifact(artifact)
+            if user_ids:
+                num_of_documents = len(documents)
+                if num_of_documents > 1:
+                    ids_to_pass = [
+                        f"{user_ids[index]}_{i}" for i in range(1, num_of_documents + 1)
+                    ]
+                else:
+                    ids_to_pass = [user_ids[index]]
+                kwargs["ids"] = ids_to_pass
             ids = self._collection_impl.add_documents(documents, **kwargs)
             all_ids.extend(ids)
         return all_ids
 
-    def remove_itself_from_artifact(self, artifact: DocumentArtifact):
+    def remove_from_artifact(self, artifact: DocumentArtifact):
         """
         Remove the current object from the given artifact's collection and update the artifact.
 
         Args:
             artifact (DocumentArtifact): The artifact from which the current object should be removed.
         """
-        artifact.collection_remove(self.id)
-        self._mlrun_context.update_artifact(artifact)
+        artifact.collection_remove(self.collection_name)
+        if self._mlrun_context:
+            self._mlrun_context.update_artifact(artifact)
 
     def delete_artifacts(self, artifacts: list[DocumentArtifact]):
         """
@@ -162,13 +166,15 @@ class VectorStoreCollection:
         """
         store_class = self._collection_impl.__class__.__name__.lower()
         for artifact in artifacts:
-            artifact.collection_remove(self.id)
-            self._mlrun_context.update_artifact(artifact)
+            artifact.collection_remove(self.collection_name)
+            if self._mlrun_context:
+                self._mlrun_context.update_artifact(artifact)
+
             if store_class == "milvus":
-                expr = f"{DocumentArtifact.METADATA_SOURCE_KEY} == '{artifact.source}'"
+                expr = f"{DocumentArtifact.METADATA_SOURCE_KEY} == '{artifact.get_source()}'"
                 return self._collection_impl.delete(expr=expr)
             elif store_class == "chroma":
-                where = {DocumentArtifact.METADATA_SOURCE_KEY: artifact.source}
+                where = {DocumentArtifact.METADATA_SOURCE_KEY: artifact.get_source()}
                 return self._collection_impl.delete(where=where)
 
             elif (
@@ -177,7 +183,9 @@
                 in inspect.signature(self._collection_impl.delete).parameters
             ):
                 filter = {
-                    "metadata": {DocumentArtifact.METADATA_SOURCE_KEY: artifact.source}
+                    "metadata": {
+                        DocumentArtifact.METADATA_SOURCE_KEY: artifact.get_source()
+                    }
                 }
                 return self._collection_impl.delete(filter=filter)
             else:
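
Note: the reworked add_artifacts validates user-supplied IDs up front and derives chunk IDs when a splitter fans one artifact out into several documents. A hypothetical call showing the documented ID scheme (the splitter settings and artifact objects are illustrative):

    from langchain_text_splitters import RecursiveCharacterTextSplitter

    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    ids = collection.add_artifacts(
        [artifact_a, artifact_b],  # two DocumentArtifact objects
        splitter=splitter,
        ids=["a", "b"],
    )
    # If artifact_a splits into two chunks and artifact_b stays whole:
    # ids == ["a_1", "a_2", "b"]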