PyPI - mlrun - Versions diffs - 1.8.0rc27__py3-none-any.whl → 1.8.0rc28__py3-none-any.whl - Mend

mlrun 1.8.0rc27__py3-none-any.whl → 1.8.0rc28__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mlrun might be problematic. Click here for more details.

Files changed (33) hide show

mlrun/artifacts/document.py +9 -6
mlrun/common/schemas/serving.py +22 -0
mlrun/config.py +9 -0
mlrun/datastore/base.py +0 -7
mlrun/datastore/s3.py +9 -2
mlrun/db/base.py +0 -1
mlrun/db/httpdb.py +5 -10
mlrun/db/nopdb.py +0 -1
mlrun/execution.py +15 -4
mlrun/model_monitoring/applications/_application_steps.py +1 -0
mlrun/model_monitoring/applications/base.py +132 -21
mlrun/model_monitoring/applications/context.py +2 -3
mlrun/model_monitoring/controller.py +117 -57
mlrun/model_monitoring/db/_schedules.py +8 -0
mlrun/model_monitoring/db/tsdb/__init__.py +12 -5
mlrun/model_monitoring/stream_processing.py +3 -2
mlrun/projects/project.py +38 -7
mlrun/runtimes/base.py +1 -1
mlrun/runtimes/generators.py +1 -1
mlrun/runtimes/nuclio/function.py +37 -0
mlrun/runtimes/nuclio/serving.py +3 -0
mlrun/runtimes/pod.py +1 -3
mlrun/serving/routers.py +62 -17
mlrun/serving/server.py +11 -0
mlrun/serving/states.py +0 -4
mlrun/serving/v2_serving.py +45 -10
mlrun/utils/version/version.json +2 -2
{mlrun-1.8.0rc27.dist-info → mlrun-1.8.0rc28.dist-info}/METADATA +4 -2
{mlrun-1.8.0rc27.dist-info → mlrun-1.8.0rc28.dist-info}/RECORD +33 -32
{mlrun-1.8.0rc27.dist-info → mlrun-1.8.0rc28.dist-info}/LICENSE +0 -0
{mlrun-1.8.0rc27.dist-info → mlrun-1.8.0rc28.dist-info}/WHEEL +0 -0
{mlrun-1.8.0rc27.dist-info → mlrun-1.8.0rc28.dist-info}/entry_points.txt +0 -0
{mlrun-1.8.0rc27.dist-info → mlrun-1.8.0rc28.dist-info}/top_level.txt +0 -0

mlrun/artifacts/document.py CHANGED Viewed

@@ -34,7 +34,9 @@ class DocumentLoaderSpec(ModelObj):
     This class is responsible for loading documents from a given source path using a specified loader class.
     The loader class is dynamically imported and instantiated with the provided arguments. The loaded documents
-    can be optionally uploaded as artifacts.
+    can be optionally uploaded as artifacts. Note that only loader classes that return single results
+    (e.g., TextLoader, UnstructuredHTMLLoader, WebBaseLoader(scalar)) are supported - loaders returning multiple
+    results like DirectoryLoader or WebBaseLoader(list) are not compatible.
     Attributes:
         loader_class_name (str): The name of the loader class to use for loading documents.
@@ -61,7 +63,7 @@ class DocumentLoaderSpec(ModelObj):
             kwargs (Optional[dict]): Additional keyword arguments to pass to the loader class.
             download_object (bool, optional): If True, the file will be downloaded before launching
                 the loader. If False, the loader accepts a link that should not be downloaded.
-                Defaults to False.
+                Defaults to True.
         Example:
             >>> # Create a loader specification for PDF documents
             >>> loader_spec = DocumentLoaderSpec(
@@ -97,7 +99,7 @@ class MLRunLoader:
     Args:
         artifact_key (str, optional): The key for the artifact to be logged. Special characters and symbols
             not valid in artifact names will be encoded as their hexadecimal representation. The '%%' pattern
-            in the key will be replaced by the hex-encoded version of the source path. Defaults to "doc%%".
+            in the key will be replaced by the hex-encoded version of the source path. Defaults to "%%".
         local_path (str): The source path of the document to be loaded.
         loader_spec (DocumentLoaderSpec): Specification for the document loader.
         producer (Optional[Union[MlrunProject, str, MLClientCtx]], optional): The producer of the document.
@@ -129,7 +131,7 @@ class MLRunLoader:
         >>> loader = MLRunLoader(
         ...     source_path="/path/to/document.txt",
         ...     loader_spec=loader_spec,
-        ...     artifact_key="doc%%",  # %% will be replaced with encoded path
+        ...     artifact_key="%%",  # %% will be replaced with encoded path
         ...     producer=project,
         ... )
         >>> documents = loader.load()
@@ -141,7 +143,7 @@ class MLRunLoader:
         ...     loader_cls=MLRunLoader,
         ...     loader_kwargs={
         ...         "loader_spec": loader_spec,
-        ...         "artifact_key": "doc%%",
+        ...         "artifact_key": "%%",
         ...         "producer": project,
         ...         "upload": True,
         ...     },
@@ -154,7 +156,7 @@ class MLRunLoader:
         cls,
         source_path: str,
         loader_spec: "DocumentLoaderSpec",
-        artifact_key="doc%%",
+        artifact_key="%%",
         producer: Optional[Union["MlrunProject", str, "MLClientCtx"]] = None,  # noqa: F821
         upload: bool = False,
         tag: str = "",
@@ -271,6 +273,7 @@ class DocumentArtifact(Artifact):
             result.append("_")
         resolved_path = "".join(result)
+        resolved_path = resolved_path.lstrip("_")
         return resolved_path
     class DocumentArtifactSpec(ArtifactSpec):

mlrun/common/schemas/serving.py ADDED Viewed

@@ -0,0 +1,22 @@
+# Copyright 2025 Iguazio
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from pydantic.v1 import BaseModel
+from .background_task import BackgroundTaskList
+class DeployResponse(BaseModel):
+    data: dict
+    background_tasks: BackgroundTaskList

mlrun/config.py CHANGED Viewed

@@ -232,6 +232,7 @@ default_config = {
                 "abort_grace_period": "10",
                 "delete_project": "900",
                 "delete_function": "900",
+                "model_endpoint_creation": "600",
             },
             "runtimes": {"dask": "600"},
             "push_notifications": "60",
@@ -870,6 +871,14 @@ class Config:
             return self.__class__(val)
         return val
+    def __deepcopy__(self, memo):
+        cls = self.__class__
+        # create a new Config without calling __init__ (avoid recursion)
+        result = cls.__new__(cls)
+        # manually deep-copy _cfg
+        object.__setattr__(result, "_cfg", copy.deepcopy(self._cfg, memo))
+        return result
     def __setattr__(self, attr, value):
         # in order for the dbpath setter to work
         if attr == "dbpath":

mlrun/datastore/base.py CHANGED Viewed

@@ -677,13 +677,6 @@ class DataItem:
         return f"'{self.url}'"
-def get_range(size, offset):
-    byterange = f"bytes={offset}-"
-    if size:
-        byterange += str(offset + size)
-    return byterange
 def basic_auth_header(user, password):
     username = user.encode("latin1")
     password = password.encode("latin1")

mlrun/datastore/s3.py CHANGED Viewed

@@ -21,7 +21,7 @@ from fsspec.registry import get_filesystem_class
 import mlrun.errors
-from .base import DataStore, FileStats, get_range, make_datastore_schema_sanitizer
+from .base import DataStore, FileStats, make_datastore_schema_sanitizer
 class S3Store(DataStore):
@@ -108,6 +108,13 @@ class S3Store(DataStore):
                     "choose-signer.s3.*", disable_signing
                 )
+    @staticmethod
+    def get_range(size, offset):
+        byterange = f"bytes={offset}-"
+        if size:
+            byterange += str(offset + size - 1)
+        return byterange
     def get_spark_options(self):
         res = {}
         st = self.get_storage_options()
@@ -185,7 +192,7 @@ class S3Store(DataStore):
         bucket, key = self.get_bucket_and_key(key)
         obj = self.s3.Object(bucket, key)
         if size or offset:
-            return obj.get(Range=get_range(size, offset))["Body"].read()
+            return obj.get(Range=S3Store.get_range(size, offset))["Body"].read()
         return obj.get()["Body"].read()
     def put(self, key, data, append=False):

mlrun/db/base.py CHANGED Viewed

@@ -1077,7 +1077,6 @@ class RunDBInterface(ABC):
         base_period: int = 10,
         image: str = "mlrun/mlrun",
         deploy_histogram_data_drift_app: bool = True,
-        rebuild_images: bool = False,
         fetch_credentials_from_sys_config: bool = False,
     ) -> None:
         pass

mlrun/db/httpdb.py CHANGED Viewed

@@ -2374,9 +2374,9 @@ class HTTPRunDB(RunDBInterface):
     def retry_pipeline(
         self,
         run_id: str,
+        project: str,
         namespace: Optional[str] = None,
         timeout: int = 30,
-        project: Optional[str] = None,
     ):
         """
         Retry a specific pipeline run using its run ID. This function sends an API request
@@ -2386,8 +2386,7 @@ class HTTPRunDB(RunDBInterface):
         :param run_id: The unique ID of the pipeline run to retry.
         :param namespace: Kubernetes namespace where the pipeline is running. Optional.
         :param timeout: Timeout (in seconds) for the API call. Defaults to 30 seconds.
-        :param project: Name of the MLRun project associated with the pipeline. Can be
-            ``*`` to query across all projects. Optional.
+        :param project: Name of the MLRun project associated with the pipeline.
         :raises ValueError: Raised if the API response is not successful or contains an
             error.
@@ -2398,14 +2397,13 @@ class HTTPRunDB(RunDBInterface):
         params = {}
         if namespace:
             params["namespace"] = namespace
-        project_path = project if project else "*"
         resp_text = ""
         resp_code = None
         try:
             resp = self.api_call(
                 "POST",
-                f"projects/{project_path}/pipelines/{run_id}/retry",
+                f"projects/{project}/pipelines/{run_id}/retry",
                 params=params,
                 timeout=timeout,
             )
@@ -2420,7 +2418,7 @@ class HTTPRunDB(RunDBInterface):
             logger.error(
                 "Retry pipeline API call encountered an error.",
                 run_id=run_id,
-                project=project_path,
+                project=project,
                 namespace=namespace,
                 response_code=resp_code,
                 response_text=resp_text,
@@ -2435,7 +2433,7 @@ class HTTPRunDB(RunDBInterface):
         logger.info(
             "Successfully retried pipeline run",
             run_id=run_id,
-            project=project_path,
+            project=project,
             namespace=namespace,
         )
         return resp.json()
@@ -3973,7 +3971,6 @@ class HTTPRunDB(RunDBInterface):
         base_period: int = 10,
         image: str = "mlrun/mlrun",
         deploy_histogram_data_drift_app: bool = True,
-        rebuild_images: bool = False,
         fetch_credentials_from_sys_config: bool = False,
     ) -> None:
         """
@@ -3991,7 +3988,6 @@ class HTTPRunDB(RunDBInterface):
                                                   stream functions, which are real time nuclio functions.
                                                   By default, the image is mlrun/mlrun.
         :param deploy_histogram_data_drift_app:   If true, deploy the default histogram-based data drift application.
-        :param rebuild_images:                    If true, force rebuild of model monitoring infrastructure images.
         :param fetch_credentials_from_sys_config: If true, fetch the credentials from the system configuration.
         """
@@ -4002,7 +3998,6 @@ class HTTPRunDB(RunDBInterface):
                 "base_period": base_period,
                 "image": image,
                 "deploy_histogram_data_drift_app": deploy_histogram_data_drift_app,
-                "rebuild_images": rebuild_images,
                 "fetch_credentials_from_sys_config": fetch_credentials_from_sys_config,
             },
         )

mlrun/db/nopdb.py CHANGED Viewed

@@ -857,7 +857,6 @@ class NopDB(RunDBInterface):
         base_period: int = 10,
         image: str = "mlrun/mlrun",
         deploy_histogram_data_drift_app: bool = True,
-        rebuild_images: bool = False,
         fetch_credentials_from_sys_config: bool = False,
     ) -> None:
         pass

mlrun/execution.py CHANGED Viewed

@@ -914,7 +914,8 @@ class MLClientCtx:
                     kwargs={"extract_images": True}
                 )
         :param upload: Whether to upload the artifact
-        :param labels: Key-value labels
+        :param labels:  Key-value labels. A 'source' label is automatically added using either
+                        local_path or target_path to facilitate easier document searching.
         :param target_path: Path to the local file
         :param db_key: The key to use in the artifact DB table, by default its run name + '_' + key
                        db_key=False will not register it in the artifacts table
@@ -932,22 +933,32 @@ class MLClientCtx:
             ...     ),
             ... )
         """
+        original_source = local_path or target_path
-        if not key and not local_path and not target_path:
+        if not key and not original_source:
             raise ValueError(
                 "Must provide either 'key' parameter or 'local_path'/'target_path' to derive the key from"
             )
         if not key:
-            key = DocumentArtifact.key_from_source(local_path or target_path)
+            key = DocumentArtifact.key_from_source(original_source)
         doc_artifact = DocumentArtifact(
             key=key,
-            original_source=local_path or target_path,
+            original_source=original_source,
             document_loader_spec=document_loader_spec,
             collections=kwargs.pop("collections", None),
             **kwargs,
         )
+        # limit label to a max of 255 characters (for db reasons)
+        max_length = 255
+        labels = labels or {}
+        labels["source"] = (
+            original_source[: max_length - 3] + "..."
+            if len(original_source) > max_length
+            else original_source
+        )
         item = self._artifacts_manager.log_artifact(
             self,
             doc_artifact,

mlrun/model_monitoring/applications/_application_steps.py CHANGED Viewed

@@ -126,6 +126,7 @@ class _PrepareMonitoringEvent(StepToDict):
         :param application_name: Application name.
         """
         self.graph_context = context
+        _ = self.graph_context.project_obj  # Ensure project exists
         self.application_name = application_name
         self.model_endpoints: dict[str, mlrun.common.schemas.ModelEndpoint] = {}

mlrun/model_monitoring/applications/base.py CHANGED Viewed

@@ -215,6 +215,116 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
         )
         function.deploy()
+    @classmethod
+    def get_job_handler(cls, handler_to_class: str) -> str:
+        """
+        A helper function to get the handler to the application job ``_handler``.
+        :param handler_to_class: The handler to the application class, e.g. ``my_package.sub_module1.MonitoringApp1``.
+        :returns:                The handler to the job of the application class.
+        """
+        return f"{handler_to_class}::{cls._handler.__name__}"
+    @classmethod
+    def to_job(
+        cls,
+        *,
+        class_handler: Optional[str] = None,
+        func_path: Optional[str] = None,
+        func_name: Optional[str] = None,
+        tag: Optional[str] = None,
+        image: Optional[str] = None,
+        with_repo: Optional[bool] = False,
+        requirements: Optional[Union[str, list[str]]] = None,
+        requirements_file: str = "",
+        project: Optional["mlrun.MlrunProject"] = None,
+    ) -> mlrun.runtimes.KubejobRuntime:
+        """
+        Get the application's :py:meth:`~mlrun.model_monitoring.applications.ModelMonitoringApplicationBase.do_tracking`
+        model monitoring logic as a :py:class:`~mlrun.runtimes.KubejobRuntime`.
+        The returned job can be run as any MLRun job with the relevant inputs and params to your application:
+        .. code-block:: python
+            job = ModelMonitoringApplicationBase.to_job(
+                class_handler="package.module.AppClass"
+            )
+            job.run(inputs={}, params={}, local=False)  # Add the relevant inputs and params
+        Optional inputs:
+        * ``sample_data``, ``pd.DataFrame``
+        * ``reference_data``, ``pd.DataFrame``
+        Optional params:
+        * ``endpoints``, ``list[tuple[str, str]]``
+        * ``start``, ``datetime``
+        * ``end``, ``datetime``
+        * ``base_period``, ``int``
+        For Git sources, add the source archive to the returned job and change the handler:
+        .. code-block:: python
+            handler = ModelMonitoringApplicationBase.get_job_handler("module.AppClass")
+            job.with_source_archive(
+                "git://github.com/owner/repo.git#branch-category/specific-task",
+                workdir="path/to/application/folder",
+                handler=handler,
+            )
+        :param class_handler:     The handler to the class, e.g. ``path.to.module::MonitoringApplication``,
+                                  useful when using Git sources or code from images.
+                                  If ``None``, the current class, deriving from
+                                  :py:class:`~mlrun.model_monitoring.applications.ModelMonitoringApplicationBase`,
+                                  is used.
+        :param func_path:         The path to the function. If ``None``, the current notebook is used.
+        :param func_name:         The name of the function. If not ``None``, the class name is used.
+        :param tag:               Tag for the function.
+        :param image:             Docker image to run the job on (when running remotely).
+        :param with_repo:         Whether to clone the current repo to the build source.
+        :param requirements:      List of Python requirements to be installed in the image.
+        :param requirements_file: Path to a Python requirements file to be installed in the image.
+        :param project:           The current project to set the function to. If not set, the current project is used.
+        :returns: The :py:class:`~mlrun.runtimes.KubejobRuntime` job that wraps the model monitoring application's
+                  logic.
+        """
+        project = project or cast("mlrun.MlrunProject", mlrun.get_current_project())
+        if not class_handler and cls == ModelMonitoringApplicationBase:
+            raise ValueError(
+                "You must provide a handler to the model monitoring application class"
+            )
+        handler_to_class = class_handler or cls.__name__
+        handler = cls.get_job_handler(handler_to_class)
+        if not class_handler:
+            class_name = cls.__name__
+        else:
+            class_name = handler_to_class.split(".")[-1].split("::")[-1]
+        job_name = func_name if func_name else class_name
+        job = cast(
+            mlrun.runtimes.KubejobRuntime,
+            project.set_function(
+                func=func_path,
+                name=job_name,
+                kind=mlrun.runtimes.KubejobRuntime.kind,
+                handler=handler,
+                tag=tag,
+                image=image,
+                with_repo=with_repo,
+                requirements=requirements,
+                requirements_file=requirements_file,
+            ),
+        )
+        return job
     @classmethod
     def evaluate(
         cls,
@@ -223,10 +333,12 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
         *,
         tag: Optional[str] = None,
         run_local: bool = True,
+        auto_build: bool = True,
         sample_data: Optional[pd.DataFrame] = None,
         reference_data: Optional[pd.DataFrame] = None,
         image: Optional[str] = None,
         with_repo: Optional[bool] = False,
+        class_handler: Optional[str] = None,
         requirements: Optional[Union[str, list[str]]] = None,
         requirements_file: str = "",
         endpoints: Optional[list[tuple[str, str]]] = None,
@@ -239,19 +351,21 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
         :py:meth:`~mlrun.model_monitoring.applications.ModelMonitoringApplicationBase.do_tracking`
         model monitoring logic as a :py:class:`~mlrun.runtimes.KubejobRuntime`, which is an MLRun function.
-        This method has default values for all of its arguments. You should be change them when you want to pass
+        This function has default values for all of its arguments. You should be change them when you want to pass
         data to the application.
         :param func_path:         The path to the function. If ``None``, the current notebook is used.
         :param func_name:         The name of the function. If not ``None``, the class name is used.
         :param tag:               Tag for the function.
         :param run_local:         Whether to run the function locally or remotely.
+        :param auto_build:        Whether to auto build the function.
         :param sample_data:       Pandas data-frame as the current dataset.
                                   When set, it replaces the data read from the model endpoint's offline source.
         :param reference_data:    Pandas data-frame of the reference dataset.
                                   When set, its statistics override the model endpoint's feature statistics.
-        :param image:             Docker image to run the job on.
+        :param image:             Docker image to run the job on (when running remotely).
         :param with_repo:         Whether to clone the current repo to the build source.
+        :param class_handler:     The relative path to the class, useful when using Git sources or code from images.
         :param requirements:      List of Python requirements to be installed in the image.
         :param requirements_file: Path to a Python requirements file to be installed in the image.
         :param endpoints:         A list of tuples of the model endpoint (name, uid) to get the data from.
@@ -268,23 +382,17 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
                   method with the given parameters and inputs, wrapped in a :py:class:`~mlrun.model.RunObject`.
         """
         project = cast("mlrun.MlrunProject", mlrun.get_current_project())
-        class_name = cls.__name__
-        job_name = func_name if func_name is not None else class_name
-        handler = f"{class_name}::{cls._handler.__name__}"
-        job = cast(
-            mlrun.runtimes.KubejobRuntime,
-            project.set_function(
-                func=func_path,
-                name=job_name,
-                kind=mlrun.runtimes.KubejobRuntime.kind,
-                handler=handler,
-                tag=tag,
-                image=image,
-                with_repo=with_repo,
-                requirements=requirements,
-                requirements_file=requirements_file,
-            ),
+        job = cls.to_job(
+            func_path=func_path,
+            func_name=func_name,
+            class_handler=class_handler,
+            tag=tag,
+            image=image,
+            with_repo=with_repo,
+            requirements=requirements,
+            requirements_file=requirements_file,
+            project=project,
         )
         params: dict[str, Union[list[tuple[str, str]], datetime, int, None]] = {}
@@ -305,18 +413,21 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
             (reference_data, "reference_data"),
         ]:
             if data is not None:
-                key = f"{job_name}_{identifier}"
+                key = f"{job.metadata.name}_{identifier}"
                 inputs[identifier] = project.log_dataset(
                     key,
                     data,
                     labels={
                         mlrun_constants.MLRunInternalLabels.runner_pod: socket.gethostname(),
                         mlrun_constants.MLRunInternalLabels.producer_type: "model-monitoring-job",
-                        mlrun_constants.MLRunInternalLabels.app_name: class_name,
+                        mlrun_constants.MLRunInternalLabels.app_name: func_name
+                        or cls.__name__,
                     },
                 ).uri
-        run_result = job.run(local=run_local, params=params, inputs=inputs)
+        run_result = job.run(
+            local=run_local, auto_build=auto_build, params=params, inputs=inputs
+        )
         return run_result
     @abstractmethod

mlrun/model_monitoring/applications/context.py CHANGED Viewed

@@ -169,9 +169,8 @@ class MonitoringApplicationContext:
         sample_df: Optional[pd.DataFrame] = None,
         feature_stats: Optional[FeatureStats] = None,
     ) -> "MonitoringApplicationContext":
-        project = mlrun.load_project(url=graph_context.project)
         nuclio_logger = graph_context.logger
-        artifacts_logger = project
+        artifacts_logger = graph_context.project_obj
         logger = mlrun.utils.create_logger(
             level=mlrun.mlconf.log_level,
             formatter_kind=mlrun.mlconf.log_formatter,
@@ -180,7 +179,7 @@ class MonitoringApplicationContext:
         return cls(
             application_name=application_name,
             event=event,
-            project=project,
+            project=graph_context.project_obj,
             model_endpoint_dict=model_endpoint_dict,
             logger=logger,
             nuclio_logger=nuclio_logger,

mlrun 1.8.0rc27__py3-none-any.whl → 1.8.0rc28__py3-none-any.whl

Potentially problematic release.

mlrun 1.8.0rc27__py3-none-any.whl → 1.8.0rc28__py3-none-any.whl