PyPI - mlrun - Versions diffs - 1.10.0rc7__py3-none-any.whl → 1.10.0rc9__py3-none-any.whl - Mend

mlrun 1.10.0rc7__py3-none-any.whl → 1.10.0rc9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mlrun might be problematic. Click here for more details.

Files changed (53)

mlrun/__init__.py +3 -1
mlrun/common/db/dialects.py +25 -0
mlrun/common/schemas/background_task.py +5 -0
mlrun/common/schemas/function.py +1 -0
mlrun/common/schemas/model_monitoring/__init__.py +2 -0
mlrun/common/schemas/model_monitoring/constants.py +16 -0
mlrun/common/schemas/model_monitoring/model_endpoints.py +8 -0
mlrun/common/schemas/partition.py +13 -3
mlrun/common/schemas/project.py +4 -0
mlrun/common/schemas/serving.py +2 -0
mlrun/config.py +11 -22
mlrun/datastore/utils.py +3 -2
mlrun/db/__init__.py +1 -0
mlrun/db/base.py +11 -10
mlrun/db/httpdb.py +97 -25
mlrun/db/nopdb.py +5 -4
mlrun/db/sql_types.py +160 -0
mlrun/frameworks/tf_keras/__init__.py +4 -4
mlrun/frameworks/tf_keras/callbacks/logging_callback.py +23 -20
mlrun/frameworks/tf_keras/mlrun_interface.py +4 -1
mlrun/frameworks/tf_keras/model_handler.py +80 -9
mlrun/frameworks/tf_keras/utils.py +12 -1
mlrun/launcher/base.py +6 -1
mlrun/launcher/client.py +1 -22
mlrun/launcher/local.py +0 -4
mlrun/model_monitoring/applications/base.py +21 -1
mlrun/model_monitoring/applications/context.py +2 -1
mlrun/projects/pipelines.py +35 -3
mlrun/projects/project.py +13 -29
mlrun/run.py +37 -5
mlrun/runtimes/daskjob.py +0 -2
mlrun/runtimes/kubejob.py +0 -4
mlrun/runtimes/mpijob/abstract.py +0 -2
mlrun/runtimes/mpijob/v1.py +0 -2
mlrun/runtimes/nuclio/function.py +0 -2
mlrun/runtimes/nuclio/serving.py +14 -51
mlrun/runtimes/pod.py +0 -3
mlrun/runtimes/remotesparkjob.py +0 -2
mlrun/runtimes/sparkjob/spark3job.py +0 -2
mlrun/serving/__init__.py +2 -0
mlrun/serving/server.py +159 -123
mlrun/serving/states.py +215 -18
mlrun/serving/system_steps.py +391 -0
mlrun/serving/v2_serving.py +9 -8
mlrun/utils/helpers.py +19 -1
mlrun/utils/version/version.json +2 -2
{mlrun-1.10.0rc7.dist-info → mlrun-1.10.0rc9.dist-info}/METADATA +22 -18
{mlrun-1.10.0rc7.dist-info → mlrun-1.10.0rc9.dist-info}/RECORD +52 -50
mlrun/common/db/sql_session.py +0 -79
{mlrun-1.10.0rc7.dist-info → mlrun-1.10.0rc9.dist-info}/WHEEL +0 -0
{mlrun-1.10.0rc7.dist-info → mlrun-1.10.0rc9.dist-info}/entry_points.txt +0 -0
{mlrun-1.10.0rc7.dist-info → mlrun-1.10.0rc9.dist-info}/licenses/LICENSE +0 -0
{mlrun-1.10.0rc7.dist-info → mlrun-1.10.0rc9.dist-info}/top_level.txt +0 -0

mlrun/projects/pipelines.py CHANGED Viewed

@@ -39,7 +39,12 @@ from mlrun.utils import (
 from ..common.helpers import parse_versioned_object_uri
 from ..config import config
-from ..run import _run_pipeline, retry_pipeline, wait_for_pipeline_completion
+from ..run import (
+    _run_pipeline,
+    retry_pipeline,
+    terminate_pipeline,
+    wait_for_pipeline_completion,
+)
 from ..runtimes.pod import AutoMountType
@@ -696,6 +701,24 @@ class _KFPRunner(_PipelineRunner):
         )
         return run_id
+    @classmethod
+    def terminate(
+        cls,
+        run: "_PipelineRunStatus",
+        project: typing.Optional["mlrun.projects.MlrunProject"] = None,
+    ) -> str:
+        project_name = project.metadata.name if project else ""
+        logger.info(
+            "Terminating pipeline",
+            run_id=run.run_id,
+            project=project_name,
+        )
+        run_id = terminate_pipeline(
+            run.run_id,
+            project=project_name,
+        )
+        return run_id
     @staticmethod
     def wait_for_completion(
         run: "_PipelineRunStatus",
@@ -1130,6 +1153,7 @@ def load_and_run_workflow(
     project = mlrun.get_or_create_project(
         context=project_context or f"./{project_name}",
         name=project_name,
+        allow_cross_project=True,
     )
     # extract "start" notification if exists
@@ -1145,7 +1169,9 @@ def load_and_run_workflow(
         notification.when = ["running"]
     workflow_log_message = workflow_name or workflow_path
-    context.logger.info(f"Running workflow {workflow_log_message} from remote")
+    context.logger.info(
+        "Running workflow from remote", workflow_log_message=workflow_log_message
+    )
     run = project.run(
         name=workflow_name,
         workflow_path=workflow_path,
@@ -1162,6 +1188,11 @@ def load_and_run_workflow(
         notifications=start_notifications,
         context=context,
     )
+    # Patch the current run object (the workflow-runner) with the workflow-id label
+    context.logger.info(
+        "Associating workflow-runner with workflow ID", run_id=run.run_id
+    )
+    context.set_label("workflow-id", run.run_id)
     context.log_result(key="workflow_id", value=run.run_id)
     context.log_result(key="engine", value=run._engine.engine, commit=True)
@@ -1215,6 +1246,7 @@ def pull_remote_project_files(
             subpath=subpath,
             clone=clone,
             save=False,
+            allow_cross_project=True,
         )
     except Exception as error:
         notify_scheduled_workflow_failure(
@@ -1321,4 +1353,4 @@ def import_remote_project(
         sync_functions=True,
     )
-    context.logger.info(f"Loaded project {project.name} successfully")
+    context.logger.info("Loaded project successfully", project_name=project.name)

mlrun/projects/project.py CHANGED Viewed

@@ -2518,7 +2518,6 @@ class MlrunProject(ModelObj):
     def enable_model_monitoring(
         self,
-        default_controller_image: str = "mlrun/mlrun",
         base_period: int = 10,
         image: str = "mlrun/mlrun",
         *,
@@ -2534,7 +2533,6 @@ class MlrunProject(ModelObj):
         The stream function goal is to monitor the log of the data stream. It is triggered when a new log entry
         is detected. It processes the new events into statistics that are then written to statistics databases.
-        :param default_controller_image:          Deprecated.
         :param base_period:                       The time period in minutes in which the model monitoring controller
                                                   function is triggered. By default, the base period is 10 minutes
                                                   (which is also the minimum value for production environments).
@@ -2562,14 +2560,6 @@ class MlrunProject(ModelObj):
                                                   background, including the histogram data drift app if selected.
         :param fetch_credentials_from_sys_config: If true, fetch the credentials from the system configuration.
         """
-        if default_controller_image != "mlrun/mlrun":
-            # TODO: Remove this in 1.10.0
-            warnings.warn(
-                "'default_controller_image' is deprecated in 1.7.0 and will be removed in 1.10.0, "
-                "use 'image' instead",
-                FutureWarning,
-            )
-            image = default_controller_image
         if base_period < 10:
             logger.warn(
                 "enable_model_monitoring: 'base_period' < 10 minutes is not supported in production environments",
@@ -2971,19 +2961,6 @@ class MlrunProject(ModelObj):
             mlrun.db.get_run_db().delete_function(name=name, project=self.metadata.name)
         self.spec.remove_function(name)
-    def remove_model_monitoring_function(self, name: Union[str, list[str]]):
-        """delete the specified model-monitoring-app function/s
-        :param name: name of the model-monitoring-function/s (under the project)
-        """
-        # TODO: Remove this in 1.10.0
-        warnings.warn(
-            "'remove_model_monitoring_function' is deprecated in 1.7.0 and will be removed in 1.10.0. "
-            "Please use `delete_model_monitoring_function` instead.",
-            FutureWarning,
-        )
-        self.delete_model_monitoring_function(name)
     def delete_model_monitoring_function(self, name: Union[str, list[str]]):
         """delete the specified model-monitoring-app function/s
@@ -3848,7 +3825,8 @@ class MlrunProject(ModelObj):
             )
         The replication factor and timeout configuration might need to be adjusted according to your Confluent cluster
-        type and settings.
+        type and settings. Nuclio annotations for the model monitoring infrastructure and application functions are
+        supported through ``kwargs_public={"nuclio_annotations": {...}, ...}``.
         :param tsdb_profile_name:         The datastore profile name of the time-series database to be used in model
                                           monitoring. The supported profiles are:
@@ -4278,11 +4256,17 @@ class MlrunProject(ModelObj):
         function = mlrun.new_function("mlrun--project--image--builder", kind="job")
         if self.spec.source and not self.spec.load_source_on_run:
-            function.with_source_archive(
-                source=self.spec.source,
-                target_dir=target_dir,
-                pull_at_runtime=False,
-            )
+            if self.spec.source.startswith("db://"):
+                logger.debug(
+                    "Project source is 'db://', which refers to metadata stored in the MLRun DB."
+                    " Skipping source archive setup for image build"
+                )
+            else:
+                function.with_source_archive(
+                    source=self.spec.source,
+                    target_dir=target_dir,
+                    pull_at_runtime=False,
+                )
         build = self.spec.build
         result = self.build_function(

mlrun/run.py CHANGED Viewed

@@ -894,7 +894,6 @@ def _run_pipeline(
 def retry_pipeline(
     run_id: str,
     project: str,
-    namespace: Optional[str] = None,
 ) -> str:
     """Retry a pipeline run.
@@ -903,7 +902,6 @@ def retry_pipeline(
     :param run_id: ID of the pipeline run to retry.
     :param project: name of the project associated with the pipeline run.
-    :param namespace: Optional; Kubernetes namespace to use if not the default.
     :returns: ID of the retried pipeline run or the ID of a cloned run if the original run is not retryable.
     :raises ValueError: If access to the remote API service is not available.
@@ -918,7 +916,6 @@ def retry_pipeline(
     pipeline_run_id = mldb.retry_pipeline(
         run_id=run_id,
         project=project,
-        namespace=namespace,
     )
     if pipeline_run_id == run_id:
         logger.info(
@@ -931,6 +928,35 @@ def retry_pipeline(
     return pipeline_run_id
+def terminate_pipeline(
+    run_id: str,
+    project: str,
+) -> str:
+    """Terminate a pipeline run.
+    This function terminates a running pipeline with the specified run ID. If the run is not in a
+    terminable state, an error is raised.
+    :param run_id: ID of the pipeline run to terminate.
+    :param project: name of the project associated with the pipeline run.
+    :returns: ID of the terminate pipeline run background task.
+    :raises ValueError: If access to the remote API service is not available.
+    """
+    mldb = mlrun.db.get_run_db()
+    if mldb.kind != "http":
+        raise ValueError(
+            "Terminating a pipeline requires access to remote API service. "
+            "Please set the dbpath URL."
+        )
+    pipeline_run_task = mldb.terminate_pipeline(
+        run_id=run_id,
+        project=project,
+    )
+    return pipeline_run_task["metadata"]["id"]
 def wait_for_pipeline_completion(
     run_id,
     timeout=60 * 60,
@@ -997,7 +1023,10 @@ def wait_for_pipeline_completion(
             _wait_for_pipeline_completion,
         )
     else:
-        client = mlrun_pipelines.utils.get_client(namespace=namespace)
+        client = mlrun_pipelines.utils.get_client(
+            logger=logger,
+            namespace=namespace,
+        )
         resp = client.wait_for_run_completion(run_id, timeout)
         if resp:
             resp = resp.to_dict()
@@ -1058,7 +1087,10 @@ def get_pipeline(
         )
     else:
-        client = mlrun_pipelines.utils.get_client(namespace=namespace)
+        client = mlrun_pipelines.utils.get_client(
+            logger=logger,
+            namespace=namespace,
+        )
         resp = client.get_run(run_id)
         if resp:
             resp = resp.to_dict()

mlrun/runtimes/daskjob.py CHANGED Viewed

@@ -92,7 +92,6 @@ class DaskSpec(KubeResourceSpec):
         preemption_mode=None,
         security_context=None,
         state_thresholds=None,
-        serving_spec=None,
     ):
         super().__init__(
             command=command,
@@ -122,7 +121,6 @@ class DaskSpec(KubeResourceSpec):
             preemption_mode=preemption_mode,
             security_context=security_context,
             state_thresholds=state_thresholds,
-            serving_spec=serving_spec,
         )
         self.args = args

mlrun/runtimes/kubejob.py CHANGED Viewed

@@ -207,7 +207,3 @@ class KubejobRuntime(KubeResource):
         raise NotImplementedError(
             f"Running a {self.kind} function from the client is not supported. Use .run() to submit the job to the API."
         )
-    @property
-    def serving_spec(self):
-        return self.spec.serving_spec

mlrun/runtimes/mpijob/abstract.py CHANGED Viewed

@@ -54,7 +54,6 @@ class MPIResourceSpec(KubeResourceSpec):
         preemption_mode=None,
         security_context=None,
         state_thresholds=None,
-        serving_spec=None,
     ):
         super().__init__(
             command=command,
@@ -84,7 +83,6 @@ class MPIResourceSpec(KubeResourceSpec):
             preemption_mode=preemption_mode,
             security_context=security_context,
             state_thresholds=state_thresholds,
-            serving_spec=serving_spec,
         )
         self.mpi_args = mpi_args or [
             "-x",

mlrun/runtimes/mpijob/v1.py CHANGED Viewed

@@ -49,7 +49,6 @@ class MPIV1ResourceSpec(MPIResourceSpec):
         preemption_mode=None,
         security_context=None,
         state_thresholds=None,
-        serving_spec=None,
     ):
         super().__init__(
             command=command,
@@ -80,7 +79,6 @@ class MPIV1ResourceSpec(MPIResourceSpec):
             preemption_mode=preemption_mode,
             security_context=security_context,
             state_thresholds=state_thresholds,
-            serving_spec=serving_spec,
         )
         self.clean_pod_policy = clean_pod_policy or MPIJobV1CleanPodPolicies.default()

mlrun/runtimes/nuclio/function.py CHANGED Viewed

@@ -154,7 +154,6 @@ class NuclioSpec(KubeResourceSpec):
         add_templated_ingress_host_mode=None,
         state_thresholds=None,
         disable_default_http_trigger=None,
-        serving_spec=None,
     ):
         super().__init__(
             command=command,
@@ -184,7 +183,6 @@ class NuclioSpec(KubeResourceSpec):
             preemption_mode=preemption_mode,
             security_context=security_context,
             state_thresholds=state_thresholds,
-            serving_spec=serving_spec,
         )
         self.base_spec = base_spec or {}

mlrun/runtimes/nuclio/serving.py CHANGED Viewed

@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import copy
 import json
 import os
 import warnings
@@ -43,8 +42,6 @@ from mlrun.serving.states import (
 )
 from mlrun.utils import get_caller_globals, logger, set_paths
-from .. import KubejobRuntime
-from ..pod import KubeResourceSpec
 from .function import NuclioSpec, RemoteRuntime, min_nuclio_versions
 serving_subkind = "serving_v2"
@@ -152,7 +149,6 @@ class ServingSpec(NuclioSpec):
         state_thresholds=None,
         disable_default_http_trigger=None,
         model_endpoint_creation_task_name=None,
-        serving_spec=None,
     ):
         super().__init__(
             command=command,
@@ -193,7 +189,6 @@ class ServingSpec(NuclioSpec):
             service_type=service_type,
             add_templated_ingress_host_mode=add_templated_ingress_host_mode,
             disable_default_http_trigger=disable_default_http_trigger,
-            serving_spec=serving_spec,
         )
         self.models = models or {}
@@ -482,6 +477,7 @@ class ServingRuntime(RemoteRuntime):
                 state = TaskStep(
                     class_name,
                     class_args,
+                    name=key,
                     handler=handler,
                     function=child_function,
                     model_endpoint_creation_strategy=creation_strategy,
@@ -707,7 +703,6 @@ class ServingRuntime(RemoteRuntime):
             "track_models": self.spec.track_models,
             "default_content_type": self.spec.default_content_type,
             "model_endpoint_creation_task_name": self.spec.model_endpoint_creation_task_name,
-            "filename": getattr(self.spec, "filename", None),
         }
         if self.spec.secret_sources:
@@ -716,10 +711,6 @@ class ServingRuntime(RemoteRuntime):
         return json.dumps(serving_spec)
-    @property
-    def serving_spec(self):
-        return self._get_serving_spec()
     def to_mock_server(
         self,
         namespace=None,
@@ -751,13 +742,10 @@ class ServingRuntime(RemoteRuntime):
             set_paths(workdir)
             os.chdir(workdir)
-        system_graph = None
-        if isinstance(self.spec.graph, RootFlowStep):
-            system_graph = add_system_steps_to_graph(copy.deepcopy(self.spec.graph))
         server = create_graph_server(
             parameters=self.spec.parameters,
             load_mode=self.spec.load_mode,
-            graph=system_graph or self.spec.graph,
+            graph=self.spec.graph,
             verbose=self.verbose,
             current_function=current_function,
             graph_initializer=self.spec.graph_initializer,
@@ -778,6 +766,18 @@ class ServingRuntime(RemoteRuntime):
             monitoring_mock=self.spec.track_models,
         )
+        if (
+            isinstance(self.spec.graph, RootFlowStep)
+            and self.spec.graph.include_monitored_step()
+        ):
+            server.graph = add_system_steps_to_graph(
+                server.project,
+                server.graph,
+                self.spec.track_models,
+                server.context,
+                self.spec,
+            )
         if workdir:
             os.chdir(old_workdir)
@@ -815,40 +815,3 @@ class ServingRuntime(RemoteRuntime):
             "Turn off the mock (mock=False) and make sure Nuclio is installed for real deployment to Nuclio"
         )
         self._mock_server = self.to_mock_server()
-    def to_job(self) -> KubejobRuntime:
-        """Convert this ServingRuntime to a KubejobRuntime, so that the graph can be run as a standalone job."""
-        if self.spec.function_refs:
-            raise mlrun.errors.MLRunInvalidArgumentError(
-                f"Cannot convert function '{self.metadata.name}' to a job because it has child functions"
-            )
-        spec = KubeResourceSpec(
-            image=self.spec.image,
-            mode=self.spec.mode,
-            volumes=self.spec.volumes,
-            volume_mounts=self.spec.volume_mounts,
-            env=self.spec.env,
-            resources=self.spec.resources,
-            default_handler="mlrun.serving.server.execute_graph",
-            pythonpath=self.spec.pythonpath,
-            entry_points=self.spec.entry_points,
-            description=self.spec.description,
-            workdir=self.spec.workdir,
-            image_pull_secret=self.spec.image_pull_secret,
-            node_name=self.spec.node_name,
-            node_selector=self.spec.node_selector,
-            affinity=self.spec.affinity,
-            disable_auto_mount=self.spec.disable_auto_mount,
-            priority_class_name=self.spec.priority_class_name,
-            tolerations=self.spec.tolerations,
-            preemption_mode=self.spec.preemption_mode,
-            security_context=self.spec.security_context,
-            state_thresholds=self.spec.state_thresholds,
-            serving_spec=self._get_serving_spec(),
-        )
-        job = KubejobRuntime(
-            spec=spec,
-            metadata=self.metadata,
-        )
-        return job

mlrun/runtimes/pod.py CHANGED Viewed

@@ -103,7 +103,6 @@ class KubeResourceSpec(FunctionSpec):
         "preemption_mode",
         "security_context",
         "state_thresholds",
-        "serving_spec",
     ]
     _default_fields_to_strip = FunctionSpec._default_fields_to_strip + [
         "volumes",
@@ -179,7 +178,6 @@ class KubeResourceSpec(FunctionSpec):
         preemption_mode=None,
         security_context=None,
         state_thresholds=None,
-        serving_spec=None,
     ):
         super().__init__(
             command=command,
@@ -225,7 +223,6 @@ class KubeResourceSpec(FunctionSpec):
             state_thresholds
             or mlrun.mlconf.function.spec.state_thresholds.default.to_dict()
         )
-        self.serving_spec = serving_spec
         # Termination grace period is internal for runtimes that have a pod termination hook hence it is not in the
         # _dict_fields and doesn't have a setter.
         self._termination_grace_period_seconds = None

mlrun/runtimes/remotesparkjob.py CHANGED Viewed

@@ -58,7 +58,6 @@ class RemoteSparkSpec(KubeResourceSpec):
         preemption_mode=None,
         security_context=None,
         state_thresholds=None,
-        serving_spec=None,
     ):
         super().__init__(
             command=command,
@@ -88,7 +87,6 @@ class RemoteSparkSpec(KubeResourceSpec):
             preemption_mode=preemption_mode,
             security_context=security_context,
             state_thresholds=state_thresholds,
-            serving_spec=serving_spec,
         )
         self.provider = provider

mlrun/runtimes/sparkjob/spark3job.py CHANGED Viewed

@@ -168,7 +168,6 @@ class Spark3JobSpec(KubeResourceSpec):
         executor_cores=None,
         security_context=None,
         state_thresholds=None,
-        serving_spec=None,
     ):
         super().__init__(
             command=command,
@@ -198,7 +197,6 @@ class Spark3JobSpec(KubeResourceSpec):
             preemption_mode=preemption_mode,
             security_context=security_context,
             state_thresholds=state_thresholds,
-            serving_spec=serving_spec,
         )
         self.driver_resources = driver_resources or {}

mlrun/serving/__init__.py CHANGED Viewed

@@ -27,6 +27,7 @@ __all__ = [
     "ModelRunner",
     "Model",
     "ModelSelector",
+    "MonitoredStep",
 ]
 from .routers import ModelRouter, VotingEnsemble  # noqa
@@ -45,6 +46,7 @@ from .states import (
     ModelRunner,
     Model,
     ModelSelector,
+    MonitoredStep,
 )  # noqa
 from .v1_serving import MLModelServer, new_v1_model_server  # noqa
 from .v2_serving import V2ModelServer  # noqa

mlrun 1.10.0rc7__py3-none-any.whl → 1.10.0rc9__py3-none-any.whl

Potentially problematic release.

mlrun 1.10.0rc7__py3-none-any.whl → 1.10.0rc9__py3-none-any.whl