mlrun 1.10.0rc10__py3-none-any.whl → 1.10.0rc11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mlrun might be problematic.
Files changed (51)
  1. mlrun/artifacts/manager.py +1 -1
  2. mlrun/common/constants.py +11 -0
  3. mlrun/common/schemas/model_monitoring/__init__.py +2 -0
  4. mlrun/common/schemas/model_monitoring/functions.py +2 -0
  5. mlrun/common/schemas/model_monitoring/model_endpoints.py +19 -1
  6. mlrun/common/schemas/serving.py +1 -0
  7. mlrun/common/schemas/workflow.py +3 -2
  8. mlrun/datastore/azure_blob.py +1 -1
  9. mlrun/datastore/base.py +4 -2
  10. mlrun/datastore/datastore.py +46 -14
  11. mlrun/datastore/google_cloud_storage.py +1 -1
  12. mlrun/datastore/s3.py +16 -5
  13. mlrun/datastore/sources.py +2 -2
  14. mlrun/datastore/targets.py +2 -2
  15. mlrun/db/__init__.py +0 -1
  16. mlrun/db/base.py +12 -0
  17. mlrun/db/httpdb.py +35 -0
  18. mlrun/db/nopdb.py +10 -0
  19. mlrun/execution.py +12 -0
  20. mlrun/frameworks/tf_keras/mlrun_interface.py +7 -18
  21. mlrun/launcher/base.py +1 -0
  22. mlrun/launcher/client.py +1 -0
  23. mlrun/launcher/local.py +4 -0
  24. mlrun/model.py +15 -4
  25. mlrun/model_monitoring/applications/base.py +74 -56
  26. mlrun/model_monitoring/db/tsdb/base.py +52 -19
  27. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +179 -11
  28. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +26 -11
  29. mlrun/model_monitoring/helpers.py +48 -0
  30. mlrun/projects/pipelines.py +12 -3
  31. mlrun/projects/project.py +30 -0
  32. mlrun/runtimes/daskjob.py +2 -0
  33. mlrun/runtimes/kubejob.py +4 -0
  34. mlrun/runtimes/mpijob/abstract.py +2 -0
  35. mlrun/runtimes/mpijob/v1.py +2 -0
  36. mlrun/runtimes/nuclio/function.py +2 -0
  37. mlrun/runtimes/nuclio/serving.py +59 -0
  38. mlrun/runtimes/pod.py +3 -0
  39. mlrun/runtimes/remotesparkjob.py +2 -0
  40. mlrun/runtimes/sparkjob/spark3job.py +2 -0
  41. mlrun/serving/server.py +97 -3
  42. mlrun/serving/states.py +146 -38
  43. mlrun/utils/version/version.json +2 -2
  44. {mlrun-1.10.0rc10.dist-info → mlrun-1.10.0rc11.dist-info}/METADATA +13 -6
  45. {mlrun-1.10.0rc10.dist-info → mlrun-1.10.0rc11.dist-info}/RECORD +49 -51
  46. mlrun/db/sql_types.py +0 -160
  47. mlrun/utils/db.py +0 -71
  48. {mlrun-1.10.0rc10.dist-info → mlrun-1.10.0rc11.dist-info}/WHEEL +0 -0
  49. {mlrun-1.10.0rc10.dist-info → mlrun-1.10.0rc11.dist-info}/entry_points.txt +0 -0
  50. {mlrun-1.10.0rc10.dist-info → mlrun-1.10.0rc11.dist-info}/licenses/LICENSE +0 -0
  51. {mlrun-1.10.0rc10.dist-info → mlrun-1.10.0rc11.dist-info}/top_level.txt +0 -0
mlrun/projects/pipelines.py CHANGED
@@ -21,6 +21,7 @@ import typing
  import uuid

  import mlrun
+ import mlrun.common.constants as mlrun_constants
  import mlrun.common.runtimes.constants
  import mlrun.common.schemas
  import mlrun.common.schemas.function
@@ -1086,10 +1087,16 @@ def rerun_workflow(

      # Retry the pipeline - TODO: add submit-direct flag when created
      db = mlrun.get_run_db()
-     new_pipeline_id = db.retry_pipeline(run_uid, project_name)
+     new_pipeline_id = db.retry_pipeline(
+         run_uid, project_name, submit_mode=mlrun_constants.WorkflowSubmitMode.direct
+     )

      # Store result for observability
-     context.set_label("workflow-id", new_pipeline_id)
+     context.set_label(
+         mlrun_constants.MLRunInternalLabels.workflow_id, new_pipeline_id
+     )
+     context.update_run()
+
      context.log_result("workflow_id", new_pipeline_id)

      # wait for pipeline completion so monitor will push terminal notifications
@@ -1226,7 +1233,9 @@ def load_and_run_workflow(
      context.logger.info(
          "Associating workflow-runner with workflow ID", run_id=run.run_id
      )
-     context.set_label("workflow-id", run.run_id)
+     context.set_label(mlrun_constants.MLRunInternalLabels.workflow_id, run.run_id)
+     context.update_run()
+
      context.log_result(key="workflow_id", value=run.run_id)
      context.log_result(key="engine", value=run._engine.engine, commit=True)

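Note: the workflow runner now records the workflow id under the internal label constant (previously the hard-coded "workflow-id" string) and persists it immediately via context.update_run(). A minimal sketch of filtering runs by that label (project name and pipeline id are hypothetical):

import mlrun
import mlrun.common.constants as mlrun_constants

project = mlrun.get_or_create_project("my-project")
runs = project.list_runs(
    labels=f"{mlrun_constants.MLRunInternalLabels.workflow_id}=<pipeline-id>"
)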
mlrun/projects/project.py CHANGED
@@ -4968,6 +4968,36 @@ class MlrunProject(ModelObj):
          include_infra=include_infra,
      )

+     def get_monitoring_function_summary(
+         self,
+         name: str,
+         start: Optional[datetime.datetime] = None,
+         end: Optional[datetime.datetime] = None,
+         include_latest_metrics: bool = False,
+     ) -> mlrun.common.schemas.model_monitoring.FunctionSummary:
+         """Get a monitoring function summary for the specified project and function name.
+         :param name: Name of the monitoring function to retrieve the summary for.
+         :param start: Start time for filtering the results (optional).
+         :param end: End time for filtering the results (optional).
+         :param include_latest_metrics: Whether to include the latest metrics in the response (default is False).
+
+         :return: A FunctionSummary object containing information about the monitoring function.
+         """
+         if start is not None and end is not None:
+             if start.tzinfo is None or end.tzinfo is None:
+                 raise mlrun.errors.MLRunInvalidArgumentTypeError(
+                     "Custom start and end times must contain the timezone."
+                 )
+
+         db = mlrun.db.get_run_db(secrets=self._secrets)
+         return db.get_monitoring_function_summary(
+             project=self.metadata.name,
+             function_name=name,
+             start=start,
+             end=end,
+             include_latest_metrics=include_latest_metrics,
+         )
+
      def list_runs(
          self,
          name: Optional[str] = None,
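A minimal usage sketch for the new method (project and function names are hypothetical); note that custom start/end times must be timezone-aware, otherwise MLRunInvalidArgumentTypeError is raised:

import datetime

import mlrun

project = mlrun.get_or_create_project("my-project")
summary = project.get_monitoring_function_summary(
    name="my-monitoring-app",
    start=datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(days=1),
    end=datetime.datetime.now(datetime.timezone.utc),
    include_latest_metrics=True,
)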
mlrun/runtimes/daskjob.py CHANGED
@@ -92,6 +92,7 @@ class DaskSpec(KubeResourceSpec):
          preemption_mode=None,
          security_context=None,
          state_thresholds=None,
+         serving_spec=None,
      ):
          super().__init__(
              command=command,
@@ -121,6 +122,7 @@ class DaskSpec(KubeResourceSpec):
              preemption_mode=preemption_mode,
              security_context=security_context,
              state_thresholds=state_thresholds,
+             serving_spec=serving_spec,
          )
          self.args = args

mlrun/runtimes/kubejob.py CHANGED
@@ -207,3 +207,7 @@ class KubejobRuntime(KubeResource):
          raise NotImplementedError(
              f"Running a {self.kind} function from the client is not supported. Use .run() to submit the job to the API."
          )
+
+     @property
+     def serving_spec(self):
+         return self.spec.serving_spec
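The new serving_spec field is threaded through every KubeResourceSpec-derived runtime spec below, so a serialized serving graph can ride along on a batch job. A rough sketch (image and spec values are illustrative, and constructing the runtime directly like this is for demonstration only):

from mlrun.runtimes.kubejob import KubejobRuntime
from mlrun.runtimes.pod import KubeResourceSpec

spec = KubeResourceSpec(image="mlrun/mlrun", serving_spec='{"version": "v2"}')
job = KubejobRuntime(spec=spec)
assert job.serving_spec == '{"version": "v2"}'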
mlrun/runtimes/mpijob/abstract.py CHANGED
@@ -54,6 +54,7 @@ class MPIResourceSpec(KubeResourceSpec):
          preemption_mode=None,
          security_context=None,
          state_thresholds=None,
+         serving_spec=None,
      ):
          super().__init__(
              command=command,
@@ -83,6 +84,7 @@ class MPIResourceSpec(KubeResourceSpec):
              preemption_mode=preemption_mode,
              security_context=security_context,
              state_thresholds=state_thresholds,
+             serving_spec=serving_spec,
          )
          self.mpi_args = mpi_args or [
              "-x",
mlrun/runtimes/mpijob/v1.py CHANGED
@@ -49,6 +49,7 @@ class MPIV1ResourceSpec(MPIResourceSpec):
          preemption_mode=None,
          security_context=None,
          state_thresholds=None,
+         serving_spec=None,
      ):
          super().__init__(
              command=command,
@@ -79,6 +80,7 @@ class MPIV1ResourceSpec(MPIResourceSpec):
              preemption_mode=preemption_mode,
              security_context=security_context,
              state_thresholds=state_thresholds,
+             serving_spec=serving_spec,
          )
          self.clean_pod_policy = clean_pod_policy or MPIJobV1CleanPodPolicies.default()

mlrun/runtimes/nuclio/function.py CHANGED
@@ -154,6 +154,7 @@ class NuclioSpec(KubeResourceSpec):
          add_templated_ingress_host_mode=None,
          state_thresholds=None,
          disable_default_http_trigger=None,
+         serving_spec=None,
      ):
          super().__init__(
              command=command,
@@ -183,6 +184,7 @@ class NuclioSpec(KubeResourceSpec):
              preemption_mode=preemption_mode,
              security_context=security_context,
              state_thresholds=state_thresholds,
+             serving_spec=serving_spec,
          )

          self.base_spec = base_spec or {}
mlrun/runtimes/nuclio/serving.py CHANGED
@@ -42,6 +42,8 @@ from mlrun.serving.states import (
  )
  from mlrun.utils import get_caller_globals, logger, set_paths

+ from .. import KubejobRuntime
+ from ..pod import KubeResourceSpec
  from .function import NuclioSpec, RemoteRuntime, min_nuclio_versions

  serving_subkind = "serving_v2"
@@ -149,6 +151,7 @@ class ServingSpec(NuclioSpec):
          state_thresholds=None,
          disable_default_http_trigger=None,
          model_endpoint_creation_task_name=None,
+         serving_spec=None,
      ):
          super().__init__(
              command=command,
@@ -189,6 +192,7 @@ class ServingSpec(NuclioSpec):
              service_type=service_type,
              add_templated_ingress_host_mode=add_templated_ingress_host_mode,
              disable_default_http_trigger=disable_default_http_trigger,
+             serving_spec=serving_spec,
          )

          self.models = models or {}
@@ -296,6 +300,7 @@ class ServingRuntime(RemoteRuntime):
              self.spec.graph = step
          elif topology == StepKinds.flow:
              self.spec.graph = RootFlowStep(engine=engine or "async")
+             self.spec.graph.track_models = self.spec.track_models
          else:
              raise mlrun.errors.MLRunInvalidArgumentError(
                  f"unsupported topology {topology}, use 'router' or 'flow'"
@@ -331,6 +336,8 @@
          """
          # Applying model monitoring configurations
          self.spec.track_models = enable_tracking
+         if self.spec.graph and isinstance(self.spec.graph, RootFlowStep):
+             self.spec.graph.track_models = enable_tracking
          if self._spec and self._spec.function_refs:
              logger.debug(
                  "Set tracking for children references", enable_tracking=enable_tracking
@@ -343,6 +350,16 @@
                      name
                  ]._function.spec.track_models = enable_tracking

+                 if self._spec.function_refs[
+                     name
+                 ]._function.spec.graph and isinstance(
+                     self._spec.function_refs[name]._function.spec.graph,
+                     RootFlowStep,
+                 ):
+                     self._spec.function_refs[
+                         name
+                     ]._function.spec.graph.track_models = enable_tracking
+
          if not 0 < sampling_percentage <= 100:
              raise mlrun.errors.MLRunInvalidArgumentError(
                  "`sampling_percentage` must be greater than 0 and less or equal to 100."
@@ -703,6 +720,7 @@
              "track_models": self.spec.track_models,
              "default_content_type": self.spec.default_content_type,
              "model_endpoint_creation_task_name": self.spec.model_endpoint_creation_task_name,
+             "filename": getattr(self.spec, "filename", None),
          }

          if self.spec.secret_sources:
@@ -711,6 +729,10 @@

          return json.dumps(serving_spec)

+     @property
+     def serving_spec(self):
+         return self._get_serving_spec()
+
      def to_mock_server(
          self,
          namespace=None,
@@ -815,3 +837,40 @@
                  "Turn off the mock (mock=False) and make sure Nuclio is installed for real deployment to Nuclio"
              )
          self._mock_server = self.to_mock_server()
+
+     def to_job(self) -> KubejobRuntime:
+         """Convert this ServingRuntime to a KubejobRuntime, so that the graph can be run as a standalone job."""
+         if self.spec.function_refs:
+             raise mlrun.errors.MLRunInvalidArgumentError(
+                 f"Cannot convert function '{self.metadata.name}' to a job because it has child functions"
+             )
+
+         spec = KubeResourceSpec(
+             image=self.spec.image,
+             mode=self.spec.mode,
+             volumes=self.spec.volumes,
+             volume_mounts=self.spec.volume_mounts,
+             env=self.spec.env,
+             resources=self.spec.resources,
+             default_handler="mlrun.serving.server.execute_graph",
+             pythonpath=self.spec.pythonpath,
+             entry_points=self.spec.entry_points,
+             description=self.spec.description,
+             workdir=self.spec.workdir,
+             image_pull_secret=self.spec.image_pull_secret,
+             node_name=self.spec.node_name,
+             node_selector=self.spec.node_selector,
+             affinity=self.spec.affinity,
+             disable_auto_mount=self.spec.disable_auto_mount,
+             priority_class_name=self.spec.priority_class_name,
+             tolerations=self.spec.tolerations,
+             preemption_mode=self.spec.preemption_mode,
+             security_context=self.spec.security_context,
+             state_thresholds=self.spec.state_thresholds,
+             serving_spec=self._get_serving_spec(),
+         )
+         job = KubejobRuntime(
+             spec=spec,
+             metadata=self.metadata,
+         )
+         return job
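A usage sketch for the new conversion, assuming fn is a ServingRuntime as in the sketch above (input path and parameters are hypothetical). The resulting job's default handler is mlrun.serving.server.execute_graph, which accepts the data input plus the batching/batch_size params:

job = fn.to_job()  # raises if fn has child functions
run = job.run(
    inputs={"data": "s3://my-bucket/inputs.parquet"},
    params={"batching": True, "batch_size": 100},
)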
mlrun/runtimes/pod.py CHANGED
@@ -103,6 +103,7 @@ class KubeResourceSpec(FunctionSpec):
          "preemption_mode",
          "security_context",
          "state_thresholds",
+         "serving_spec",
      ]
      _default_fields_to_strip = FunctionSpec._default_fields_to_strip + [
          "volumes",
@@ -178,6 +179,7 @@ class KubeResourceSpec(FunctionSpec):
          preemption_mode=None,
          security_context=None,
          state_thresholds=None,
+         serving_spec=None,
      ):
          super().__init__(
              command=command,
@@ -223,6 +225,7 @@ class KubeResourceSpec(FunctionSpec):
              state_thresholds
              or mlrun.mlconf.function.spec.state_thresholds.default.to_dict()
          )
+         self.serving_spec = serving_spec
          # Termination grace period is internal for runtimes that have a pod termination hook hence it is not in the
          # _dict_fields and doesn't have a setter.
          self._termination_grace_period_seconds = None
mlrun/runtimes/remotesparkjob.py CHANGED
@@ -58,6 +58,7 @@ class RemoteSparkSpec(KubeResourceSpec):
          preemption_mode=None,
          security_context=None,
          state_thresholds=None,
+         serving_spec=None,
      ):
          super().__init__(
              command=command,
@@ -87,6 +88,7 @@
              preemption_mode=preemption_mode,
              security_context=security_context,
              state_thresholds=state_thresholds,
+             serving_spec=serving_spec,
          )
          self.provider = provider

mlrun/runtimes/sparkjob/spark3job.py CHANGED
@@ -168,6 +168,7 @@ class Spark3JobSpec(KubeResourceSpec):
          executor_cores=None,
          security_context=None,
          state_thresholds=None,
+         serving_spec=None,
      ):
          super().__init__(
              command=command,
@@ -197,6 +198,7 @@
              preemption_mode=preemption_mode,
              security_context=security_context,
              state_thresholds=state_thresholds,
+             serving_spec=serving_spec,
          )

          self.driver_resources = driver_resources or {}
mlrun/serving/server.py CHANGED
@@ -21,8 +21,9 @@ import os
  import socket
  import traceback
  import uuid
- from typing import Optional, Union
+ from typing import Any, Optional, Union

+ import storey
  from nuclio import Context as NuclioContext
  from nuclio.request import Logger as NuclioLogger

@@ -38,9 +39,10 @@ from mlrun.secrets import SecretsStore

  from ..common.helpers import parse_versioned_object_uri
  from ..common.schemas.model_monitoring.constants import FileTargetKind
- from ..datastore import get_stream_pusher
+ from ..datastore import DataItem, get_stream_pusher
  from ..datastore.store_resources import ResourceCache
  from ..errors import MLRunInvalidArgumentError
+ from ..execution import MLClientCtx
  from ..model import ModelObj
  from ..utils import get_caller_globals
  from .states import (
@@ -322,7 +324,11 @@ class GraphServer(ModelObj):

      def _process_response(self, context, response, get_body):
          body = response.body
-         if isinstance(body, context.Response) or get_body:
+         if (
+             isinstance(context, MLClientCtx)
+             or isinstance(body, context.Response)
+             or get_body
+         ):
              return body

          if body and not isinstance(body, (str, bytes)):
@@ -535,6 +541,94 @@ def v2_serving_init(context, namespace=None):
      _set_callbacks(server, context)


+ async def async_execute_graph(
+     context: MLClientCtx,
+     data: DataItem,
+     batching: bool,
+     batch_size: Optional[int],
+ ) -> list[Any]:
+     spec = mlrun.utils.get_serving_spec()
+
+     source_filename = spec.get("filename", None)
+     namespace = {}
+     if source_filename:
+         with open(source_filename) as f:
+             exec(f.read(), namespace)
+
+     server = GraphServer.from_dict(spec)
+
+     if config.log_level.lower() == "debug":
+         server.verbose = True
+     context.logger.info_with("Initializing states", namespace=namespace)
+     kwargs = {}
+     if hasattr(context, "is_mock"):
+         kwargs["is_mock"] = context.is_mock
+     server.init_states(
+         context=None,  # this context is expected to be a nuclio context, which we don't have in this flow
+         namespace=namespace,
+         **kwargs,
+     )
+     context.logger.info("Initializing graph steps")
+     server.init_object(namespace)
+
+     context.logger.info_with("Graph was initialized", verbose=server.verbose)
+
+     if server.verbose:
+         context.logger.info(server.to_yaml())
+
+     df = data.as_df()
+
+     responses = []
+
+     async def run(body):
+         event = storey.Event(id=index, body=body)
+         response = await server.run(event, context)
+         responses.append(response)
+
+     if batching and not batch_size:
+         batch_size = len(df)
+
+     batch = []
+     for index, row in df.iterrows():
+         data = row.to_dict()
+         if batching:
+             batch.append(data)
+             if len(batch) == batch_size:
+                 await run(batch)
+                 batch = []
+         else:
+             await run(data)
+
+     if batch:
+         await run(batch)
+
+     termination_result = server.wait_for_completion()
+     if asyncio.iscoroutine(termination_result):
+         await termination_result
+
+     return responses
+
+
+ def execute_graph(
+     context: MLClientCtx,
+     data: DataItem,
+     batching: bool = False,
+     batch_size: Optional[int] = None,
+ ) -> (list[Any], Any):
+     """
+     Execute graph as a job, from start to finish.
+
+     :param context: The job's execution client context.
+     :param data: The input data to the job, to be pushed into the graph row by row, or in batches.
+     :param batching: Whether to push one or more batches into the graph rather than row by row.
+     :param batch_size: The number of rows to push per batch. If not set, and batching=True, the entire dataset will
+                        be pushed into the graph in one batch.
+
+     :return: A list of responses.
+     """
+     return asyncio.run(async_execute_graph(context, data, batching, batch_size))
+
+
  def _set_callbacks(server, context):
      if not server.graph.supports_termination() or not hasattr(context, "platform"):
          return
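The batching arguments above determine how many events the graph receives per dataset. A standalone sketch of that partitioning logic, mirroring the loop in async_execute_graph (illustrative only):

from typing import Any, Optional

def partition(rows: list[Any], batching: bool = False, batch_size: Optional[int] = None) -> list[Any]:
    # batching with no batch_size means one batch containing the whole dataset
    if batching and not batch_size:
        batch_size = len(rows)
    events, batch = [], []
    for row in rows:
        if batching:
            batch.append(row)
            if len(batch) == batch_size:
                events.append(batch)
                batch = []
        else:
            events.append(row)
    if batch:
        events.append(batch)  # flush the trailing partial batch
    return events

assert partition([1, 2, 3, 4, 5], batching=True, batch_size=2) == [[1, 2], [3, 4], [5]]
assert partition([1, 2, 3], batching=True) == [[1, 2, 3]]
assert partition([1, 2]) == [1, 2]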