PyPI - mlrun - Versions diffs - 1.8.0rc19__py3-none-any.whl → 1.8.0rc26__py3-none-any.whl - Mend

mlrun 1.8.0rc19__py3-none-any.whl → 1.8.0rc26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mlrun might be problematic. Click here for more details.

Files changed (52) hide show

mlrun/__init__.py +37 -3
mlrun/__main__.py +5 -0
mlrun/alerts/alert.py +1 -0
mlrun/artifacts/document.py +78 -36
mlrun/common/formatters/feature_set.py +1 -0
mlrun/common/runtimes/constants.py +17 -0
mlrun/common/schemas/alert.py +3 -0
mlrun/common/schemas/client_spec.py +0 -1
mlrun/common/schemas/model_monitoring/constants.py +32 -9
mlrun/common/schemas/model_monitoring/model_endpoints.py +2 -0
mlrun/common/schemas/workflow.py +1 -0
mlrun/config.py +39 -6
mlrun/datastore/datastore_profile.py +58 -16
mlrun/datastore/sources.py +7 -1
mlrun/datastore/vectorstore.py +20 -1
mlrun/db/base.py +20 -0
mlrun/db/httpdb.py +97 -10
mlrun/db/nopdb.py +19 -0
mlrun/errors.py +4 -0
mlrun/execution.py +15 -6
mlrun/frameworks/_common/model_handler.py +0 -2
mlrun/launcher/client.py +2 -2
mlrun/launcher/local.py +5 -1
mlrun/model_monitoring/applications/_application_steps.py +3 -1
mlrun/model_monitoring/controller.py +266 -103
mlrun/model_monitoring/db/tsdb/__init__.py +11 -23
mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +2 -0
mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +20 -21
mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +35 -34
mlrun/model_monitoring/helpers.py +16 -10
mlrun/model_monitoring/stream_processing.py +106 -35
mlrun/package/context_handler.py +1 -1
mlrun/package/packagers_manager.py +4 -18
mlrun/projects/pipelines.py +18 -5
mlrun/projects/project.py +156 -39
mlrun/runtimes/nuclio/serving.py +22 -13
mlrun/runtimes/sparkjob/spark3job.py +1 -1
mlrun/secrets.py +1 -1
mlrun/serving/server.py +11 -3
mlrun/serving/states.py +65 -8
mlrun/serving/v2_serving.py +67 -44
mlrun/utils/helpers.py +111 -23
mlrun/utils/notifications/notification/base.py +6 -1
mlrun/utils/notifications/notification/slack.py +5 -1
mlrun/utils/notifications/notification_pusher.py +67 -36
mlrun/utils/version/version.json +2 -2
{mlrun-1.8.0rc19.dist-info → mlrun-1.8.0rc26.dist-info}/METADATA +33 -16
{mlrun-1.8.0rc19.dist-info → mlrun-1.8.0rc26.dist-info}/RECORD +52 -52
{mlrun-1.8.0rc19.dist-info → mlrun-1.8.0rc26.dist-info}/WHEEL +1 -1
{mlrun-1.8.0rc19.dist-info → mlrun-1.8.0rc26.dist-info}/LICENSE +0 -0
{mlrun-1.8.0rc19.dist-info → mlrun-1.8.0rc26.dist-info}/entry_points.txt +0 -0
{mlrun-1.8.0rc19.dist-info → mlrun-1.8.0rc26.dist-info}/top_level.txt +0 -0

mlrun/serving/v2_serving.py CHANGED Viewed

@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import random
 import threading
 import time
 import traceback
@@ -148,16 +149,24 @@ class V2ModelServer(StepToDict):
         if not self.context.is_mock and not self.model_spec:
             self.get_model()
         if not self.context.is_mock or self.context.monitoring_mock:
-            self.model_endpoint = mlrun.get_run_db().get_model_endpoint(
-                project=server.project,
-                name=self.name,
-                function_name=server.function_name,
-                function_tag=server.function_tag or "latest",
-            )
-            self.model_endpoint_uid = self.model_endpoint.metadata.uid
+            try:
+                self.model_endpoint = mlrun.get_run_db().get_model_endpoint(
+                    project=server.project,
+                    name=self.name,
+                    function_name=server.function_name,
+                    function_tag=server.function_tag or "latest",
+                )
+                self.model_endpoint_uid = self.model_endpoint.metadata.uid
+            except mlrun.errors.MLRunNotFoundError:
+                logger.info(
+                    "Model Endpoint not found for this step we will not monitor this model",
+                    function_name=server.function_name,
+                    name=self.name,
+                )
+                self.model_endpoint, self.model_endpoint_uid = None, None
         self._model_logger = (
             _ModelLogPusher(self, self.context)
-            if self.context and self.context.stream.enabled
+            if self.context and self.context.stream.enabled and self.model_endpoint_uid
             else None
         )
@@ -283,7 +292,6 @@ class V2ModelServer(StepToDict):
             }
             if self.version:
                 response["model_version"] = self.version
         elif op == "ready" and event.method == "GET":
             # get model health operation
             setattr(event, "terminated", True)
@@ -468,13 +476,9 @@ class _ModelLogPusher:
         self.hostname = context.stream.hostname
         self.function_uri = context.stream.function_uri
         self.stream_path = context.stream.stream_uri
-        self.stream_batch = int(context.get_param("log_stream_batch", 1))
-        self.stream_sample = int(context.get_param("log_stream_sample", 1))
+        self.sampling_percentage = float(context.get_param("sampling_percentage", 100))
         self.output_stream = output_stream or context.stream.output_stream
         self._worker = context.worker_id
-        self._sample_iter = 0
-        self._batch_iter = 0
-        self._batch = []
     def base_data(self):
         base_data = {
@@ -485,6 +489,7 @@ class _ModelLogPusher:
             "host": self.hostname,
             "function_uri": self.function_uri,
             "endpoint_id": self.model.model_endpoint_uid,
+            "sampling_percentage": self.sampling_percentage,
         }
         if getattr(self.model, "labels", None):
             base_data["labels"] = self.model.labels
@@ -504,37 +509,55 @@ class _ModelLogPusher:
             self.output_stream.push([data], partition_key=partition_key)
             return
-        self._sample_iter = (self._sample_iter + 1) % self.stream_sample
-        if self.output_stream and self._sample_iter == 0:
+        if self.output_stream:
+            # Ensure that the inputs are a list of lists
+            request["inputs"] = (
+                request["inputs"]
+                if not any(not isinstance(req, list) for req in request["inputs"])
+                else [request["inputs"]]
+            )
             microsec = (now_date() - start).microseconds
-            if self.stream_batch > 1:
-                if self._batch_iter == 0:
-                    self._batch = []
-                self._batch.append(
-                    [request, op, resp, str(start), microsec, self.model.metrics]
+            if self.sampling_percentage != 100:
+                # Randomly select a subset of the requests based on the percentage
+                num_of_inputs = len(request["inputs"])
+                sampled_requests_indices = self._pick_random_requests(
+                    num_of_inputs, self.sampling_percentage
                 )
-                self._batch_iter = (self._batch_iter + 1) % self.stream_batch
-                if self._batch_iter == 0:
-                    data = self.base_data()
-                    data["headers"] = [
-                        "request",
-                        "op",
-                        "resp",
-                        "when",
-                        "microsec",
-                        "metrics",
+                if not sampled_requests_indices:
+                    # No events were selected for sampling
+                    return
+                request["inputs"] = [
+                    request["inputs"][i] for i in sampled_requests_indices
+                ]
+                if resp and "outputs" in resp and isinstance(resp["outputs"], list):
+                    resp["outputs"] = [
+                        resp["outputs"][i] for i in sampled_requests_indices
                     ]
-                    data["values"] = self._batch
-                    self.output_stream.push([data], partition_key=partition_key)
-            else:
-                data = self.base_data()
-                data["request"] = request
-                data["op"] = op
-                data["resp"] = resp
-                data["when"] = start_str
-                data["microsec"] = microsec
-                if getattr(self.model, "metrics", None):
-                    data["metrics"] = self.model.metrics
-                self.output_stream.push([data], partition_key=partition_key)
+            data = self.base_data()
+            data["request"] = request
+            data["op"] = op
+            data["resp"] = resp
+            data["when"] = start_str
+            data["microsec"] = microsec
+            if getattr(self.model, "metrics", None):
+                data["metrics"] = self.model.metrics
+            data["effective_sample_count"] = len(request["inputs"])
+            self.output_stream.push([data], partition_key=partition_key)
+    @staticmethod
+    def _pick_random_requests(num_of_reqs: int, percentage: float) -> list[int]:
+        """
+        Randomly selects indices of requests to sample based on the given percentage
+        :param num_of_reqs: Number of requests to select from
+        :param percentage: Sample percentage for each request
+        :return: A list containing the indices of the selected requests
+        """
+        return [
+            req for req in range(num_of_reqs) if random.random() < (percentage / 100)
+        ]

mlrun/utils/helpers.py CHANGED Viewed

@@ -13,8 +13,10 @@
 # limitations under the License.
 import asyncio
+import base64
 import enum
 import functools
+import gzip
 import hashlib
 import inspect
 import itertools
@@ -91,14 +93,19 @@ class StorePrefix:
     Artifact = "artifacts"
     Model = "models"
     Dataset = "datasets"
+    Document = "documents"
     @classmethod
     def is_artifact(cls, prefix):
-        return prefix in [cls.Artifact, cls.Model, cls.Dataset]
+        return prefix in [cls.Artifact, cls.Model, cls.Dataset, cls.Document]
     @classmethod
     def kind_to_prefix(cls, kind):
-        kind_map = {"model": cls.Model, "dataset": cls.Dataset}
+        kind_map = {
+            "model": cls.Model,
+            "dataset": cls.Dataset,
+            "document": cls.Document,
+        }
         return kind_map.get(kind, cls.Artifact)
     @classmethod
@@ -109,6 +116,7 @@ class StorePrefix:
             cls.Dataset,
             cls.FeatureSet,
             cls.FeatureVector,
+            cls.Document,
         ]
@@ -1038,31 +1046,85 @@ async def retry_until_successful_async(
     ).run()
-def get_ui_url(project, uid=None):
-    url = ""
+def get_project_url(project: str) -> str:
+    """
+    Generate the base URL for a given project.
+    :param project: The project name.
+    :return: The base URL for the project, or an empty string if the base URL is not resolved.
+    """
     if mlrun.mlconf.resolve_ui_url():
-        url = f"{mlrun.mlconf.resolve_ui_url()}/{mlrun.mlconf.ui.projects_prefix}/{project}/jobs"
-        if uid:
-            url += f"/monitor/{uid}/overview"
-    return url
+        return f"{mlrun.mlconf.resolve_ui_url()}/{mlrun.mlconf.ui.projects_prefix}/{project}"
+    return ""
-def get_model_endpoint_url(project, model_name, model_endpoint_id):
-    url = ""
-    if mlrun.mlconf.resolve_ui_url():
-        url = f"{mlrun.mlconf.resolve_ui_url()}/{mlrun.mlconf.ui.projects_prefix}/{project}/models"
-        if model_name:
-            url += f"/model-endpoints/{model_name}/{model_endpoint_id}/overview"
+def get_run_url(project: str, uid: str, name: str) -> str:
+    """
+    Generate the URL for a specific run.
+    :param project: The project name.
+    :param uid: The run UID.
+    :param name: The run name.
+    :return: The URL for the run, or an empty string if the base URL is not resolved.
+    """
+    runs_url = get_runs_url(project)
+    if not runs_url:
+        return ""
+    return f"{runs_url}/monitor-jobs/{name}/{uid}/overview"
+def get_runs_url(project: str) -> str:
+    """
+    Generate the URL for the runs of a given project.
+    :param project: The project name.
+    :return: The URL for the runs, or an empty string if the base URL is not resolved.
+    """
+    base_url = get_project_url(project)
+    if not base_url:
+        return ""
+    return f"{base_url}/jobs"
+def get_model_endpoint_url(
+    project: str,
+    model_name: Optional[str] = None,
+    model_endpoint_id: Optional[str] = None,
+) -> str:
+    """
+    Generate the URL for a specific model endpoint.
+    :param project: The project name.
+    :param model_name: The model name.
+    :param model_endpoint_id: The model endpoint ID.
+    :return: The URL for the model endpoint, or an empty string if the base URL is not resolved.
+    """
+    base_url = get_project_url(project)
+    if not base_url:
+        return ""
+    url = f"{base_url}/models"
+    if model_name and model_endpoint_id:
+        url += f"/model-endpoints/{model_name}/{model_endpoint_id}/overview"
     return url
-def get_workflow_url(project, id=None):
-    url = ""
-    if mlrun.mlconf.resolve_ui_url():
-        url = (
-            f"{mlrun.mlconf.resolve_ui_url()}/{mlrun.mlconf.ui.projects_prefix}"
-            f"/{project}/jobs/monitor-workflows/workflow/{id}"
-        )
+def get_workflow_url(
+    project: str,
+    id: Optional[str] = None,
+) -> str:
+    """
+    Generate the URL for a specific workflow.
+    :param project: The project name.
+    :param id: The workflow ID.
+    :return: The URL for the workflow, or an empty string if the base URL is not resolved.
+    """
+    base_url = get_project_url(project)
+    if not base_url:
+        return ""
+    url = f"{base_url}/jobs/monitor-workflows/workflow"
+    if id:
+        url += f"/{id}"
     return url
@@ -1709,7 +1771,14 @@ def get_serving_spec():
             raise mlrun.errors.MLRunInvalidArgumentError(
                 "Failed to find serving spec in env var or config file"
             )
-    spec = json.loads(data)
+    # Attempt to decode and decompress, or use as-is for backward compatibility
+    try:
+        decoded_data = base64.b64decode(data)
+        decompressed_data = gzip.decompress(decoded_data)
+        spec = json.loads(decompressed_data.decode("utf-8"))
+    except (OSError, gzip.BadGzipFile, base64.binascii.Error, json.JSONDecodeError):
+        spec = json.loads(data)
     return spec
@@ -1981,7 +2050,20 @@ class Workflow:
         if not workflow_id:
             return steps
-        workflow_manifest = Workflow._get_workflow_manifest(workflow_id)
+        try:
+            workflow_manifest = Workflow._get_workflow_manifest(workflow_id)
+        except Exception:
+            logger.warning(
+                "Failed to extract workflow steps from workflow manifest, "
+                "returning all runs with the workflow id label",
+                workflow_id=workflow_id,
+                traceback=traceback.format_exc(),
+            )
+            return db.list_runs(
+                project=project,
+                labels=f"workflow={workflow_id}",
+            )
         if not workflow_manifest:
             return steps
@@ -2038,3 +2120,9 @@ class Workflow:
         kfp_run = mlrun_pipelines.models.PipelineRun(kfp_run)
         return kfp_run.workflow_manifest()
+def as_dict(data: typing.Union[dict, str]) -> dict:
+    if isinstance(data, str):
+        return json.loads(data)
+    return data

mlrun/utils/notifications/notification/base.py CHANGED Viewed

@@ -134,7 +134,12 @@ class NotificationBase:
             event_data.entity.kind == mlrun.common.schemas.alert.EventEntityKind.JOB
         ):  # JOB entity
             uid = event_data.value_dict.get("uid")
-            url = mlrun.utils.helpers.get_ui_url(alert.project, uid)
+            name = event_data.entity.ids[0]
+            url = mlrun.utils.helpers.get_run_url(
+                alert.project,
+                uid=uid,
+                name=name,
+            )
             overview_type = "Job overview"
         else:  # MODEL entity
             model_name = event_data.value_dict.get("model")

mlrun/utils/notifications/notification/slack.py CHANGED Viewed

@@ -168,7 +168,11 @@ class SlackNotification(NotificationBase):
     def _get_run_line(self, run: dict) -> dict:
         meta = run["metadata"]
-        url = mlrun.utils.helpers.get_ui_url(meta.get("project"), meta.get("uid"))
+        url = mlrun.utils.helpers.get_run_url(
+            meta.get("project"),
+            uid=meta.get("uid"),
+            name=meta.get("name"),
+        )
         # Only show the URL if the run is not a function (serving or mlrun function)
         kind = run.get("step_kind")

mlrun/utils/notifications/notification_pusher.py CHANGED Viewed

@@ -139,15 +139,25 @@ class NotificationPusher(_NotificationPusherBase):
                     error=mlrun.errors.err_to_str(exc),
                 )
-    def _process_notification(self, notification, run):
-        notification.status = run.status.notifications.get(notification.name, {}).get(
+    def _process_notification(self, notification_object, run):
+        notification_object.status = run.status.notifications.get(
+            notification_object.name, {}
+        ).get(
             "status",
             mlrun.common.schemas.NotificationStatus.PENDING,
         )
-        if self._should_notify(run, notification):
-            self._load_notification(run, notification)
+        if self._should_notify(run, notification_object):
+            notification = self._load_notification(notification_object)
+            if notification.is_async:
+                self._async_notifications.append(
+                    (notification, run, notification_object)
+                )
+            else:
+                self._sync_notifications.append(
+                    (notification, run, notification_object)
+                )
-    def push(self):
+    def push(self, sync_push_callback=None, async_push_callback=None):
         """
         Asynchronously push notifications for all runs in the initialized runs list (if they should be pushed).
         When running from a sync environment, the notifications will be pushed asynchronously however the function will
@@ -190,7 +200,7 @@ class NotificationPusher(_NotificationPusherBase):
                         "Failed to push notification async",
                         error=mlrun.errors.err_to_str(result),
                         traceback=traceback.format_exception(
-                            etype=type(result),
+                            result,
                             value=result,
                             tb=result.__traceback__,
                         ),
@@ -201,8 +211,9 @@ class NotificationPusher(_NotificationPusherBase):
             notifications_amount=len(self._sync_notifications)
             + len(self._async_notifications),
         )
-        self._push(sync_push, async_push)
+        sync_push_callback = sync_push_callback or sync_push
+        async_push_callback = async_push_callback or async_push
+        self._push(sync_push_callback, async_push_callback)
     @staticmethod
     def _should_notify(
@@ -241,24 +252,19 @@ class NotificationPusher(_NotificationPusherBase):
         return False
     def _load_notification(
-        self, run: mlrun.model.RunObject, notification_object: mlrun.model.Notification
+        self, notification_object: mlrun.model.Notification
     ) -> base.NotificationBase:
         name = notification_object.name
         notification_type = notification_module.NotificationTypes(
             notification_object.kind or notification_module.NotificationTypes.console
         )
         params = {}
-        params.update(notification_object.secret_params)
-        params.update(notification_object.params)
+        params.update(notification_object.secret_params or {})
+        params.update(notification_object.params or {})
         default_params = self._default_params.get(notification_type.value, {})
         notification = notification_type.get_notification()(
             name, params, default_params
         )
-        if notification.is_async:
-            self._async_notifications.append((notification, run, notification_object))
-        else:
-            self._sync_notifications.append((notification, run, notification_object))
         logger.debug(
             "Loaded notification", notification=name, type=notification_type.value
         )
@@ -406,8 +412,17 @@ class NotificationPusher(_NotificationPusherBase):
         sent_time: typing.Optional[datetime.datetime] = None,
         reason: typing.Optional[str] = None,
     ):
-        if run_state not in runtimes_constants.RunStates.terminal_states():
-            # we want to update the notification status only if the run is in a terminal state for BC
+        # Skip update the notification state if the following conditions are met:
+        # 1. the run is not in a terminal state
+        # 2. the when contains only one state (which is the current state)
+        # Skip updating because currently each notification has only one row in the db, even if it has multiple when.
+        # This means that if the notification is updated to sent for running state for example, it will not send for
+        # The terminal state
+        # TODO: Change this behavior after implementing ML-8723
+        if (
+            run_state not in runtimes_constants.RunStates.terminal_states()
+            and len(notification.when) > 1
+        ):
             logger.debug(
                 "Skip updating notification status - run not in terminal state",
                 run_uid=run_uid,
@@ -496,6 +511,14 @@ class CustomNotificationPusher(_NotificationPusherBase):
         notification_type: str,
         params: typing.Optional[dict[str, str]] = None,
     ):
+        if notification_type not in [
+            notification_module.NotificationTypes.console,
+            notification_module.NotificationTypes.ipython,
+        ]:
+            # We want that only the console and ipython notifications will be notified by the client.
+            # The rest of the notifications will be notified by the BE.
+            return
         if notification_type in self._async_notifications:
             self._async_notifications[notification_type].load_notification(params)
         elif notification_type in self._sync_notifications:
@@ -565,25 +588,9 @@ class CustomNotificationPusher(_NotificationPusherBase):
         pipeline_id: typing.Optional[str] = None,
         has_workflow_url: bool = False,
     ):
-        message = f"Workflow started in project {project}"
-        if pipeline_id:
-            message += f" id={pipeline_id}"
-        commit_id = (
-            commit_id or os.environ.get("GITHUB_SHA") or os.environ.get("CI_COMMIT_SHA")
+        html, message = self.generate_start_message(
+            commit_id, has_workflow_url, pipeline_id, project
         )
-        if commit_id:
-            message += f", commit={commit_id}"
-        if has_workflow_url:
-            url = mlrun.utils.helpers.get_workflow_url(project, pipeline_id)
-        else:
-            url = mlrun.utils.helpers.get_ui_url(project)
-        html = ""
-        if url:
-            html = (
-                message
-                + f'<div><a href="{url}" target="_blank">click here to view progress</a></div>'
-            )
-            message = message + f", check progress in {url}"
         self.push(message, "info", custom_html=html)
     def push_pipeline_run_results(
@@ -616,6 +623,30 @@ class CustomNotificationPusher(_NotificationPusherBase):
             text += f", state={state}"
         self.push(text, "info", runs=runs_list)
+    def generate_start_message(
+        self, commit_id=None, has_workflow_url=None, pipeline_id=None, project=None
+    ):
+        message = f"Workflow started in project {project}"
+        if pipeline_id:
+            message += f" id={pipeline_id}"
+        commit_id = (
+            commit_id or os.environ.get("GITHUB_SHA") or os.environ.get("CI_COMMIT_SHA")
+        )
+        if commit_id:
+            message += f", commit={commit_id}"
+        if has_workflow_url:
+            url = mlrun.utils.helpers.get_workflow_url(project, pipeline_id)
+        else:
+            url = mlrun.utils.helpers.get_runs_url(project)
+        html = ""
+        if url:
+            html = (
+                message
+                + f'<div><a href="{url}" target="_blank">click here to view progress</a></div>'
+            )
+            message = message + f", check progress in {url}"
+        return html, message
 def sanitize_notification(notification_dict: dict):
     notification_dict.pop("secret_params", None)

mlrun/utils/version/version.json CHANGED Viewed

@@ -1,4 +1,4 @@
 {
-  "git_commit": "6671b915fe08ba2b9afa054585f86b955b48b671",
-  "version": "1.8.0-rc19"
+  "git_commit": "d9ddaa33eb375080a0577da3aa2849915b60e30d",
+  "version": "1.8.0-rc26"
 }

mlrun 1.8.0rc19__py3-none-any.whl → 1.8.0rc26__py3-none-any.whl

Potentially problematic release.

mlrun 1.8.0rc19__py3-none-any.whl → 1.8.0rc26__py3-none-any.whl