mlrun 1.10.0rc16__py3-none-any.whl → 1.10.1rc4__py3-none-any.whl

This diff shows the changes between publicly released versions of the mlrun package as published to their public registry, and is provided for informational purposes only.

Potentially problematic release: this version of mlrun might be problematic.
Files changed (101)
  1. mlrun/__init__.py +22 -2
  2. mlrun/artifacts/document.py +6 -1
  3. mlrun/artifacts/llm_prompt.py +21 -15
  4. mlrun/artifacts/model.py +3 -3
  5. mlrun/common/constants.py +9 -0
  6. mlrun/common/formatters/artifact.py +1 -0
  7. mlrun/common/model_monitoring/helpers.py +86 -0
  8. mlrun/common/schemas/__init__.py +2 -0
  9. mlrun/common/schemas/auth.py +2 -0
  10. mlrun/common/schemas/function.py +10 -0
  11. mlrun/common/schemas/hub.py +30 -18
  12. mlrun/common/schemas/model_monitoring/__init__.py +2 -0
  13. mlrun/common/schemas/model_monitoring/constants.py +30 -6
  14. mlrun/common/schemas/model_monitoring/functions.py +13 -4
  15. mlrun/common/schemas/model_monitoring/model_endpoints.py +11 -0
  16. mlrun/common/schemas/pipeline.py +1 -1
  17. mlrun/common/schemas/serving.py +3 -0
  18. mlrun/common/schemas/workflow.py +1 -0
  19. mlrun/common/secrets.py +22 -1
  20. mlrun/config.py +34 -21
  21. mlrun/datastore/__init__.py +11 -3
  22. mlrun/datastore/azure_blob.py +162 -47
  23. mlrun/datastore/base.py +265 -7
  24. mlrun/datastore/datastore.py +10 -5
  25. mlrun/datastore/datastore_profile.py +61 -5
  26. mlrun/datastore/model_provider/huggingface_provider.py +367 -0
  27. mlrun/datastore/model_provider/mock_model_provider.py +87 -0
  28. mlrun/datastore/model_provider/model_provider.py +211 -74
  29. mlrun/datastore/model_provider/openai_provider.py +243 -71
  30. mlrun/datastore/s3.py +24 -2
  31. mlrun/datastore/store_resources.py +4 -4
  32. mlrun/datastore/storeytargets.py +2 -3
  33. mlrun/datastore/utils.py +15 -3
  34. mlrun/db/base.py +27 -19
  35. mlrun/db/httpdb.py +57 -48
  36. mlrun/db/nopdb.py +25 -10
  37. mlrun/execution.py +55 -13
  38. mlrun/hub/__init__.py +15 -0
  39. mlrun/hub/module.py +181 -0
  40. mlrun/k8s_utils.py +105 -16
  41. mlrun/launcher/base.py +13 -6
  42. mlrun/launcher/local.py +2 -0
  43. mlrun/model.py +9 -3
  44. mlrun/model_monitoring/api.py +66 -27
  45. mlrun/model_monitoring/applications/__init__.py +1 -1
  46. mlrun/model_monitoring/applications/base.py +388 -138
  47. mlrun/model_monitoring/applications/context.py +2 -4
  48. mlrun/model_monitoring/applications/results.py +4 -7
  49. mlrun/model_monitoring/controller.py +239 -101
  50. mlrun/model_monitoring/db/_schedules.py +36 -13
  51. mlrun/model_monitoring/db/_stats.py +4 -3
  52. mlrun/model_monitoring/db/tsdb/base.py +29 -9
  53. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +4 -5
  54. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +154 -50
  55. mlrun/model_monitoring/db/tsdb/tdengine/writer_graph_steps.py +51 -0
  56. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +17 -4
  57. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +245 -51
  58. mlrun/model_monitoring/helpers.py +28 -5
  59. mlrun/model_monitoring/stream_processing.py +45 -14
  60. mlrun/model_monitoring/writer.py +220 -1
  61. mlrun/platforms/__init__.py +3 -2
  62. mlrun/platforms/iguazio.py +7 -3
  63. mlrun/projects/operations.py +16 -11
  64. mlrun/projects/pipelines.py +2 -2
  65. mlrun/projects/project.py +157 -69
  66. mlrun/run.py +97 -20
  67. mlrun/runtimes/__init__.py +18 -0
  68. mlrun/runtimes/base.py +14 -6
  69. mlrun/runtimes/daskjob.py +1 -0
  70. mlrun/runtimes/local.py +5 -2
  71. mlrun/runtimes/mounts.py +20 -2
  72. mlrun/runtimes/nuclio/__init__.py +1 -0
  73. mlrun/runtimes/nuclio/application/application.py +147 -17
  74. mlrun/runtimes/nuclio/function.py +72 -27
  75. mlrun/runtimes/nuclio/serving.py +102 -20
  76. mlrun/runtimes/pod.py +213 -21
  77. mlrun/runtimes/utils.py +49 -9
  78. mlrun/secrets.py +54 -13
  79. mlrun/serving/remote.py +79 -6
  80. mlrun/serving/routers.py +23 -41
  81. mlrun/serving/server.py +230 -40
  82. mlrun/serving/states.py +605 -232
  83. mlrun/serving/steps.py +62 -0
  84. mlrun/serving/system_steps.py +136 -81
  85. mlrun/serving/v2_serving.py +9 -10
  86. mlrun/utils/helpers.py +215 -83
  87. mlrun/utils/logger.py +3 -1
  88. mlrun/utils/notifications/notification/base.py +18 -0
  89. mlrun/utils/notifications/notification/git.py +2 -4
  90. mlrun/utils/notifications/notification/mail.py +38 -15
  91. mlrun/utils/notifications/notification/slack.py +2 -4
  92. mlrun/utils/notifications/notification/webhook.py +2 -5
  93. mlrun/utils/notifications/notification_pusher.py +1 -1
  94. mlrun/utils/version/version.json +2 -2
  95. {mlrun-1.10.0rc16.dist-info → mlrun-1.10.1rc4.dist-info}/METADATA +51 -50
  96. {mlrun-1.10.0rc16.dist-info → mlrun-1.10.1rc4.dist-info}/RECORD +100 -95
  97. mlrun/api/schemas/__init__.py +0 -259
  98. {mlrun-1.10.0rc16.dist-info → mlrun-1.10.1rc4.dist-info}/WHEEL +0 -0
  99. {mlrun-1.10.0rc16.dist-info → mlrun-1.10.1rc4.dist-info}/entry_points.txt +0 -0
  100. {mlrun-1.10.0rc16.dist-info → mlrun-1.10.1rc4.dist-info}/licenses/LICENSE +0 -0
  101. {mlrun-1.10.0rc16.dist-info → mlrun-1.10.1rc4.dist-info}/top_level.txt +0 -0
mlrun/model_monitoring/writer.py
@@ -13,9 +13,12 @@
  # limitations under the License.

  import json
+ import typing
  from datetime import datetime, timezone
  from typing import Any, Callable, NewType, Optional

+ import storey
+
  import mlrun.common.model_monitoring
  import mlrun.common.schemas
  import mlrun.common.schemas.alert as alert_objects
@@ -31,6 +34,8 @@ from mlrun.common.schemas.model_monitoring.constants import (
      WriterEvent,
      WriterEventKind,
  )
+ from mlrun.config import config
+ from mlrun.model_monitoring.db import TSDBConnector
  from mlrun.model_monitoring.db._stats import (
      ModelMonitoringCurrentStatsFile,
      ModelMonitoringDriftMeasuresFile,
@@ -73,7 +78,6 @@ class ModelMonitoringWriter(StepToDict):
          self._tsdb_connector = mlrun.model_monitoring.get_tsdb_connector(
              project=self.project, secret_provider=secret_provider
          )
-         self._endpoints_records = {}

      def _generate_event_on_drift(
          self,
@@ -226,3 +230,218 @@ class ModelMonitoringWriter(StepToDict):
          )

          logger.info("Model monitoring writer finished handling event")
+
+
+ class WriterGraphFactory:
+     def __init__(
+         self,
+         parquet_path: str,
+     ):
+         self.parquet_path = parquet_path
+         self.parquet_batching_max_events = (
+             config.model_endpoint_monitoring.writer_graph.max_events
+         )
+         self.parquet_batching_timeout_secs = (
+             config.model_endpoint_monitoring.writer_graph.parquet_batching_timeout_secs
+         )
+
+     def apply_writer_graph(
+         self,
+         fn: mlrun.runtimes.ServingRuntime,
+         tsdb_connector: TSDBConnector,
+     ):
+         graph = typing.cast(
+             mlrun.serving.states.RootFlowStep,
+             fn.set_topology(mlrun.serving.states.StepKinds.flow, engine="async"),
+         )
+
+         graph.to("ReconstructWriterEvent", "event_reconstructor")
+         step = tsdb_connector.add_pre_writer_steps(
+             graph=graph, after="event_reconstructor"
+         )
+         before_choice = step.name if step else "event_reconstructor"
+         graph.add_step("KindChoice", "kind_choice_step", after=before_choice)
+         tsdb_connector.apply_writer_steps(
+             graph=graph,
+             after="kind_choice_step",
+         )
+         graph.add_step(
+             "AlertGenerator",
+             "alert_generator",
+             after="kind_choice_step",
+             project=fn.metadata.project,
+         )
+         graph.add_step(
+             "storey.Filter",
+             name="filter_none",
+             _fn="(event is not None)",
+             after="alert_generator",
+         )
+         graph.add_step(
+             "mlrun.serving.remote.MLRunAPIRemoteStep",
+             name="alert_generator_api_call",
+             after="filter_none",
+             method="POST",
+             path=f"projects/{fn.metadata.project}/events/{{kind}}",
+             fill_placeholders=True,
+         )
+
+         graph.add_step(
+             "mlrun.datastore.storeytargets.ParquetStoreyTarget",
+             alternative_v3io_access_key=mlrun.common.schemas.model_monitoring.ProjectSecretKeys.ACCESS_KEY,
+             name="stats_writer",
+             after="kind_choice_step",
+             graph_shape="cylinder",
+             path=self.parquet_path
+             if self.parquet_path.endswith("/")
+             else self.parquet_path + "/",
+             max_events=self.parquet_batching_max_events,
+             flush_after_seconds=self.parquet_batching_timeout_secs,
+             columns=[
+                 StatsData.TIMESTAMP,
+                 StatsData.STATS,
+                 WriterEvent.ENDPOINT_ID,
+                 StatsData.STATS_NAME,
+             ],
+             partition_cols=[WriterEvent.ENDPOINT_ID, StatsData.STATS_NAME],
+             single_file=True,
+         )
+
+
+ class ReconstructWriterEvent(storey.MapClass):
+     def __init__(self):
+         super().__init__()
+
+     def do(self, event: dict) -> dict[str, Any]:
+         logger.info("Reconstructing the event", event=event)
+         kind = event.pop(WriterEvent.EVENT_KIND, WriterEventKind.RESULT)
+         result_event = _AppResultEvent(json.loads(event.pop(WriterEvent.DATA, "{}")))
+         result_event.update(_AppResultEvent(event))
+
+         expected_keys = list(
+             set(WriterEvent.list()).difference(
+                 [WriterEvent.EVENT_KIND, WriterEvent.DATA]
+             )
+         )
+         if kind == WriterEventKind.METRIC:
+             expected_keys.extend(MetricData.list())
+         elif kind == WriterEventKind.RESULT:
+             expected_keys.extend(ResultData.list())
+         elif kind == WriterEventKind.STATS:
+             expected_keys.extend(StatsData.list())
+         else:
+             raise _WriterEventValueError(
+                 f"Unknown event kind: {kind}, expected one of: {WriterEventKind.list()}"
+             )
+         missing_keys = [key for key in expected_keys if key not in result_event]
+         if missing_keys:
+             raise _WriterEventValueError(
+                 f"The received event misses some keys compared to the expected "
+                 f"monitoring application event schema: {missing_keys} for event kind {kind}"
+             )
+         result_event["kind"] = kind
+         if kind in WriterEventKind.user_app_outputs():
+             result_event[WriterEvent.END_INFER_TIME] = datetime.fromisoformat(
+                 event[WriterEvent.END_INFER_TIME]
+             )
+         if kind == WriterEventKind.STATS:
+             result_event[StatsData.STATS] = json.dumps(result_event[StatsData.STATS])
+         return result_event
+
+
+ class KindChoice(storey.Choice):
+     def select_outlets(self, event):
+         kind = event.get("kind")
+         logger.info("Selecting the outlet for the event", kind=kind)
+         if kind == WriterEventKind.METRIC:
+             outlets = ["tsdb_metrics"]
+         elif kind == WriterEventKind.RESULT:
+             outlets = ["tsdb_app_results", "alert_generator"]
+         elif kind == WriterEventKind.STATS:
+             outlets = ["stats_writer"]
+         else:
+             raise _WriterEventValueError(
+                 f"Unknown event kind: {kind}, expected one of: {WriterEventKind.list()}"
+             )
+         return outlets
+
+
+ class AlertGenerator(storey.MapClass):
+     def __init__(self, project: str, **kwargs):
+         self.project = project
+         super().__init__(**kwargs)
+
+     def do(self, event: dict) -> Optional[dict[str, Any]]:
+         kind = event.pop(WriterEvent.EVENT_KIND, WriterEventKind.RESULT)
+         if (
+             mlrun.mlconf.alerts.mode == mlrun.common.schemas.alert.AlertsModes.enabled
+             and kind == WriterEventKind.RESULT
+             and (
+                 event[ResultData.RESULT_STATUS] == ResultStatusApp.detected.value
+                 or event[ResultData.RESULT_STATUS]
+                 == ResultStatusApp.potential_detection.value
+             )
+         ):
+             event_value = {
+                 "app_name": event[WriterEvent.APPLICATION_NAME],
+                 "model": event[WriterEvent.ENDPOINT_NAME],
+                 "model_endpoint_id": event[WriterEvent.ENDPOINT_ID],
+                 "result_name": event[ResultData.RESULT_NAME],
+                 "result_value": event[ResultData.RESULT_VALUE],
+             }
+             data = self._generate_event_data(
+                 entity_id=get_result_instance_fqn(
+                     event[WriterEvent.ENDPOINT_ID],
+                     event[WriterEvent.APPLICATION_NAME],
+                     event[ResultData.RESULT_NAME],
+                 ),
+                 result_status=event[ResultData.RESULT_STATUS],
+                 event_value=event_value,
+                 project_name=self.project,
+                 result_kind=event[ResultData.RESULT_KIND],
+             )
+             event = data.dict()
+             logger.info("Generated alert event", event=event)
+             return event
+         return None
+
+     @staticmethod
+     def _generate_alert_event_kind(
+         result_kind: int, result_status: int
+     ) -> alert_objects.EventKind:
+         """Generate the required Event Kind format for the alerting system"""
+         event_kind = ResultKindApp(value=result_kind).name
+
+         if result_status == ResultStatusApp.detected.value:
+             event_kind = f"{event_kind}_detected"
+         else:
+             event_kind = f"{event_kind}_suspected"
+         return alert_objects.EventKind(
+             value=mlrun.utils.helpers.normalize_name(event_kind)
+         )
+
+     def _generate_event_data(
+         self,
+         entity_id: str,
+         result_status: int,
+         event_value: dict,
+         project_name: str,
+         result_kind: int,
+     ) -> mlrun.common.schemas.Event:
+         entity = mlrun.common.schemas.alert.EventEntities(
+             kind=alert_objects.EventEntityKind.MODEL_ENDPOINT_RESULT,
+             project=project_name,
+             ids=[entity_id],
+         )
+
+         event_kind = self._generate_alert_event_kind(
+             result_status=result_status, result_kind=result_kind
+         )
+
+         event_data = mlrun.common.schemas.Event(
+             kind=alert_objects.EventKind(value=event_kind),
+             entity=entity,
+             value_dict=event_value,
+         )
+
+         return event_data
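
Taken together, these additions give the writer a declarative async graph: ReconstructWriterEvent normalizes raw writer events, KindChoice routes each event by kind to the TSDB metric/result writers, the alert generator, or the parquet stats writer, and AlertGenerator turns detected or suspected results into alert events posted via MLRunAPIRemoteStep. A minimal wiring sketch, with a hypothetical project name and parquet path (get_tsdb_connector is the same helper the writer's __init__ uses above)::

    import mlrun
    import mlrun.model_monitoring
    from mlrun.model_monitoring.writer import WriterGraphFactory

    # hypothetical serving function to host the writer graph
    fn = mlrun.new_function(
        "monitoring-writer", project="my-project", kind="serving", image="mlrun/mlrun"
    )

    # resolve the TSDB backend the same way ModelMonitoringWriter does
    tsdb_connector = mlrun.model_monitoring.get_tsdb_connector(
        project="my-project", secret_provider=None
    )

    factory = WriterGraphFactory(parquet_path="v3io:///projects/my-project/monitoring-stats")
    factory.apply_writer_graph(fn=fn, tsdb_connector=tsdb_connector)
    # apply_writer_graph appends a trailing "/" to the parquet path if missing,
    # and batches stats to parquet per the writer_graph config limits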
mlrun/platforms/__init__.py
@@ -25,6 +25,7 @@ from .iguazio import (
  )


+ # TODO: Remove in 1.11.0
  class _DeprecationHelper:
      """A helper class to deprecate old schemas"""

@@ -48,12 +49,12 @@
      def _warn(self):
          warnings.warn(
              f"mlrun.platforms.{self._new_target} is deprecated since version {self._version}, "
-             f"and will be removed in 1.10. Use mlrun.runtimes.mounts.{self._new_target} instead.",
+             f"and will be removed in 1.11.0. Use mlrun.runtimes.mounts.{self._new_target} instead.",
              FutureWarning,
          )


- # TODO: Remove in 1.10
+ # TODO: Remove in 1.11.0
  # For backwards compatibility
  VolumeMount = _DeprecationHelper("VolumeMount")
  auto_mount = _DeprecationHelper("auto_mount")
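
The bumped strings simply push the removal target from 1.10 to 1.11.0 now that 1.10 has shipped with the aliases intact. A hypothetical illustration of the shim at work, assuming the helper forwards calls to mlrun.runtimes.mounts::

    import warnings

    import mlrun.platforms

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        mount = mlrun.platforms.auto_mount()  # deprecated alias
    assert any(issubclass(w.category, FutureWarning) for w in caught)

    from mlrun.runtimes.mounts import auto_mount  # supported replacement
    mount = auto_mount()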
mlrun/platforms/iguazio.py
@@ -96,7 +96,11 @@ class OutputStream:
          if access_key:
              v3io_client_kwargs["access_key"] = access_key

-         self._v3io_client = v3io.dataplane.Client(**v3io_client_kwargs)
+         if not mock:
+             self._v3io_client = v3io.dataplane.Client(**v3io_client_kwargs)
+         else:
+             self._v3io_client = None
+
          self._container, self._stream_path = split_path(stream_path)
          self._shards = shards
          self._retention_in_hours = retention_in_hours
@@ -105,7 +109,7 @@
          self._mock = mock
          self._mock_queue = []

-     def create_stream(self):
+     def create_stream(self) -> None:
          # this import creates an import loop via the utils module, so putting it in execution path
          from mlrun.utils.helpers import logger

@@ -210,7 +214,7 @@ class KafkaOutputStream:
          self._initialized = False

      def _lazy_init(self):
-         if self._initialized:
+         if self._initialized or self._mock:
              return

          import kafka
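
All three hunks serve the mock path: with mock=True, neither the V3IO dataplane client nor the Kafka producer is ever constructed, so stream targets can be exercised offline. A rough sketch of the intent, with a hypothetical stream path (_mock_queue is the internal buffer visible in the context above)::

    from mlrun.platforms.iguazio import OutputStream

    stream = OutputStream("v3io:///projects/demo/stream", mock=True)
    stream.push({"hello": "world"})     # buffered in _mock_queue, no network I/O
    assert stream._v3io_client is None  # client creation is now skipped entirely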
mlrun/projects/operations.py
@@ -85,17 +85,17 @@ def run_function(
  ) -> Union[mlrun.model.RunObject, mlrun_pipelines.models.PipelineNodeWrapper]:
      """Run a local or remote task as part of a local/kubeflow pipeline

-     run_function() allow you to execute a function locally, on a remote cluster, or as part of an automated workflow
-     function can be specified as an object or by name (str), when the function is specified by name it is looked up
-     in the current project eliminating the need to redefine/edit functions.
+     run_function() allows you to execute a function locally, on a remote cluster, or as part of an automated workflow.
+     The function can be specified as an object or by name (str). When the function is specified by name it is looked up
+     in the current project, eliminating the need to redefine/edit functions.

-     when functions run as part of a workflow/pipeline (project.run()) some attributes can be set at the run level,
+     When functions run as part of a workflow/pipeline (project.run()) some attributes can be set at the run level,
      e.g. local=True will run all the functions locally, setting artifact_path will direct all outputs to the same path.
-     project runs provide additional notifications/reporting and exception handling.
-     inside a Kubeflow pipeline (KFP) run_function() generates KFP node (see PipelineNodeWrapper) which forms a DAG
-     some behavior may differ between regular runs and deferred KFP runs.
+     Project runs provide additional notifications/reporting and exception handling.
+     Inside a Kubeflow pipeline (KFP) run_function() generates KFP node (see PipelineNodeWrapper) which forms a DAG.
+     Some behavior may differ between regular runs and deferred KFP runs.

-     example (use with function object)::
+     Example (use with function object)::

          LABELS = "is_error"
          MODEL_CLASS = "sklearn.ensemble.RandomForestClassifier"
@@ -107,7 +107,7 @@ def run_function(
              inputs={"dataset": DATA_PATH},
          )

-     example (use with project)::
+     Example (use with project)::

          # create a project with two functions (local and from hub)
          project = mlrun.new_project(project_name, "./proj)
@@ -119,7 +119,7 @@
          run2 = run_function("train", params={"label_columns": LABELS, "model_class": MODEL_CLASS},
                              inputs={"dataset": run1.outputs["data"]})

-     example (use in pipeline)::
+     Example (use in pipeline)::

          @dsl.pipeline(name="test pipeline", description="test")
          def my_pipe(url=""):
@@ -177,7 +177,12 @@
                        This ensures latest code changes are executed. This argument must be used in
                        conjunction with the local=True argument.
      :param output_path: path to store artifacts, when running in a workflow this will be set automatically
-     :param retry: Retry configuration for the run, can be a dict or an instance of mlrun.model.Retry.
+     :param retry: Retry configuration for the run, can be a dict or an instance of
+                   :py:class:`~mlrun.model.Retry`.
+                   The `count` field in the `Retry` object specifies the number of retry attempts.
+                   If `count=0`, the run will not be retried.
+                   The `backoff` field specifies the retry backoff strategy between retry attempts.
+                   If not provided, the default backoff delay is 30 seconds.
      :return: MLRun RunObject or PipelineNodeWrapper
      """
      if artifact_path:
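
The expanded retry description makes the semantics concrete; a hypothetical call in both accepted forms (function name, params, and the Retry(count=...) construction are illustrative)::

    import mlrun
    from mlrun.model import Retry

    # object form: up to 3 retry attempts; with no backoff given, the
    # documented default is a 30-second delay between attempts
    run = mlrun.run_function(
        "train", params={"label_columns": "is_error"}, retry=Retry(count=3)
    )

    # dict form; count=0 would disable retries
    run = mlrun.run_function("train", retry={"count": 3})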
mlrun/projects/pipelines.py
@@ -228,11 +228,11 @@ class _PipelineContext:
          force_run_local = mlrun.mlconf.force_run_local
          if force_run_local is None or force_run_local == "auto":
              force_run_local = not mlrun.mlconf.is_api_running_on_k8s()
+
+         if self.workflow:
              if not mlrun.mlconf.kfp_url:
                  logger.debug("Kubeflow pipeline URL is not set, running locally")
                  force_run_local = True
-
-         if self.workflow:
              force_run_local = force_run_local or self.workflow.run_local

          return force_run_local
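
Moving the kfp_url check under the if self.workflow: guard narrows the fallback: a missing Kubeflow Pipelines URL now forces local execution only for workflow runs, not for every run resolved from the "auto" default. A sketch of the resulting behavior, assuming the API is running on Kubernetes but no KFP endpoint is configured::

    import mlrun

    mlrun.mlconf.force_run_local = "auto"
    mlrun.mlconf.kfp_url = ""

    # ad-hoc runs: "auto" resolves from is_api_running_on_k8s() alone,
    # so they are no longer forced local just because kfp_url is empty
    # workflow runs (project.run(...)): still fall back to local execution,
    # matching the moved `if self.workflow:` block above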