PyPI - mlrun - Versions diffs - 1.7.2rc3__py3-none-any.whl → 1.8.0__py3-none-any.whl - Mend

mlrun 1.7.2rc3-py3-none-any.whl → 1.8.0-py3-none-any.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mlrun might be problematic. Click here for more details.

Files changed (275)

mlrun/__init__.py +26 -22
mlrun/__main__.py +15 -16
mlrun/alerts/alert.py +150 -15
mlrun/api/schemas/__init__.py +1 -9
mlrun/artifacts/__init__.py +2 -3
mlrun/artifacts/base.py +62 -19
mlrun/artifacts/dataset.py +17 -17
mlrun/artifacts/document.py +454 -0
mlrun/artifacts/manager.py +28 -18
mlrun/artifacts/model.py +91 -59
mlrun/artifacts/plots.py +2 -2
mlrun/common/constants.py +8 -0
mlrun/common/formatters/__init__.py +1 -0
mlrun/common/formatters/artifact.py +1 -1
mlrun/common/formatters/feature_set.py +2 -0
mlrun/common/formatters/function.py +1 -0
mlrun/{model_monitoring/db/stores/v3io_kv/__init__.py → common/formatters/model_endpoint.py} +17 -0
mlrun/common/formatters/pipeline.py +1 -2
mlrun/common/formatters/project.py +9 -0
mlrun/common/model_monitoring/__init__.py +0 -5
mlrun/common/model_monitoring/helpers.py +12 -62
mlrun/common/runtimes/constants.py +25 -4
mlrun/common/schemas/__init__.py +9 -5
mlrun/common/schemas/alert.py +114 -19
mlrun/common/schemas/api_gateway.py +3 -3
mlrun/common/schemas/artifact.py +22 -9
mlrun/common/schemas/auth.py +8 -4
mlrun/common/schemas/background_task.py +7 -7
mlrun/common/schemas/client_spec.py +4 -4
mlrun/common/schemas/clusterization_spec.py +2 -2
mlrun/common/schemas/common.py +53 -3
mlrun/common/schemas/constants.py +15 -0
mlrun/common/schemas/datastore_profile.py +1 -1
mlrun/common/schemas/feature_store.py +9 -9
mlrun/common/schemas/frontend_spec.py +4 -4
mlrun/common/schemas/function.py +10 -10
mlrun/common/schemas/hub.py +1 -1
mlrun/common/schemas/k8s.py +3 -3
mlrun/common/schemas/memory_reports.py +3 -3
mlrun/common/schemas/model_monitoring/__init__.py +4 -8
mlrun/common/schemas/model_monitoring/constants.py +127 -46
mlrun/common/schemas/model_monitoring/grafana.py +18 -12
mlrun/common/schemas/model_monitoring/model_endpoints.py +154 -160
mlrun/common/schemas/notification.py +24 -3
mlrun/common/schemas/object.py +1 -1
mlrun/common/schemas/pagination.py +4 -4
mlrun/common/schemas/partition.py +142 -0
mlrun/common/schemas/pipeline.py +3 -3
mlrun/common/schemas/project.py +26 -18
mlrun/common/schemas/runs.py +3 -3
mlrun/common/schemas/runtime_resource.py +5 -5
mlrun/common/schemas/schedule.py +1 -1
mlrun/common/schemas/secret.py +1 -1
mlrun/{model_monitoring/db/stores/sqldb/__init__.py → common/schemas/serving.py} +10 -1
mlrun/common/schemas/tag.py +3 -3
mlrun/common/schemas/workflow.py +6 -5
mlrun/common/types.py +1 -0
mlrun/config.py +157 -89
mlrun/data_types/__init__.py +5 -3
mlrun/data_types/infer.py +13 -3
mlrun/data_types/spark.py +2 -1
mlrun/datastore/__init__.py +59 -18
mlrun/datastore/alibaba_oss.py +4 -1
mlrun/datastore/azure_blob.py +4 -1
mlrun/datastore/base.py +19 -24
mlrun/datastore/datastore.py +10 -4
mlrun/datastore/datastore_profile.py +178 -45
mlrun/datastore/dbfs_store.py +4 -1
mlrun/datastore/filestore.py +4 -1
mlrun/datastore/google_cloud_storage.py +4 -1
mlrun/datastore/hdfs.py +4 -1
mlrun/datastore/inmem.py +4 -1
mlrun/datastore/redis.py +4 -1
mlrun/datastore/s3.py +14 -3
mlrun/datastore/sources.py +89 -92
mlrun/datastore/store_resources.py +7 -4
mlrun/datastore/storeytargets.py +51 -16
mlrun/datastore/targets.py +38 -31
mlrun/datastore/utils.py +87 -4
mlrun/datastore/v3io.py +4 -1
mlrun/datastore/vectorstore.py +291 -0
mlrun/datastore/wasbfs/fs.py +13 -12
mlrun/db/base.py +286 -100
mlrun/db/httpdb.py +1562 -490
mlrun/db/nopdb.py +250 -83
mlrun/errors.py +6 -2
mlrun/execution.py +194 -50
mlrun/feature_store/__init__.py +2 -10
mlrun/feature_store/api.py +20 -458
mlrun/feature_store/common.py +9 -9
mlrun/feature_store/feature_set.py +20 -18
mlrun/feature_store/feature_vector.py +105 -479
mlrun/feature_store/feature_vector_utils.py +466 -0
mlrun/feature_store/retrieval/base.py +15 -11
mlrun/feature_store/retrieval/job.py +2 -1
mlrun/feature_store/retrieval/storey_merger.py +1 -1
mlrun/feature_store/steps.py +3 -3
mlrun/features.py +30 -13
mlrun/frameworks/__init__.py +1 -2
mlrun/frameworks/_common/__init__.py +1 -2
mlrun/frameworks/_common/artifacts_library.py +2 -2
mlrun/frameworks/_common/mlrun_interface.py +10 -6
mlrun/frameworks/_common/model_handler.py +31 -31
mlrun/frameworks/_common/producer.py +3 -1
mlrun/frameworks/_dl_common/__init__.py +1 -2
mlrun/frameworks/_dl_common/loggers/__init__.py +1 -2
mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +4 -4
mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +3 -3
mlrun/frameworks/_ml_common/__init__.py +1 -2
mlrun/frameworks/_ml_common/loggers/__init__.py +1 -2
mlrun/frameworks/_ml_common/model_handler.py +21 -21
mlrun/frameworks/_ml_common/plans/__init__.py +1 -2
mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +3 -1
mlrun/frameworks/_ml_common/plans/dataset_plan.py +3 -3
mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +4 -4
mlrun/frameworks/auto_mlrun/__init__.py +1 -2
mlrun/frameworks/auto_mlrun/auto_mlrun.py +22 -15
mlrun/frameworks/huggingface/__init__.py +1 -2
mlrun/frameworks/huggingface/model_server.py +9 -9
mlrun/frameworks/lgbm/__init__.py +47 -44
mlrun/frameworks/lgbm/callbacks/__init__.py +1 -2
mlrun/frameworks/lgbm/callbacks/logging_callback.py +4 -2
mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +4 -2
mlrun/frameworks/lgbm/mlrun_interfaces/__init__.py +1 -2
mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +5 -5
mlrun/frameworks/lgbm/model_handler.py +15 -11
mlrun/frameworks/lgbm/model_server.py +11 -7
mlrun/frameworks/lgbm/utils.py +2 -2
mlrun/frameworks/onnx/__init__.py +1 -2
mlrun/frameworks/onnx/dataset.py +3 -3
mlrun/frameworks/onnx/mlrun_interface.py +2 -2
mlrun/frameworks/onnx/model_handler.py +7 -5
mlrun/frameworks/onnx/model_server.py +8 -6
mlrun/frameworks/parallel_coordinates.py +11 -11
mlrun/frameworks/pytorch/__init__.py +22 -23
mlrun/frameworks/pytorch/callbacks/__init__.py +1 -2
mlrun/frameworks/pytorch/callbacks/callback.py +2 -1
mlrun/frameworks/pytorch/callbacks/logging_callback.py +15 -8
mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +19 -12
mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +22 -15
mlrun/frameworks/pytorch/callbacks_handler.py +36 -30
mlrun/frameworks/pytorch/mlrun_interface.py +17 -17
mlrun/frameworks/pytorch/model_handler.py +21 -17
mlrun/frameworks/pytorch/model_server.py +13 -9
mlrun/frameworks/sklearn/__init__.py +19 -18
mlrun/frameworks/sklearn/estimator.py +2 -2
mlrun/frameworks/sklearn/metric.py +3 -3
mlrun/frameworks/sklearn/metrics_library.py +8 -6
mlrun/frameworks/sklearn/mlrun_interface.py +3 -2
mlrun/frameworks/sklearn/model_handler.py +4 -3
mlrun/frameworks/tf_keras/__init__.py +11 -12
mlrun/frameworks/tf_keras/callbacks/__init__.py +1 -2
mlrun/frameworks/tf_keras/callbacks/logging_callback.py +17 -14
mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +15 -12
mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +21 -18
mlrun/frameworks/tf_keras/model_handler.py +17 -13
mlrun/frameworks/tf_keras/model_server.py +12 -8
mlrun/frameworks/xgboost/__init__.py +19 -18
mlrun/frameworks/xgboost/model_handler.py +13 -9
mlrun/k8s_utils.py +2 -5
mlrun/launcher/base.py +3 -4
mlrun/launcher/client.py +2 -2
mlrun/launcher/local.py +6 -2
mlrun/launcher/remote.py +1 -1
mlrun/lists.py +8 -4
mlrun/model.py +132 -46
mlrun/model_monitoring/__init__.py +3 -5
mlrun/model_monitoring/api.py +113 -98
mlrun/model_monitoring/applications/__init__.py +0 -5
mlrun/model_monitoring/applications/_application_steps.py +81 -50
mlrun/model_monitoring/applications/base.py +467 -14
mlrun/model_monitoring/applications/context.py +212 -134
mlrun/model_monitoring/{db/stores/base → applications/evidently}/__init__.py +6 -2
mlrun/model_monitoring/applications/evidently/base.py +146 -0
mlrun/model_monitoring/applications/histogram_data_drift.py +89 -56
mlrun/model_monitoring/applications/results.py +67 -15
mlrun/model_monitoring/controller.py +701 -315
mlrun/model_monitoring/db/__init__.py +0 -2
mlrun/model_monitoring/db/_schedules.py +242 -0
mlrun/model_monitoring/db/_stats.py +189 -0
mlrun/model_monitoring/db/tsdb/__init__.py +33 -22
mlrun/model_monitoring/db/tsdb/base.py +243 -49
mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +76 -36
mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +33 -0
mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connection.py +213 -0
mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +534 -88
mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +1 -0
mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +436 -106
mlrun/model_monitoring/helpers.py +356 -114
mlrun/model_monitoring/stream_processing.py +190 -345
mlrun/model_monitoring/tracking_policy.py +11 -4
mlrun/model_monitoring/writer.py +49 -90
mlrun/package/__init__.py +3 -6
mlrun/package/context_handler.py +2 -2
mlrun/package/packager.py +12 -9
mlrun/package/packagers/__init__.py +0 -2
mlrun/package/packagers/default_packager.py +14 -11
mlrun/package/packagers/numpy_packagers.py +16 -7
mlrun/package/packagers/pandas_packagers.py +18 -18
mlrun/package/packagers/python_standard_library_packagers.py +25 -11
mlrun/package/packagers_manager.py +35 -32
mlrun/package/utils/__init__.py +0 -3
mlrun/package/utils/_pickler.py +6 -6
mlrun/platforms/__init__.py +47 -16
mlrun/platforms/iguazio.py +4 -1
mlrun/projects/operations.py +30 -30
mlrun/projects/pipelines.py +116 -47
mlrun/projects/project.py +1292 -329
mlrun/render.py +5 -9
mlrun/run.py +57 -14
mlrun/runtimes/__init__.py +1 -3
mlrun/runtimes/base.py +30 -22
mlrun/runtimes/daskjob.py +9 -9
mlrun/runtimes/databricks_job/databricks_runtime.py +6 -5
mlrun/runtimes/function_reference.py +5 -2
mlrun/runtimes/generators.py +3 -2
mlrun/runtimes/kubejob.py +6 -7
mlrun/runtimes/mounts.py +574 -0
mlrun/runtimes/mpijob/__init__.py +0 -2
mlrun/runtimes/mpijob/abstract.py +7 -6
mlrun/runtimes/nuclio/api_gateway.py +7 -7
mlrun/runtimes/nuclio/application/application.py +11 -13
mlrun/runtimes/nuclio/application/reverse_proxy.go +66 -64
mlrun/runtimes/nuclio/function.py +127 -70
mlrun/runtimes/nuclio/serving.py +105 -37
mlrun/runtimes/pod.py +159 -54
mlrun/runtimes/remotesparkjob.py +3 -2
mlrun/runtimes/sparkjob/__init__.py +0 -2
mlrun/runtimes/sparkjob/spark3job.py +22 -12
mlrun/runtimes/utils.py +7 -6
mlrun/secrets.py +2 -2
mlrun/serving/__init__.py +8 -0
mlrun/serving/merger.py +7 -5
mlrun/serving/remote.py +35 -22
mlrun/serving/routers.py +186 -240
mlrun/serving/server.py +41 -10
mlrun/serving/states.py +432 -118
mlrun/serving/utils.py +13 -2
mlrun/serving/v1_serving.py +3 -2
mlrun/serving/v2_serving.py +161 -203
mlrun/track/__init__.py +1 -1
mlrun/track/tracker.py +2 -2
mlrun/track/trackers/mlflow_tracker.py +6 -5
mlrun/utils/async_http.py +35 -22
mlrun/utils/clones.py +7 -4
mlrun/utils/helpers.py +511 -58
mlrun/utils/logger.py +119 -13
mlrun/utils/notifications/notification/__init__.py +22 -19
mlrun/utils/notifications/notification/base.py +39 -15
mlrun/utils/notifications/notification/console.py +6 -6
mlrun/utils/notifications/notification/git.py +11 -11
mlrun/utils/notifications/notification/ipython.py +10 -9
mlrun/utils/notifications/notification/mail.py +176 -0
mlrun/utils/notifications/notification/slack.py +16 -8
mlrun/utils/notifications/notification/webhook.py +24 -8
mlrun/utils/notifications/notification_pusher.py +191 -200
mlrun/utils/regex.py +12 -2
mlrun/utils/version/version.json +2 -2
{mlrun-1.7.2rc3.dist-info → mlrun-1.8.0.dist-info}/METADATA +81 -54
mlrun-1.8.0.dist-info/RECORD +351 -0
{mlrun-1.7.2rc3.dist-info → mlrun-1.8.0.dist-info}/WHEEL +1 -1
mlrun/model_monitoring/applications/evidently_base.py +0 -137
mlrun/model_monitoring/db/stores/__init__.py +0 -136
mlrun/model_monitoring/db/stores/base/store.py +0 -213
mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +0 -71
mlrun/model_monitoring/db/stores/sqldb/models/base.py +0 -190
mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +0 -103
mlrun/model_monitoring/db/stores/sqldb/models/sqlite.py +0 -40
mlrun/model_monitoring/db/stores/sqldb/sql_store.py +0 -659
mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +0 -726
mlrun/model_monitoring/model_endpoint.py +0 -118
mlrun-1.7.2rc3.dist-info/RECORD +0 -351
{mlrun-1.7.2rc3.dist-info → mlrun-1.8.0.dist-info}/entry_points.txt +0 -0
{mlrun-1.7.2rc3.dist-info → mlrun-1.8.0.dist-info/licenses}/LICENSE +0 -0
{mlrun-1.7.2rc3.dist-info → mlrun-1.8.0.dist-info}/top_level.txt +0 -0

mlrun/model_monitoring/applications/context.py CHANGED Viewed

@@ -12,84 +12,92 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import json
 import socket
-from typing import Any, Optional, cast
+from typing import Any, Optional, Protocol, cast
+import nuclio.request
 import numpy as np
 import pandas as pd
 import mlrun.common.constants as mlrun_constants
 import mlrun.common.schemas.model_monitoring.constants as mm_constants
+import mlrun.errors
 import mlrun.feature_store as fstore
+import mlrun.feature_store.feature_set as fs
 import mlrun.features
 import mlrun.serving
 import mlrun.utils
 from mlrun.artifacts import Artifact, DatasetArtifact, ModelArtifact, get_model
-from mlrun.common.model_monitoring.helpers import FeatureStats, pad_features_hist
+from mlrun.common.model_monitoring.helpers import FeatureStats
+from mlrun.common.schemas import ModelEndpoint
 from mlrun.model_monitoring.helpers import (
     calculate_inputs_statistics,
-    get_endpoint_record,
 )
-from mlrun.model_monitoring.model_endpoint import ModelEndpoint
-class MonitoringApplicationContext:
+class _ArtifactsLogger(Protocol):
     """
-    The monitoring context holds all the relevant information for the monitoring application,
-    and also it can be used for logging artifacts and results.
-    The monitoring context has the following attributes:
-    :param application_name:        (str) The model monitoring application name.
-    :param project_name:            (str) The project name.
-    :param project:                 (MlrunProject) The project object.
-    :param logger:                  (mlrun.utils.Logger) MLRun logger.
-    :param nuclio_logger:           (nuclio.request.Logger) Nuclio logger.
-    :param sample_df_stats:         (FeatureStats) The new sample distribution dictionary.
-    :param feature_stats:           (FeatureStats) The train sample distribution dictionary.
-    :param sample_df:               (pd.DataFrame) The new sample DataFrame.
-    :param start_infer_time:        (pd.Timestamp) Start time of the monitoring schedule.
-    :param end_infer_time:          (pd.Timestamp) End time of the monitoring schedule.
-    :param latest_request:          (pd.Timestamp) Timestamp of the latest request on this endpoint_id.
-    :param endpoint_id:             (str) ID of the monitored model endpoint
-    :param output_stream_uri:       (str) URI of the output stream for results
-    :param model_endpoint:          (ModelEndpoint) The model endpoint object.
-    :param feature_names:           (list[str]) List of models feature names.
-    :param label_names:             (list[str]) List of models label names.
-    :param model:                   (tuple[str, ModelArtifact, dict]) The model file, model spec object,
-                                    and a list of extra data items.
+    Classes that implement this protocol are :code:`MlrunProject` and :code:`MLClientCtx`.
     """
+    def log_artifact(self, *args, **kwargs) -> Artifact: ...
+    def log_dataset(self, *args, **kwargs) -> DatasetArtifact: ...
+class MonitoringApplicationContext:
+    _logger_name = "monitoring-application"
     def __init__(
         self,
         *,
-        graph_context: mlrun.serving.GraphContext,
         application_name: str,
         event: dict[str, Any],
-        model_endpoint_dict: dict[str, ModelEndpoint],
+        project: "mlrun.MlrunProject",
+        artifacts_logger: _ArtifactsLogger,
+        logger: mlrun.utils.Logger,
+        nuclio_logger: nuclio.request.Logger,
+        model_endpoint_dict: Optional[dict[str, ModelEndpoint]] = None,
+        sample_df: Optional[pd.DataFrame] = None,
+        feature_stats: Optional[FeatureStats] = None,
+        feature_sets_dict: Optional[dict[str, fs.FeatureSet]] = None,
     ) -> None:
         """
-        Initialize a `MonitoringApplicationContext` object.
-        Note: this object should not be instantiated manually.
-        :param application_name:    The application name.
-        :param event:               The instance data dictionary.
-        :param model_endpoint_dict: Dictionary of model endpoints.
+        The :code:`MonitoringApplicationContext` object holds all the relevant information for the
+        model monitoring application, and can be used for logging artifacts and messages.
+        The monitoring context has the following attributes:
+        :param application_name:        (str) The model monitoring application name.
+        :param project:                 (:py:class:`~mlrun.projects.MlrunProject`) The current MLRun project object.
+        :param project_name:            (str) The project name.
+        :param logger:                  (:py:class:`~mlrun.utils.Logger`) MLRun logger.
+        :param nuclio_logger:           (nuclio.request.Logger) Nuclio logger.
+        :param sample_df_stats:         (FeatureStats) The new sample distribution dictionary.
+        :param feature_stats:           (FeatureStats) The train sample distribution dictionary.
+        :param sample_df:               (pd.DataFrame) The new sample DataFrame.
+        :param start_infer_time:        (pd.Timestamp) Start time of the monitoring schedule.
+        :param end_infer_time:          (pd.Timestamp) End time of the monitoring schedule.
+        :param endpoint_id:             (str) ID of the monitored model endpoint
+        :param feature_set:              (FeatureSet) the model endpoint feature set
+        :param endpoint_name:           (str) Name of the monitored model endpoint
+        :param output_stream_uri:       (str) URI of the output stream for results
+        :param model_endpoint:          (ModelEndpoint) The model endpoint object.
+        :param feature_names:           (list[str]) List of models feature names.
+        :param label_names:             (list[str]) List of models label names.
+        :param model:                   (tuple[str, ModelArtifact, dict]) The model file, model spec object,
+                                        and a list of extra data items.
         """
         self.application_name = application_name
-        self.project_name = graph_context.project
-        self.project = mlrun.load_project(url=self.project_name)
+        self.project = project
+        self.project_name = project.name
+        self._artifacts_logger = artifacts_logger
         # MLRun Logger
-        self.logger = mlrun.utils.create_logger(
-            level=mlrun.mlconf.log_level,
-            formatter_kind=mlrun.mlconf.log_formatter,
-            name="monitoring-application",
-        )
+        self.logger = logger
         # Nuclio logger - `nuclio.request.Logger`.
-        # Note: this logger does not accept keyword arguments.
-        self.nuclio_logger = graph_context.logger
+        # Note: this logger accepts keyword arguments only in its `_with` methods, e.g. `info_with`.
+        self.nuclio_logger = nuclio_logger
         # event data
         self.start_infer_time = pd.Timestamp(
@@ -101,29 +109,101 @@ class MonitoringApplicationContext:
         self.endpoint_id = cast(
             str, event.get(mm_constants.ApplicationEvent.ENDPOINT_ID)
         )
-        self.output_stream_uri = cast(
-            str, event.get(mm_constants.ApplicationEvent.OUTPUT_STREAM_URI)
+        self.endpoint_name = cast(
+            str, event.get(mm_constants.ApplicationEvent.ENDPOINT_NAME)
         )
-        self._feature_stats: Optional[FeatureStats] = None
+        self._feature_stats: Optional[FeatureStats] = feature_stats
         self._sample_df_stats: Optional[FeatureStats] = None
         # Default labels for the artifacts
         self._default_labels = self._get_default_labels()
         # Persistent data - fetched when needed
-        self._sample_df: Optional[pd.DataFrame] = None
-        self._model_endpoint: Optional[ModelEndpoint] = model_endpoint_dict.get(
-            self.endpoint_id
+        self._sample_df: Optional[pd.DataFrame] = sample_df
+        self._model_endpoint: Optional[ModelEndpoint] = (
+            model_endpoint_dict.get(self.endpoint_id) if model_endpoint_dict else None
+        )
+        self._feature_set: Optional[fs.FeatureSet] = (
+            feature_sets_dict.get(self.endpoint_id) if feature_sets_dict else None
+        )
+        store, _, _ = mlrun.store_manager.get_or_create_store(
+            mlrun.mlconf.artifact_path
+        )
+        self.storage_options = store.get_storage_options()
+    @classmethod
+    def _from_ml_ctx(
+        cls,
+        context: "mlrun.MLClientCtx",
+        *,
+        application_name: str,
+        event: dict[str, Any],
+        model_endpoint_dict: Optional[dict[str, ModelEndpoint]] = None,
+        sample_df: Optional[pd.DataFrame] = None,
+        feature_stats: Optional[FeatureStats] = None,
+    ) -> "MonitoringApplicationContext":
+        project = context.get_project_object()
+        if not project:
+            raise mlrun.errors.MLRunValueError("Could not load project from context")
+        logger = context.logger
+        artifacts_logger = context
+        nuclio_logger = nuclio.request.Logger(
+            level=mlrun.mlconf.log_level, name=cls._logger_name
+        )
+        return cls(
+            application_name=application_name,
+            event=event,
+            model_endpoint_dict=model_endpoint_dict,
+            project=project,
+            logger=logger,
+            nuclio_logger=nuclio_logger,
+            artifacts_logger=artifacts_logger,
+            sample_df=sample_df,
+            feature_stats=feature_stats,
+        )
+    @classmethod
+    def _from_graph_ctx(
+        cls,
+        graph_context: mlrun.serving.GraphContext,
+        *,
+        application_name: str,
+        event: dict[str, Any],
+        model_endpoint_dict: Optional[dict[str, ModelEndpoint]] = None,
+        sample_df: Optional[pd.DataFrame] = None,
+        feature_stats: Optional[FeatureStats] = None,
+        feature_sets_dict: Optional[dict[str, fs.FeatureSet]] = None,
+    ) -> "MonitoringApplicationContext":
+        nuclio_logger = graph_context.logger
+        artifacts_logger = graph_context.project_obj
+        logger = mlrun.utils.create_logger(
+            level=mlrun.mlconf.log_level,
+            formatter_kind=mlrun.mlconf.log_formatter,
+            name=cls._logger_name,
+        )
+        return cls(
+            application_name=application_name,
+            event=event,
+            project=graph_context.project_obj,
+            model_endpoint_dict=model_endpoint_dict,
+            logger=logger,
+            nuclio_logger=nuclio_logger,
+            artifacts_logger=artifacts_logger,
+            sample_df=sample_df,
+            feature_stats=feature_stats,
+            feature_sets_dict=feature_sets_dict,
         )
     def _get_default_labels(self) -> dict[str, str]:
-        return {
+        labels = {
             mlrun_constants.MLRunInternalLabels.runner_pod: socket.gethostname(),
             mlrun_constants.MLRunInternalLabels.producer_type: "model-monitoring-app",
             mlrun_constants.MLRunInternalLabels.app_name: self.application_name,
             mlrun_constants.MLRunInternalLabels.endpoint_id: self.endpoint_id,
+            mlrun_constants.MLRunInternalLabels.endpoint_name: self.endpoint_name,
         }
+        return {key: value for key, value in labels.items() if value is not None}
     def _add_default_labels(self, labels: Optional[dict[str, str]]) -> dict[str, str]:
         """Add the default labels to logged artifacts labels"""
@@ -132,39 +212,60 @@ class MonitoringApplicationContext:
     @property
     def sample_df(self) -> pd.DataFrame:
         if self._sample_df is None:
-            feature_set = fstore.get_feature_set(
-                self.model_endpoint.status.monitoring_feature_set_uri
-            )
-            features = [f"{feature_set.metadata.name}.*"]
-            vector = fstore.FeatureVector(
-                name=f"{self.endpoint_id}_vector",
-                features=features,
-                with_indexes=True,
-            )
-            vector.metadata.tag = self.application_name
-            vector.feature_set_objects = {feature_set.metadata.name: feature_set}
-            offline_response = vector.get_offline_features(
+            if (
+                self.endpoint_name is None
+                or self.endpoint_id is None
+                or pd.isnull(self.start_infer_time)
+                or pd.isnull(self.end_infer_time)
+            ):
+                raise mlrun.errors.MLRunValueError(
+                    "You have tried to access `monitoring_context.sample_df`, but have not provided it directly "
+                    "through `sample_data`, nor have you provided the model endpoint's name, ID, and the start and "
+                    f"end times: `endpoint_name`={self.endpoint_name}, `endpoint_uid`={self.endpoint_id}, "
+                    f"`start`={self.start_infer_time}, and `end`={self.end_infer_time}. "
+                    "You can either provide the sample dataframe directly, the model endpoint's details and times, "
+                    "or adapt the application's logic to not access the sample dataframe."
+                )
+            df = self.feature_set.to_dataframe(
                 start_time=self.start_infer_time,
                 end_time=self.end_infer_time,
-                timestamp_for_filtering=mm_constants.FeatureSetFeatures.time_stamp(),
+                time_column=mm_constants.EventFieldType.TIMESTAMP,
+                storage_options=self.storage_options,
             )
-            self._sample_df = offline_response.to_dataframe().reset_index(drop=True)
+            self._sample_df = df.reset_index(drop=True)
         return self._sample_df
     @property
     def model_endpoint(self) -> ModelEndpoint:
         if not self._model_endpoint:
-            self._model_endpoint = ModelEndpoint.from_flat_dict(
-                get_endpoint_record(self.project_name, self.endpoint_id)
+            if self.endpoint_name is None or self.endpoint_id is None:
+                raise mlrun.errors.MLRunValueError(
+                    "You have NOT provided the model endpoint's name and ID: "
+                    f"`endpoint_name`={self.endpoint_name} and `endpoint_id`={self.endpoint_id}, "
+                    "but you have tried to access `monitoring_context.model_endpoint` "
+                    "directly or indirectly in your application. You can either provide them, "
+                    "or adapt the application's logic to not access the model endpoint."
+                )
+            self._model_endpoint = mlrun.db.get_run_db().get_model_endpoint(
+                name=self.endpoint_name,
+                project=self.project_name,
+                endpoint_id=self.endpoint_id,
+                feature_analysis=True,
             )
         return self._model_endpoint
+    @property
+    def feature_set(self) -> fs.FeatureSet:
+        if not self._feature_set and self.model_endpoint:
+            self._feature_set = fstore.get_feature_set(
+                self.model_endpoint.spec.monitoring_feature_set_uri
+            )
+        return self._feature_set
     @property
     def feature_stats(self) -> FeatureStats:
         if not self._feature_stats:
-            self._feature_stats = json.loads(self.model_endpoint.status.feature_stats)
-            pad_features_hist(self._feature_stats)
+            self._feature_stats = self.model_endpoint.spec.feature_stats
         return self._feature_stats
     @property
@@ -179,18 +280,12 @@ class MonitoringApplicationContext:
     @property
     def feature_names(self) -> list[str]:
         """The feature names of the model"""
-        feature_names = self.model_endpoint.spec.feature_names
-        return (
-            feature_names
-            if isinstance(feature_names, list)
-            else json.loads(feature_names)
-        )
+        return self.model_endpoint.spec.feature_names
     @property
     def label_names(self) -> list[str]:
         """The label names of the model"""
-        label_names = self.model_endpoint.spec.label_names
-        return label_names if isinstance(label_names, list) else json.loads(label_names)
+        return self.model_endpoint.spec.label_names
     @property
     def model(self) -> tuple[str, ModelArtifact, dict]:
@@ -230,14 +325,32 @@ class MonitoringApplicationContext:
         upload: Optional[bool] = None,
         labels: Optional[dict[str, str]] = None,
         target_path: Optional[str] = None,
+        unique_per_endpoint: bool = True,
         **kwargs,
     ) -> Artifact:
         """
         Log an artifact.
-        See :func:`~mlrun.projects.MlrunProject.log_artifact` for the documentation.
+        .. caution::
+            Logging artifacts in every model monitoring window may cause scale issues.
+            This method should be called on special occasions only.
+        See :func:`~mlrun.projects.MlrunProject.log_artifact` for the full documentation, except for one
+        new argument:
+        :param unique_per_endpoint: by default ``True``, we will log different artifact for each model endpoint,
+                                    set to ``False`` without changing item key will cause artifact override.
         """
         labels = self._add_default_labels(labels)
-        return self.project.log_artifact(
+        # By default, we want to log different artifact for each model endpoint
+        endpoint_id = labels.get(mlrun_constants.MLRunInternalLabels.endpoint_id, "")
+        if unique_per_endpoint and isinstance(item, str):
+            item = f"{item}-{endpoint_id}" if endpoint_id else item
+        elif unique_per_endpoint:  # isinstance(item, Artifact) is True
+            item.key = f"{item.key}-{endpoint_id}" if endpoint_id else item.key
+        return self._artifacts_logger.log_artifact(
             item,
             body=body,
             tag=tag,
@@ -265,14 +378,30 @@ class MonitoringApplicationContext:
         target_path="",
         extra_data=None,
         label_column: Optional[str] = None,
+        unique_per_endpoint: bool = True,
         **kwargs,
     ) -> DatasetArtifact:
         """
         Log a dataset artifact.
-        See :func:`~mlrun.projects.MlrunProject.log_dataset` for the documentation.
+        .. caution::
+            Logging datasets in every model monitoring window may cause scale issues.
+            This method should be called on special occasions only.
+        See :func:`~mlrun.projects.MlrunProject.log_dataset` for the full documentation, except for one
+        new argument:
+        :param unique_per_endpoint: by default ``True``, we will log different artifact for each model endpoint,
+                                    set to ``False`` without changing item key will cause artifact override.
         """
         labels = self._add_default_labels(labels)
-        return self.project.log_dataset(
+        # By default, we want to log different artifact for each model endpoint
+        endpoint_id = labels.get(mlrun_constants.MLRunInternalLabels.endpoint_id, "")
+        if unique_per_endpoint and isinstance(key, str):
+            key = f"{key}-{endpoint_id}" if endpoint_id else key
+        return self._artifacts_logger.log_dataset(
             key,
             df,
             tag=tag,
@@ -288,54 +417,3 @@ class MonitoringApplicationContext:
             label_column=label_column,
             **kwargs,
         )
-    def log_model(
-        self,
-        key,
-        body=None,
-        framework="",
-        tag="",
-        model_dir=None,
-        model_file=None,
-        algorithm=None,
-        metrics=None,
-        parameters=None,
-        artifact_path=None,
-        upload=None,
-        labels=None,
-        inputs: Optional[list[mlrun.features.Feature]] = None,
-        outputs: Optional[list[mlrun.features.Feature]] = None,
-        feature_vector: Optional[str] = None,
-        feature_weights: Optional[list] = None,
-        training_set=None,
-        label_column=None,
-        extra_data=None,
-        **kwargs,
-    ) -> ModelArtifact:
-        """
-        Log a model artifact.
-        See :func:`~mlrun.projects.MlrunProject.log_model` for the documentation.
-        """
-        labels = self._add_default_labels(labels)
-        return self.project.log_model(
-            key,
-            body=body,
-            framework=framework,
-            tag=tag,
-            model_dir=model_dir,
-            model_file=model_file,
-            algorithm=algorithm,
-            metrics=metrics,
-            parameters=parameters,
-            artifact_path=artifact_path,
-            upload=upload,
-            labels=labels,
-            inputs=inputs,
-            outputs=outputs,
-            feature_vector=feature_vector,
-            feature_weights=feature_weights,
-            training_set=training_set,
-            label_column=label_column,
-            extra_data=extra_data,
-            **kwargs,
-        )

mlrun/model_monitoring/{db/stores/base → applications/evidently}/__init__.py RENAMED Viewed

@@ -1,4 +1,4 @@
-# Copyright 2024 Iguazio
+# Copyright 2025 Iguazio
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,4 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from .store import StoreBase
+from .base import (
+    _HAS_EVIDENTLY,
+    SUPPORTED_EVIDENTLY_VERSION,
+    EvidentlyModelMonitoringApplicationBase,
+)

mlrun/model_monitoring/applications/evidently/base.py ADDED Viewed

@@ -0,0 +1,146 @@
+# Copyright 2023 Iguazio
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import warnings
+from abc import ABC
+from tempfile import NamedTemporaryFile
+from typing import Optional
+import semver
+import mlrun.model_monitoring.applications.base as mm_base
+import mlrun.model_monitoring.applications.context as mm_context
+from mlrun.errors import MLRunIncompatibleVersionError, MLRunValueError
+# Evidently version this integration was tested with; passed as `ref` to the
+# import-time compatibility check further down in this module.
+SUPPORTED_EVIDENTLY_VERSION = semver.Version.parse("0.7.5")
+def _check_evidently_version(*, cur: semver.Version, ref: semver.Version) -> None:
+    """
+    Validate an Evidently version against a reference version.
+    The cases are checked in this order:
+    * ``ref.is_compatible(cur)`` is true, or both versions are ``0.x`` releases with the
+      same minor and ``cur`` has a strictly higher patch: return silently.
+    * both versions are ``0.x`` releases and ``cur`` has a higher minor: emit a warning
+      and return.
+    * anything else: raise.
+    :param cur: The version to check (the module passes the installed Evidently version).
+    :param ref: The reference version to compare against.
+    :raise MLRunIncompatibleVersionError: If ``cur`` matches none of the accepted cases.
+    """
+    # NOTE(review): the explicit patch comparison presumably exists because
+    # `is_compatible` alone rejects newer 0.x patch releases - confirm against semver.
+    if ref.is_compatible(cur) or (
+        cur.major == ref.major == 0 and cur.minor == ref.minor and cur.patch > ref.patch
+    ):
+        return
+    # A newer 0.x minor is tolerated, but the user is warned that it is untested.
+    if cur.major == ref.major == 0 and cur.minor > ref.minor:
+        warnings.warn(
+            f"Evidently version {cur} is not compatible with the tested "
+            f"version {ref}, use at your own risk."
+        )
+    else:
+        raise MLRunIncompatibleVersionError(
+            f"Evidently version {cur} is not supported, please change to "
+            f"{ref} (or another compatible version)."
+        )
+# Stays False unless `evidently` imports successfully and passes the version check below.
+_HAS_EVIDENTLY = False
+try:
+    import evidently  # noqa: F401
+    # Only a missing package is caught below, so an MLRunIncompatibleVersionError raised
+    # by this check propagates to whoever imports this module.
+    _check_evidently_version(
+        cur=semver.Version.parse(evidently.__version__),
+        ref=SUPPORTED_EVIDENTLY_VERSION,
+    )
+    _HAS_EVIDENTLY = True
+except ModuleNotFoundError:
+    # Evidently is optional: the module still imports without it, and
+    # EvidentlyModelMonitoringApplicationBase.__init__ raises when it is used.
+    pass
+if _HAS_EVIDENTLY:
+    # These names are bound only when Evidently is available; the class below refers to
+    # them in string annotations and inside method bodies only.
+    from evidently.core.report import Snapshot
+    from evidently.ui.workspace import (
+        STR_UUID,
+        CloudWorkspace,
+        Project,
+        Workspace,
+        WorkspaceBase,
+    )
+class EvidentlyModelMonitoringApplicationBase(
+    mm_base.ModelMonitoringApplicationBase, ABC
+):
+    def __init__(
+        self,
+        evidently_project_id: "STR_UUID",
+        evidently_workspace_path: Optional[str] = None,
+        cloud_workspace: bool = False,
+    ) -> None:
+        """
+        A class for integrating Evidently for MLRun model monitoring within a monitoring application.
+        .. note::
+            The ``evidently`` package is not installed by default in the mlrun/mlrun image.
+            It must be installed separately to use this class.
+        :param evidently_project_id:        (str) The ID of the Evidently project.
+        :param evidently_workspace_path:    (str) The path to the Evidently workspace. It is read by
+                                            :py:meth:`get_workspace` only, not by :py:meth:`get_cloud_workspace`.
+        :param cloud_workspace:             (bool) Whether the workspace is an Evidently Cloud workspace.
+        :raise ModuleNotFoundError: If Evidently could not be imported (``_HAS_EVIDENTLY`` is ``False``).
+        :raise MLRunValueError:     If ``cloud_workspace`` is ``False`` and ``evidently_workspace_path`` is
+                                    not set (raised by the default :py:meth:`get_workspace`).
+        """
+        if not _HAS_EVIDENTLY:
+            raise ModuleNotFoundError("Evidently is not installed - the app cannot run")
+        self.evidently_workspace_path = evidently_workspace_path
+        if cloud_workspace:
+            # Shadow `get_workspace` on this instance so the call below loads the cloud workspace.
+            self.get_workspace = self.get_cloud_workspace
+        # The workspace must be set before `load_project`, which reads `self.evidently_workspace`.
+        self.evidently_workspace = self.get_workspace()
+        self.evidently_project_id = evidently_project_id
+        self.evidently_project = self.load_project()
+    def load_project(self) -> "Project":
+        """Load the Evidently project with ID ``self.evidently_project_id`` from ``self.evidently_workspace``."""
+        return self.evidently_workspace.get_project(self.evidently_project_id)
+    def get_workspace(self) -> "WorkspaceBase":
+        """
+        Get the Evidently workspace. Override this method to customize access to the workspace.
+        :returns: The workspace created from ``self.evidently_workspace_path``.
+        :raise MLRunValueError: If ``self.evidently_workspace_path`` is not set.
+        """
+        if self.evidently_workspace_path:
+            return Workspace.create(self.evidently_workspace_path)
+        else:
+            raise MLRunValueError(
+                "A local workspace could not be created as `evidently_workspace_path` is not set.\n"
+                "If you intend to use a cloud workspace, please use `cloud_workspace=True` and set the "
+                "`EVIDENTLY_API_KEY` environment variable. In other cases, override this method."
+            )
+    def get_cloud_workspace(self) -> "CloudWorkspace":
+        """Load the Evidently cloud workspace according to the `EVIDENTLY_API_KEY` environment variable."""
+        return CloudWorkspace()
+    @staticmethod
+    def log_evidently_object(
+        monitoring_context: mm_context.MonitoringApplicationContext,
+        evidently_object: "Snapshot",
+        artifact_name: str,
+        unique_per_endpoint: bool = True,
+    ) -> None:
+        """
+        Save an Evidently object as an HTML file and log that file as an artifact.
+        .. caution::
+            Logging Evidently objects in every model monitoring window may cause scale issues.
+            This method should be called on special occasions only.
+        :param monitoring_context:  (MonitoringApplicationContext) The monitoring context to process.
+        :param evidently_object:    (Snapshot) The Evidently run to log, e.g. a report run.
+        :param artifact_name:       (str) The name for the logged artifact.
+        :param unique_per_endpoint: (bool) ``True`` by default, so that a different artifact is logged for
+                                    each model endpoint. With ``False``, reusing the same artifact name
+                                    causes the artifact to be overridden. The value is passed as is to
+                                    ``monitoring_context.log_artifact``.
+        """
+        # The temporary file is deleted when the `with` block exits, so the artifact
+        # has to be logged while the block is still open.
+        with NamedTemporaryFile(suffix=".html") as file:
+            evidently_object.save_html(filename=file.name)
+            monitoring_context.log_artifact(
+                artifact_name,
+                local_path=file.name,
+                unique_per_endpoint=unique_per_endpoint,
+            )

mlrun 1.7.2rc3__py3-none-any.whl → 1.8.0__py3-none-any.whl

Potentially problematic release.

mlrun 1.7.2rc3__py3-none-any.whl → 1.8.0__py3-none-any.whl