PyPI - mlrun - Versions diffs - 1.7.2rc4__py3-none-any.whl → 1.8.0__py3-none-any.whl - Mend

mlrun 1.7.2rc4-py3-none-any.whl → 1.8.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mlrun might be problematic. Click here for more details.

Files changed (275)

mlrun/__init__.py +26 -22
mlrun/__main__.py +15 -16
mlrun/alerts/alert.py +150 -15
mlrun/api/schemas/__init__.py +1 -9
mlrun/artifacts/__init__.py +2 -3
mlrun/artifacts/base.py +62 -19
mlrun/artifacts/dataset.py +17 -17
mlrun/artifacts/document.py +454 -0
mlrun/artifacts/manager.py +28 -18
mlrun/artifacts/model.py +91 -59
mlrun/artifacts/plots.py +2 -2
mlrun/common/constants.py +8 -0
mlrun/common/formatters/__init__.py +1 -0
mlrun/common/formatters/artifact.py +1 -1
mlrun/common/formatters/feature_set.py +2 -0
mlrun/common/formatters/function.py +1 -0
mlrun/{model_monitoring/db/stores/v3io_kv/__init__.py → common/formatters/model_endpoint.py} +17 -0
mlrun/common/formatters/pipeline.py +1 -2
mlrun/common/formatters/project.py +9 -0
mlrun/common/model_monitoring/__init__.py +0 -5
mlrun/common/model_monitoring/helpers.py +12 -62
mlrun/common/runtimes/constants.py +25 -4
mlrun/common/schemas/__init__.py +9 -5
mlrun/common/schemas/alert.py +114 -19
mlrun/common/schemas/api_gateway.py +3 -3
mlrun/common/schemas/artifact.py +22 -9
mlrun/common/schemas/auth.py +8 -4
mlrun/common/schemas/background_task.py +7 -7
mlrun/common/schemas/client_spec.py +4 -4
mlrun/common/schemas/clusterization_spec.py +2 -2
mlrun/common/schemas/common.py +53 -3
mlrun/common/schemas/constants.py +15 -0
mlrun/common/schemas/datastore_profile.py +1 -1
mlrun/common/schemas/feature_store.py +9 -9
mlrun/common/schemas/frontend_spec.py +4 -4
mlrun/common/schemas/function.py +10 -10
mlrun/common/schemas/hub.py +1 -1
mlrun/common/schemas/k8s.py +3 -3
mlrun/common/schemas/memory_reports.py +3 -3
mlrun/common/schemas/model_monitoring/__init__.py +4 -8
mlrun/common/schemas/model_monitoring/constants.py +127 -46
mlrun/common/schemas/model_monitoring/grafana.py +18 -12
mlrun/common/schemas/model_monitoring/model_endpoints.py +154 -160
mlrun/common/schemas/notification.py +24 -3
mlrun/common/schemas/object.py +1 -1
mlrun/common/schemas/pagination.py +4 -4
mlrun/common/schemas/partition.py +142 -0
mlrun/common/schemas/pipeline.py +3 -3
mlrun/common/schemas/project.py +26 -18
mlrun/common/schemas/runs.py +3 -3
mlrun/common/schemas/runtime_resource.py +5 -5
mlrun/common/schemas/schedule.py +1 -1
mlrun/common/schemas/secret.py +1 -1
mlrun/{model_monitoring/db/stores/sqldb/__init__.py → common/schemas/serving.py} +10 -1
mlrun/common/schemas/tag.py +3 -3
mlrun/common/schemas/workflow.py +6 -5
mlrun/common/types.py +1 -0
mlrun/config.py +157 -89
mlrun/data_types/__init__.py +5 -3
mlrun/data_types/infer.py +13 -3
mlrun/data_types/spark.py +2 -1
mlrun/datastore/__init__.py +59 -18
mlrun/datastore/alibaba_oss.py +4 -1
mlrun/datastore/azure_blob.py +4 -1
mlrun/datastore/base.py +19 -24
mlrun/datastore/datastore.py +10 -4
mlrun/datastore/datastore_profile.py +178 -45
mlrun/datastore/dbfs_store.py +4 -1
mlrun/datastore/filestore.py +4 -1
mlrun/datastore/google_cloud_storage.py +4 -1
mlrun/datastore/hdfs.py +4 -1
mlrun/datastore/inmem.py +4 -1
mlrun/datastore/redis.py +4 -1
mlrun/datastore/s3.py +14 -3
mlrun/datastore/sources.py +89 -92
mlrun/datastore/store_resources.py +7 -4
mlrun/datastore/storeytargets.py +51 -16
mlrun/datastore/targets.py +38 -31
mlrun/datastore/utils.py +87 -4
mlrun/datastore/v3io.py +4 -1
mlrun/datastore/vectorstore.py +291 -0
mlrun/datastore/wasbfs/fs.py +13 -12
mlrun/db/base.py +286 -100
mlrun/db/httpdb.py +1562 -490
mlrun/db/nopdb.py +250 -83
mlrun/errors.py +6 -2
mlrun/execution.py +194 -50
mlrun/feature_store/__init__.py +2 -10
mlrun/feature_store/api.py +20 -458
mlrun/feature_store/common.py +9 -9
mlrun/feature_store/feature_set.py +20 -18
mlrun/feature_store/feature_vector.py +105 -479
mlrun/feature_store/feature_vector_utils.py +466 -0
mlrun/feature_store/retrieval/base.py +15 -11
mlrun/feature_store/retrieval/job.py +2 -1
mlrun/feature_store/retrieval/storey_merger.py +1 -1
mlrun/feature_store/steps.py +3 -3
mlrun/features.py +30 -13
mlrun/frameworks/__init__.py +1 -2
mlrun/frameworks/_common/__init__.py +1 -2
mlrun/frameworks/_common/artifacts_library.py +2 -2
mlrun/frameworks/_common/mlrun_interface.py +10 -6
mlrun/frameworks/_common/model_handler.py +31 -31
mlrun/frameworks/_common/producer.py +3 -1
mlrun/frameworks/_dl_common/__init__.py +1 -2
mlrun/frameworks/_dl_common/loggers/__init__.py +1 -2
mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +4 -4
mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +3 -3
mlrun/frameworks/_ml_common/__init__.py +1 -2
mlrun/frameworks/_ml_common/loggers/__init__.py +1 -2
mlrun/frameworks/_ml_common/model_handler.py +21 -21
mlrun/frameworks/_ml_common/plans/__init__.py +1 -2
mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +3 -1
mlrun/frameworks/_ml_common/plans/dataset_plan.py +3 -3
mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +4 -4
mlrun/frameworks/auto_mlrun/__init__.py +1 -2
mlrun/frameworks/auto_mlrun/auto_mlrun.py +22 -15
mlrun/frameworks/huggingface/__init__.py +1 -2
mlrun/frameworks/huggingface/model_server.py +9 -9
mlrun/frameworks/lgbm/__init__.py +47 -44
mlrun/frameworks/lgbm/callbacks/__init__.py +1 -2
mlrun/frameworks/lgbm/callbacks/logging_callback.py +4 -2
mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +4 -2
mlrun/frameworks/lgbm/mlrun_interfaces/__init__.py +1 -2
mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +5 -5
mlrun/frameworks/lgbm/model_handler.py +15 -11
mlrun/frameworks/lgbm/model_server.py +11 -7
mlrun/frameworks/lgbm/utils.py +2 -2
mlrun/frameworks/onnx/__init__.py +1 -2
mlrun/frameworks/onnx/dataset.py +3 -3
mlrun/frameworks/onnx/mlrun_interface.py +2 -2
mlrun/frameworks/onnx/model_handler.py +7 -5
mlrun/frameworks/onnx/model_server.py +8 -6
mlrun/frameworks/parallel_coordinates.py +11 -11
mlrun/frameworks/pytorch/__init__.py +22 -23
mlrun/frameworks/pytorch/callbacks/__init__.py +1 -2
mlrun/frameworks/pytorch/callbacks/callback.py +2 -1
mlrun/frameworks/pytorch/callbacks/logging_callback.py +15 -8
mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +19 -12
mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +22 -15
mlrun/frameworks/pytorch/callbacks_handler.py +36 -30
mlrun/frameworks/pytorch/mlrun_interface.py +17 -17
mlrun/frameworks/pytorch/model_handler.py +21 -17
mlrun/frameworks/pytorch/model_server.py +13 -9
mlrun/frameworks/sklearn/__init__.py +19 -18
mlrun/frameworks/sklearn/estimator.py +2 -2
mlrun/frameworks/sklearn/metric.py +3 -3
mlrun/frameworks/sklearn/metrics_library.py +8 -6
mlrun/frameworks/sklearn/mlrun_interface.py +3 -2
mlrun/frameworks/sklearn/model_handler.py +4 -3
mlrun/frameworks/tf_keras/__init__.py +11 -12
mlrun/frameworks/tf_keras/callbacks/__init__.py +1 -2
mlrun/frameworks/tf_keras/callbacks/logging_callback.py +17 -14
mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +15 -12
mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +21 -18
mlrun/frameworks/tf_keras/model_handler.py +17 -13
mlrun/frameworks/tf_keras/model_server.py +12 -8
mlrun/frameworks/xgboost/__init__.py +19 -18
mlrun/frameworks/xgboost/model_handler.py +13 -9
mlrun/k8s_utils.py +2 -5
mlrun/launcher/base.py +3 -4
mlrun/launcher/client.py +2 -2
mlrun/launcher/local.py +6 -2
mlrun/launcher/remote.py +1 -1
mlrun/lists.py +8 -4
mlrun/model.py +132 -46
mlrun/model_monitoring/__init__.py +3 -5
mlrun/model_monitoring/api.py +113 -98
mlrun/model_monitoring/applications/__init__.py +0 -5
mlrun/model_monitoring/applications/_application_steps.py +81 -50
mlrun/model_monitoring/applications/base.py +467 -14
mlrun/model_monitoring/applications/context.py +212 -134
mlrun/model_monitoring/{db/stores/base → applications/evidently}/__init__.py +6 -2
mlrun/model_monitoring/applications/evidently/base.py +146 -0
mlrun/model_monitoring/applications/histogram_data_drift.py +89 -56
mlrun/model_monitoring/applications/results.py +67 -15
mlrun/model_monitoring/controller.py +701 -315
mlrun/model_monitoring/db/__init__.py +0 -2
mlrun/model_monitoring/db/_schedules.py +242 -0
mlrun/model_monitoring/db/_stats.py +189 -0
mlrun/model_monitoring/db/tsdb/__init__.py +33 -22
mlrun/model_monitoring/db/tsdb/base.py +243 -49
mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +76 -36
mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +33 -0
mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connection.py +213 -0
mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +534 -88
mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +1 -0
mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +436 -106
mlrun/model_monitoring/helpers.py +356 -114
mlrun/model_monitoring/stream_processing.py +190 -345
mlrun/model_monitoring/tracking_policy.py +11 -4
mlrun/model_monitoring/writer.py +49 -90
mlrun/package/__init__.py +3 -6
mlrun/package/context_handler.py +2 -2
mlrun/package/packager.py +12 -9
mlrun/package/packagers/__init__.py +0 -2
mlrun/package/packagers/default_packager.py +14 -11
mlrun/package/packagers/numpy_packagers.py +16 -7
mlrun/package/packagers/pandas_packagers.py +18 -18
mlrun/package/packagers/python_standard_library_packagers.py +25 -11
mlrun/package/packagers_manager.py +35 -32
mlrun/package/utils/__init__.py +0 -3
mlrun/package/utils/_pickler.py +6 -6
mlrun/platforms/__init__.py +47 -16
mlrun/platforms/iguazio.py +4 -1
mlrun/projects/operations.py +30 -30
mlrun/projects/pipelines.py +116 -47
mlrun/projects/project.py +1292 -329
mlrun/render.py +5 -9
mlrun/run.py +57 -14
mlrun/runtimes/__init__.py +1 -3
mlrun/runtimes/base.py +30 -22
mlrun/runtimes/daskjob.py +9 -9
mlrun/runtimes/databricks_job/databricks_runtime.py +6 -5
mlrun/runtimes/function_reference.py +5 -2
mlrun/runtimes/generators.py +3 -2
mlrun/runtimes/kubejob.py +6 -7
mlrun/runtimes/mounts.py +574 -0
mlrun/runtimes/mpijob/__init__.py +0 -2
mlrun/runtimes/mpijob/abstract.py +7 -6
mlrun/runtimes/nuclio/api_gateway.py +7 -7
mlrun/runtimes/nuclio/application/application.py +11 -13
mlrun/runtimes/nuclio/application/reverse_proxy.go +66 -64
mlrun/runtimes/nuclio/function.py +127 -70
mlrun/runtimes/nuclio/serving.py +105 -37
mlrun/runtimes/pod.py +159 -54
mlrun/runtimes/remotesparkjob.py +3 -2
mlrun/runtimes/sparkjob/__init__.py +0 -2
mlrun/runtimes/sparkjob/spark3job.py +22 -12
mlrun/runtimes/utils.py +7 -6
mlrun/secrets.py +2 -2
mlrun/serving/__init__.py +8 -0
mlrun/serving/merger.py +7 -5
mlrun/serving/remote.py +35 -22
mlrun/serving/routers.py +186 -240
mlrun/serving/server.py +41 -10
mlrun/serving/states.py +432 -118
mlrun/serving/utils.py +13 -2
mlrun/serving/v1_serving.py +3 -2
mlrun/serving/v2_serving.py +161 -203
mlrun/track/__init__.py +1 -1
mlrun/track/tracker.py +2 -2
mlrun/track/trackers/mlflow_tracker.py +6 -5
mlrun/utils/async_http.py +35 -22
mlrun/utils/clones.py +7 -4
mlrun/utils/helpers.py +511 -58
mlrun/utils/logger.py +119 -13
mlrun/utils/notifications/notification/__init__.py +22 -19
mlrun/utils/notifications/notification/base.py +39 -15
mlrun/utils/notifications/notification/console.py +6 -6
mlrun/utils/notifications/notification/git.py +11 -11
mlrun/utils/notifications/notification/ipython.py +10 -9
mlrun/utils/notifications/notification/mail.py +176 -0
mlrun/utils/notifications/notification/slack.py +16 -8
mlrun/utils/notifications/notification/webhook.py +24 -8
mlrun/utils/notifications/notification_pusher.py +191 -200
mlrun/utils/regex.py +12 -2
mlrun/utils/version/version.json +2 -2
{mlrun-1.7.2rc4.dist-info → mlrun-1.8.0.dist-info}/METADATA +69 -54
mlrun-1.8.0.dist-info/RECORD +351 -0
{mlrun-1.7.2rc4.dist-info → mlrun-1.8.0.dist-info}/WHEEL +1 -1
mlrun/model_monitoring/applications/evidently_base.py +0 -137
mlrun/model_monitoring/db/stores/__init__.py +0 -136
mlrun/model_monitoring/db/stores/base/store.py +0 -213
mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +0 -71
mlrun/model_monitoring/db/stores/sqldb/models/base.py +0 -190
mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +0 -103
mlrun/model_monitoring/db/stores/sqldb/models/sqlite.py +0 -40
mlrun/model_monitoring/db/stores/sqldb/sql_store.py +0 -659
mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +0 -726
mlrun/model_monitoring/model_endpoint.py +0 -118
mlrun-1.7.2rc4.dist-info/RECORD +0 -351
{mlrun-1.7.2rc4.dist-info → mlrun-1.8.0.dist-info}/entry_points.txt +0 -0
{mlrun-1.7.2rc4.dist-info → mlrun-1.8.0.dist-info/licenses}/LICENSE +0 -0
{mlrun-1.7.2rc4.dist-info → mlrun-1.8.0.dist-info}/top_level.txt +0 -0

mlrun/model_monitoring/api.py CHANGED Viewed

@@ -23,33 +23,43 @@ import pandas as pd
 import mlrun.artifacts
 import mlrun.common.helpers
 import mlrun.common.schemas.model_monitoring.constants as mm_constants
+import mlrun.datastore.base
 import mlrun.feature_store
 import mlrun.model_monitoring.applications as mm_app
 import mlrun.serving
+from mlrun.common.schemas import ModelEndpoint
+from mlrun.common.schemas.model_monitoring import (
+    FunctionURI,
+)
 from mlrun.data_types.infer import InferOptions, get_df_stats
 from mlrun.utils import datetime_now, logger
 from .helpers import update_model_endpoint_last_request
-from .model_endpoint import ModelEndpoint
 # A union of all supported dataset types:
 DatasetType = typing.Union[
-    mlrun.DataItem, list, dict, pd.DataFrame, pd.Series, np.ndarray, typing.Any
+    mlrun.datastore.base.DataItem,
+    list,
+    dict,
+    pd.DataFrame,
+    pd.Series,
+    np.ndarray,
+    typing.Any,
 ]
 def get_or_create_model_endpoint(
     project: str,
+    model_endpoint_name: str,
     model_path: str = "",
-    model_endpoint_name: str = "",
     endpoint_id: str = "",
     function_name: str = "",
-    context: mlrun.MLClientCtx = None,
-    sample_set_statistics: dict[str, typing.Any] = None,
-    drift_threshold: typing.Optional[float] = None,
-    possible_drift_threshold: typing.Optional[float] = None,
-    monitoring_mode: mm_constants.ModelMonitoringMode = mm_constants.ModelMonitoringMode.disabled,
+    function_tag: str = "latest",
+    context: typing.Optional["mlrun.MLClientCtx"] = None,
+    sample_set_statistics: typing.Optional[dict[str, typing.Any]] = None,
+    monitoring_mode: mm_constants.ModelMonitoringMode = mm_constants.ModelMonitoringMode.enabled,
     db_session=None,
+    feature_analysis: bool = False,
 ) -> ModelEndpoint:
     """
     Get a single model endpoint object. If not exist, generate a new model endpoint with the provided parameters. Note
@@ -57,40 +67,41 @@ def get_or_create_model_endpoint(
     features, set `monitoring_mode=enabled`.
     :param project:                  Project name.
-    :param model_path:               The model store path (applicable only to new endpoint_id).
     :param model_endpoint_name:      If a new model endpoint is created, the model endpoint name will be presented
                                      under this endpoint (applicable only to new endpoint_id).
+    :param model_path:               The model store path (applicable only to new endpoint_id).
     :param endpoint_id:              Model endpoint unique ID. If not exist in DB, will generate a new record based
                                      on the provided `endpoint_id`.
-    :param function_name:            If a new model endpoint is created, use this function name for generating the
-                                     function URI (applicable only to new endpoint_id).
+    :param function_name:            If a new model endpoint is created, use this function name.
+    :param function_tag:             If a new model endpoint is created, use this function tag.
     :param context:                  MLRun context. If `function_name` not provided, use the context to generate the
                                      full function hash.
     :param sample_set_statistics:    Dictionary of sample set statistics that will be used as a reference data for
                                      the new model endpoint (applicable only to new endpoint_id).
-    :param drift_threshold:          (deprecated) The threshold of which to mark drifts (applicable only to new
-                                     endpoint_id).
-    :param possible_drift_threshold: (deprecated) The threshold of which to mark possible drifts (applicable only to new
-                                     endpoint_id).
     :param monitoring_mode:          If enabled, apply model monitoring features on the provided endpoint id
                                      (applicable only to new endpoint_id).
     :param db_session:               A runtime session that manages the current dialog with the database.
+    :param feature_analysis:         If True, the model endpoint will be retrieved with the feature analysis mode.
     :return: A ModelEndpoint object
     """
-    if not endpoint_id:
-        # Generate a new model endpoint id based on the project name and model name
-        endpoint_id = hashlib.sha1(
-            f"{project}_{model_endpoint_name}".encode()
-        ).hexdigest()
     if not db_session:
         # Generate a runtime database
         db_session = mlrun.get_run_db()
+    model_endpoint = None
+    if not function_name and context:
+        function_name = FunctionURI.from_string(
+            context.to_dict()["spec"]["function"]
+        ).function
     try:
         model_endpoint = db_session.get_model_endpoint(
-            project=project, endpoint_id=endpoint_id
+            project=project,
+            name=model_endpoint_name,
+            endpoint_id=endpoint_id,
+            function_name=function_name,
+            function_tag=function_tag or "latest",
+            feature_analysis=feature_analysis,
         )
         # If other fields provided, validate that they are correspond to the existing model endpoint data
         _model_endpoint_validations(
@@ -99,17 +110,17 @@ def get_or_create_model_endpoint(
             sample_set_statistics=sample_set_statistics,
         )
-    except mlrun.errors.MLRunNotFoundError:
+    except (mlrun.errors.MLRunNotFoundError, mlrun.errors.MLRunInvalidArgumentError):
         # Create a new model endpoint with the provided details
+        pass
+    if not model_endpoint:
         model_endpoint = _generate_model_endpoint(
             project=project,
             db_session=db_session,
-            endpoint_id=endpoint_id,
             model_path=model_path,
             model_endpoint_name=model_endpoint_name,
             function_name=function_name,
-            context=context,
-            sample_set_statistics=sample_set_statistics,
+            function_tag=function_tag,
             monitoring_mode=monitoring_mode,
         )
     return model_endpoint
@@ -121,7 +132,7 @@ def record_results(
     model_endpoint_name: str,
     endpoint_id: str = "",
     function_name: str = "",
-    context: typing.Optional[mlrun.MLClientCtx] = None,
+    context: typing.Optional["mlrun.MLClientCtx"] = None,
     infer_results_df: typing.Optional[pd.DataFrame] = None,
     sample_set_statistics: typing.Optional[dict[str, typing.Any]] = None,
     monitoring_mode: mm_constants.ModelMonitoringMode = mm_constants.ModelMonitoringMode.enabled,
@@ -149,7 +160,8 @@ def record_results(
     :param context:                  MLRun context. Note that the context is required generating the model endpoint.
     :param infer_results_df:         DataFrame that will be stored under the model endpoint parquet target. Will be
                                      used for doing the drift analysis. Please make sure that the dataframe includes
-                                     both feature names and label columns.
+                                     both feature names and label columns. If you are recording results for existing
+                                     model endpoint, the endpoint should be a batch endpoint.
     :param sample_set_statistics:    Dictionary of sample set statistics that will be used as a reference data for
                                      the current model endpoint.
     :param monitoring_mode:          If enabled, apply model monitoring features on the provided endpoint id. Enabled
@@ -169,7 +181,7 @@ def record_results(
     if drift_threshold is not None or possible_drift_threshold is not None:
         warnings.warn(
             "Custom drift threshold arguments are deprecated since version "
-            "1.7.0 and have no effect. They will be removed in version 1.9.0.\n"
+            "1.7.0 and have no effect. They will be removed in version 1.10.0.\n"
             "To enable the default histogram data drift application, run:\n"
             "`project.enable_model_monitoring()`.",
             FutureWarning,
@@ -177,7 +189,7 @@ def record_results(
     if trigger_monitoring_job is not False:
         warnings.warn(
             "`trigger_monitoring_job` argument is deprecated since version "
-            "1.7.0 and has no effect. It will be removed in version 1.9.0.\n"
+            "1.7.0 and has no effect. It will be removed in version 1.10.0.\n"
             "To enable the default histogram data drift application, run:\n"
             "`project.enable_model_monitoring()`.",
             FutureWarning,
@@ -185,13 +197,13 @@ def record_results(
     if artifacts_tag != "":
         warnings.warn(
             "`artifacts_tag` argument is deprecated since version "
-            "1.7.0 and has no effect. It will be removed in version 1.9.0.",
+            "1.7.0 and has no effect. It will be removed in version 1.10.0.",
             FutureWarning,
         )
     if default_batch_image != "mlrun/mlrun":
         warnings.warn(
             "`default_batch_image` argument is deprecated since version "
-            "1.7.0 and has no effect. It will be removed in version 1.9.0.",
+            "1.7.0 and has no effect. It will be removed in version 1.10.0.",
             FutureWarning,
         )
@@ -208,25 +220,34 @@ def record_results(
         monitoring_mode=monitoring_mode,
         db_session=db,
     )
-    logger.debug("Model endpoint", endpoint=model_endpoint.to_dict())
+    logger.debug("Model endpoint", endpoint=model_endpoint)
-    timestamp = datetime_now()
     if infer_results_df is not None:
-        # Write the monitoring parquet to the relevant model endpoint context
-        write_monitoring_df(
-            feature_set_uri=model_endpoint.status.monitoring_feature_set_uri,
-            infer_datetime=timestamp,
-            endpoint_id=model_endpoint.metadata.uid,
-            infer_results_df=infer_results_df,
-        )
+        if (
+            model_endpoint.metadata.endpoint_type
+            != mlrun.common.schemas.model_monitoring.EndpointType.BATCH_EP
+        ):
+            logger.warning(
+                "Inference results can be recorded only for batch endpoints. "
+                "Therefore the current results won't be monitored."
+            )
+        else:
+            timestamp = datetime_now()
+            # Write the monitoring parquet to the relevant model endpoint context
+            write_monitoring_df(
+                feature_set_uri=model_endpoint.spec.monitoring_feature_set_uri,
+                infer_datetime=timestamp,
+                endpoint_id=model_endpoint.metadata.uid,
+                infer_results_df=infer_results_df,
+            )
-    # Update the last request time
-    update_model_endpoint_last_request(
-        project=project,
-        model_endpoint=model_endpoint,
-        current_request=timestamp,
-        db=db,
-    )
+            # Update the last request time
+            update_model_endpoint_last_request(
+                project=project,
+                model_endpoint=model_endpoint,
+                current_request=timestamp,
+                db=db,
+            )
     return model_endpoint
@@ -234,7 +255,7 @@ def record_results(
 def _model_endpoint_validations(
     model_endpoint: ModelEndpoint,
     model_path: str = "",
-    sample_set_statistics: dict[str, typing.Any] = None,
+    sample_set_statistics: typing.Optional[dict[str, typing.Any]] = None,
 ) -> None:
     """
     Validate that provided model endpoint configurations match the stored fields of the provided `ModelEndpoint`
@@ -278,7 +299,7 @@ def _model_endpoint_validations(
     # Feature stats
     if (
         sample_set_statistics
-        and sample_set_statistics != model_endpoint.status.feature_stats
+        and sample_set_statistics != model_endpoint.spec.feature_stats
     ):
         logger.warning(
             "Provided sample set statistics is different from the registered statistics. "
@@ -290,7 +311,7 @@ def write_monitoring_df(
     endpoint_id: str,
     infer_results_df: pd.DataFrame,
     infer_datetime: datetime,
-    monitoring_feature_set: typing.Optional[mlrun.feature_store.FeatureSet] = None,
+    monitoring_feature_set: typing.Optional["mlrun.feature_store.FeatureSet"] = None,
     feature_set_uri: str = "",
 ) -> None:
     """Write infer results dataframe to the monitoring parquet target of the current model endpoint. The dataframe will
@@ -330,13 +351,11 @@ def write_monitoring_df(
 def _generate_model_endpoint(
     project: str,
     db_session,
-    endpoint_id: str,
     model_path: str,
     model_endpoint_name: str,
     function_name: str,
-    context: mlrun.MLClientCtx,
-    sample_set_statistics: dict[str, typing.Any],
-    monitoring_mode: mm_constants.ModelMonitoringMode = mm_constants.ModelMonitoringMode.disabled,
+    function_tag: str,
+    monitoring_mode: mm_constants.ModelMonitoringMode = mm_constants.ModelMonitoringMode.enabled,
 ) -> ModelEndpoint:
     """
     Write a new model endpoint record.
@@ -344,50 +363,40 @@ def _generate_model_endpoint(
     :param project:                  Project name.
     :param db_session:               A session that manages the current dialog with the database.
-    :param endpoint_id:              Model endpoint unique ID.
     :param model_path:               The model Store path.
     :param model_endpoint_name:      Model endpoint name will be presented under the new model endpoint.
-    :param function_name:            If a new model endpoint is created, use this function name for generating the
-                                     function URI.
-    :param context:                  MLRun context. If function_name not provided, use the context to generate the
-                                     full function hash.
-    :param sample_set_statistics:    Dictionary of sample set statistics that will be used as a reference data for
-                                     the current model endpoint. Will be stored under
-                                     `model_endpoint.status.feature_stats`.
+    :param function_name:            If a new model endpoint is created, use this function name.
+    :param function_tag:             If a new model endpoint is created, use this function tag.
+    :param monitoring_mode:          Monitoring mode of the new model endpoint.
-    :return `mlrun.model_monitoring.model_endpoint.ModelEndpoint` object.
+    :return `mlrun.common.schemas.ModelEndpoint` object.
     """
-    model_endpoint = ModelEndpoint()
-    model_endpoint.metadata.project = project
-    model_endpoint.metadata.uid = endpoint_id
-    if function_name:
-        model_endpoint.spec.function_uri = project + "/" + function_name
-    elif not context:
-        raise mlrun.errors.MLRunInvalidArgumentError(
-            "Please provide either a function name or a valid MLRun context"
-        )
-    else:
-        model_endpoint.spec.function_uri = context.to_dict()["spec"]["function"]
-    model_endpoint.spec.model_uri = model_path
-    model_endpoint.spec.model = model_endpoint_name
-    model_endpoint.spec.model_class = "drift-analysis"
-    model_endpoint.spec.monitoring_mode = monitoring_mode
-    model_endpoint.status.first_request = model_endpoint.status.last_request = (
-        datetime_now().isoformat()
-    )
-    if sample_set_statistics:
-        model_endpoint.status.feature_stats = sample_set_statistics
-    db_session.create_model_endpoint(
-        project=project, endpoint_id=endpoint_id, model_endpoint=model_endpoint
+    current_time = datetime_now()
+    model_endpoint = mlrun.common.schemas.ModelEndpoint(
+        metadata=mlrun.common.schemas.ModelEndpointMetadata(
+            project=project,
+            name=model_endpoint_name,
+            endpoint_type=mlrun.common.schemas.model_monitoring.EndpointType.BATCH_EP,
+        ),
+        spec=mlrun.common.schemas.ModelEndpointSpec(
+            function_name=function_name or "function",
+            function_tag=function_tag or "latest",
+            model_path=model_path,
+            model_class="drift-analysis",
+        ),
+        status=mlrun.common.schemas.ModelEndpointStatus(
+            monitoring_mode=monitoring_mode,
+            first_request=current_time,
+            last_request=current_time,
+        ),
     )
-    return db_session.get_model_endpoint(project=project, endpoint_id=endpoint_id)
+    return db_session.create_model_endpoint(model_endpoint=model_endpoint)
 def get_sample_set_statistics(
     sample_set: DatasetType = None,
-    model_artifact_feature_stats: dict = None,
+    model_artifact_feature_stats: typing.Optional[dict] = None,
     sample_set_columns: typing.Optional[list] = None,
     sample_set_drop_columns: typing.Optional[list] = None,
     sample_set_label_columns: typing.Optional[list] = None,
@@ -445,9 +454,9 @@ def get_sample_set_statistics(
 def read_dataset_as_dataframe(
     dataset: DatasetType,
-    feature_columns: typing.Union[str, list[str]] = None,
-    label_columns: typing.Union[str, list[str]] = None,
-    drop_columns: typing.Union[str, list[str], int, list[int]] = None,
+    feature_columns: typing.Optional[typing.Union[str, list[str]]] = None,
+    label_columns: typing.Optional[typing.Union[str, list[str]]] = None,
+    drop_columns: typing.Optional[typing.Union[str, list[str], int, list[int]]] = None,
 ) -> tuple[pd.DataFrame, list[str]]:
     """
     Parse the given dataset into a DataFrame and drop the columns accordingly. In addition, the label columns will be
@@ -531,7 +540,7 @@ def read_dataset_as_dataframe(
 def log_result(
-    context: mlrun.MLClientCtx,
+    context: "mlrun.MLClientCtx",
     result_set_name: str,
     result_set: pd.DataFrame,
     artifacts_tag: str,
@@ -559,9 +568,7 @@ def _create_model_monitoring_function_base(
     project: str,
     func: typing.Union[str, None] = None,
     application_class: typing.Union[
-        str,
-        mm_app.ModelMonitoringApplicationBase,
-        None,
+        str, "mm_app.ModelMonitoringApplicationBase", None
     ] = None,
     name: typing.Optional[str] = None,
     image: typing.Optional[str] = None,
@@ -608,8 +615,8 @@ def _create_model_monitoring_function_base(
     app_step.__class__ = mlrun.serving.MonitoringApplicationStep
     app_step.error_handler(
-        name="ApplicationErrorHandler",
         class_name="mlrun.model_monitoring.applications._application_steps._ApplicationErrorHandler",
+        name="ApplicationErrorHandler",
         full_event=True,
         project=project,
     )
@@ -618,6 +625,14 @@ def _create_model_monitoring_function_base(
         class_name="mlrun.model_monitoring.applications._application_steps._PushToMonitoringWriter",
         name="PushToMonitoringWriter",
         project=project,
-        writer_application_name=mm_constants.MonitoringFunctionNames.WRITER,
     )
+    def block_to_mock_server(*args, **kwargs) -> typing.NoReturn:
+        raise NotImplementedError(
+            "Model monitoring serving functions do not support `.to_mock_server`. "
+            "You may call your model monitoring application object logic via the `.evaluate` method."
+        )
+    func_obj.to_mock_server = block_to_mock_server  # Until ML-7643 is implemented
     return func_obj

mlrun/model_monitoring/applications/__init__.py CHANGED Viewed

@@ -15,9 +15,4 @@
 from .base import ModelMonitoringApplicationBase
 from .context import MonitoringApplicationContext
-from .evidently_base import (
-    _HAS_EVIDENTLY,
-    SUPPORTED_EVIDENTLY_VERSION,
-    EvidentlyModelMonitoringApplicationBase,
-)
 from .results import ModelMonitoringApplicationMetric, ModelMonitoringApplicationResult

mlrun/model_monitoring/applications/_application_steps.py CHANGED Viewed

@@ -12,56 +12,49 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import collections
 import json
 import traceback
+from collections import OrderedDict
+from datetime import datetime
 from typing import Any, Optional, Union
+import mlrun.common.schemas
 import mlrun.common.schemas.alert as alert_objects
-import mlrun.common.schemas.model_monitoring.constants as mm_constant
-import mlrun.datastore
-import mlrun.model_monitoring
-from mlrun.model_monitoring.helpers import get_stream_path
+import mlrun.common.schemas.model_monitoring.constants as mm_constants
+import mlrun.model_monitoring.helpers
 from mlrun.serving import GraphContext
 from mlrun.serving.utils import StepToDict
 from mlrun.utils import logger
 from .context import MonitoringApplicationContext
-from .results import ModelMonitoringApplicationMetric, ModelMonitoringApplicationResult
+from .results import (
+    ModelMonitoringApplicationMetric,
+    ModelMonitoringApplicationResult,
+    _ModelMonitoringApplicationStats,
+)
 class _PushToMonitoringWriter(StepToDict):
     kind = "monitoring_application_stream_pusher"
-    def __init__(
-        self,
-        project: str,
-        writer_application_name: str,
-        stream_uri: Optional[str] = None,
-        name: Optional[str] = None,
-    ):
+    def __init__(self, project: str) -> None:
         """
         Class for pushing application results to the monitoring writer stream.
-        :param project:                     Project name.
-        :param writer_application_name:     Writer application name.
-        :param stream_uri:                  Stream URI for pushing results.
-        :param name:                        Name of the PushToMonitoringWriter
-                                            instance default to PushToMonitoringWriter.
+        :param project: Project name.
         """
         self.project = project
-        self.application_name_to_push = writer_application_name
-        self.stream_uri = stream_uri or get_stream_path(
-            project=self.project, function_name=self.application_name_to_push
-        )
         self.output_stream = None
-        self.name = name or "PushToMonitoringWriter"
     def do(
         self,
         event: tuple[
             list[
                 Union[
-                    ModelMonitoringApplicationResult, ModelMonitoringApplicationMetric
+                    ModelMonitoringApplicationResult,
+                    ModelMonitoringApplicationMetric,
+                    _ModelMonitoringApplicationStats,
                 ]
             ],
             MonitoringApplicationContext,
@@ -75,50 +68,50 @@ class _PushToMonitoringWriter(StepToDict):
         self._lazy_init()
         application_results, application_context = event
         writer_event = {
-            mm_constant.WriterEvent.APPLICATION_NAME: application_context.application_name,
-            mm_constant.WriterEvent.ENDPOINT_ID: application_context.endpoint_id,
-            mm_constant.WriterEvent.START_INFER_TIME: application_context.start_infer_time.isoformat(
+            mm_constants.WriterEvent.ENDPOINT_NAME: application_context.endpoint_name,
+            mm_constants.WriterEvent.APPLICATION_NAME: application_context.application_name,
+            mm_constants.WriterEvent.ENDPOINT_ID: application_context.endpoint_id,
+            mm_constants.WriterEvent.START_INFER_TIME: application_context.start_infer_time.isoformat(
                 sep=" ", timespec="microseconds"
             ),
-            mm_constant.WriterEvent.END_INFER_TIME: application_context.end_infer_time.isoformat(
+            mm_constants.WriterEvent.END_INFER_TIME: application_context.end_infer_time.isoformat(
                 sep=" ", timespec="microseconds"
             ),
         }
         for result in application_results:
             data = result.to_dict()
             if isinstance(result, ModelMonitoringApplicationResult):
-                writer_event[mm_constant.WriterEvent.EVENT_KIND] = (
-                    mm_constant.WriterEventKind.RESULT
+                writer_event[mm_constants.WriterEvent.EVENT_KIND] = (
+                    mm_constants.WriterEventKind.RESULT
                 )
-                data[mm_constant.ResultData.CURRENT_STATS] = json.dumps(
-                    application_context.sample_df_stats
+            elif isinstance(result, _ModelMonitoringApplicationStats):
+                writer_event[mm_constants.WriterEvent.EVENT_KIND] = (
+                    mm_constants.WriterEventKind.STATS
                 )
-                writer_event[mm_constant.WriterEvent.DATA] = json.dumps(data)
             else:
-                writer_event[mm_constant.WriterEvent.EVENT_KIND] = (
-                    mm_constant.WriterEventKind.METRIC
+                writer_event[mm_constants.WriterEvent.EVENT_KIND] = (
+                    mm_constants.WriterEventKind.METRIC
                 )
-                writer_event[mm_constant.WriterEvent.DATA] = json.dumps(data)
-            writer_event[mm_constant.WriterEvent.EVENT_KIND] = (
-                mm_constant.WriterEventKind.RESULT
-                if isinstance(result, ModelMonitoringApplicationResult)
-                else mm_constant.WriterEventKind.METRIC
+            writer_event[mm_constants.WriterEvent.DATA] = json.dumps(data)
+            logger.debug(
+                "Pushing data to output stream", writer_event=str(writer_event)
             )
-            logger.info(
-                f"Pushing data = {writer_event} \n to stream = {self.stream_uri}"
+            self.output_stream.push(
+                [writer_event], partition_key=application_context.endpoint_id
             )
-            self.output_stream.push([writer_event])
-            logger.info(f"Pushed data to {self.stream_uri} successfully")
+            logger.debug("Pushed data to output stream successfully")
     def _lazy_init(self):
         if self.output_stream is None:
-            self.output_stream = mlrun.datastore.get_stream_pusher(
-                self.stream_uri,
+            self.output_stream = mlrun.model_monitoring.helpers.get_output_stream(
+                project=self.project,
+                function_name=mm_constants.MonitoringFunctionNames.WRITER,
             )
 class _PrepareMonitoringEvent(StepToDict):
+    MAX_MODEL_ENDPOINTS: int = 1500
     def __init__(self, context: GraphContext, application_name: str) -> None:
         """
         Class for preparing the application event for the application step.
@@ -126,8 +119,12 @@ class _PrepareMonitoringEvent(StepToDict):
         :param application_name: Application name.
         """
         self.graph_context = context
+        _ = self.graph_context.project_obj  # Ensure project exists
         self.application_name = application_name
-        self.model_endpoints: dict[str, mlrun.model_monitoring.ModelEndpoint] = {}
+        self.model_endpoints: OrderedDict[str, mlrun.common.schemas.ModelEndpoint] = (
+            collections.OrderedDict()
+        )
+        self.feature_sets: dict[str, mlrun.common.schemas.FeatureSet] = {}
     def do(self, event: dict[str, Any]) -> MonitoringApplicationContext:
         """
@@ -136,16 +133,48 @@ class _PrepareMonitoringEvent(StepToDict):
         :param event: Application event.
         :return: Application context.
         """
-        application_context = MonitoringApplicationContext(
-            graph_context=self.graph_context,
+        endpoint_id = event.get(mm_constants.ApplicationEvent.ENDPOINT_ID)
+        endpoint_updated = datetime.fromisoformat(
+            event.get(mm_constants.ApplicationEvent.ENDPOINT_UPDATED)
+        )
+        if (
+            endpoint_id in self.model_endpoints
+            and endpoint_updated != self.model_endpoints[endpoint_id].metadata.updated
+        ):
+            logger.debug(
+                "Updated endpoint removing endpoint from cash",
+                new_updated=endpoint_updated.isoformat(),
+                old_updated=self.model_endpoints[
+                    endpoint_id
+                ].metadata.updated.isoformat(),
+            )
+            self.model_endpoints.pop(endpoint_id)
+        application_context = MonitoringApplicationContext._from_graph_ctx(
             application_name=self.application_name,
             event=event,
             model_endpoint_dict=self.model_endpoints,
+            graph_context=self.graph_context,
+            feature_sets_dict=self.feature_sets,
         )
         self.model_endpoints.setdefault(
             application_context.endpoint_id, application_context.model_endpoint
         )
+        self.feature_sets.setdefault(
+            application_context.endpoint_id, application_context.feature_set
+        )
+        # Move every used endpoint to the front, so the least recently used one ends up last and can be popped:
+        self.model_endpoints.move_to_end(application_context.endpoint_id, last=False)
+        if len(self.model_endpoints) > self.MAX_MODEL_ENDPOINTS:
+            removed_endpoint_id, _ = self.model_endpoints.popitem(
+                last=True
+            )  # Removing the LRU endpoint
+            self.feature_sets.pop(removed_endpoint_id, None)
+            logger.debug(
+                "Exceeded maximum number of model endpoints removing the LRU from cash",
+                endpoint_id=removed_endpoint_id,
+            )
         return application_context
@@ -166,7 +195,9 @@ class _ApplicationErrorHandler(StepToDict):
             "Endpoint ID": event.body.endpoint_id,
             "Application Class": event.body.application_name,
             "Error": "".join(
-                traceback.format_exception(None, event.error, event.error.__traceback__)
+                traceback.format_exception(
+                    None, value=event.error, tb=event.error.__traceback__
+                )
             ),
             "Timestamp": event.timestamp,
         }

mlrun 1.7.2rc4__py3-none-any.whl → 1.8.0__py3-none-any.whl

Potentially problematic release.

mlrun 1.7.2rc4py3-none-any.whl → 1.8.0py3-none-any.whl