PyPI - mlrun - Versions diffs - 1.6.4rc8__py3-none-any.whl → 1.7.0__py3-none-any.whl - Mend

mlrun 1.6.4rc8-py3-none-any.whl → 1.7.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mlrun might be problematic. Click here for more details.

Files changed (305)

mlrun/__init__.py +11 -1
mlrun/__main__.py +40 -122
mlrun/alerts/__init__.py +15 -0
mlrun/alerts/alert.py +248 -0
mlrun/api/schemas/__init__.py +5 -4
mlrun/artifacts/__init__.py +8 -3
mlrun/artifacts/base.py +47 -257
mlrun/artifacts/dataset.py +11 -192
mlrun/artifacts/manager.py +79 -47
mlrun/artifacts/model.py +31 -159
mlrun/artifacts/plots.py +23 -380
mlrun/common/constants.py +74 -1
mlrun/common/db/sql_session.py +5 -5
mlrun/common/formatters/__init__.py +21 -0
mlrun/common/formatters/artifact.py +45 -0
mlrun/common/formatters/base.py +113 -0
mlrun/common/formatters/feature_set.py +33 -0
mlrun/common/formatters/function.py +46 -0
mlrun/common/formatters/pipeline.py +53 -0
mlrun/common/formatters/project.py +51 -0
mlrun/common/formatters/run.py +29 -0
mlrun/common/helpers.py +12 -3
mlrun/common/model_monitoring/helpers.py +9 -5
mlrun/{runtimes → common/runtimes}/constants.py +37 -9
mlrun/common/schemas/__init__.py +31 -5
mlrun/common/schemas/alert.py +202 -0
mlrun/common/schemas/api_gateway.py +196 -0
mlrun/common/schemas/artifact.py +25 -4
mlrun/common/schemas/auth.py +16 -5
mlrun/common/schemas/background_task.py +1 -1
mlrun/common/schemas/client_spec.py +4 -2
mlrun/common/schemas/common.py +7 -4
mlrun/common/schemas/constants.py +3 -0
mlrun/common/schemas/feature_store.py +74 -44
mlrun/common/schemas/frontend_spec.py +15 -7
mlrun/common/schemas/function.py +12 -1
mlrun/common/schemas/hub.py +11 -18
mlrun/common/schemas/memory_reports.py +2 -2
mlrun/common/schemas/model_monitoring/__init__.py +20 -4
mlrun/common/schemas/model_monitoring/constants.py +123 -42
mlrun/common/schemas/model_monitoring/grafana.py +13 -9
mlrun/common/schemas/model_monitoring/model_endpoints.py +101 -54
mlrun/common/schemas/notification.py +71 -14
mlrun/common/schemas/object.py +2 -2
mlrun/{model_monitoring/controller_handler.py → common/schemas/pagination.py} +9 -12
mlrun/common/schemas/pipeline.py +8 -1
mlrun/common/schemas/project.py +69 -18
mlrun/common/schemas/runs.py +7 -1
mlrun/common/schemas/runtime_resource.py +8 -12
mlrun/common/schemas/schedule.py +4 -4
mlrun/common/schemas/tag.py +1 -2
mlrun/common/schemas/workflow.py +12 -4
mlrun/common/types.py +14 -1
mlrun/config.py +154 -69
mlrun/data_types/data_types.py +6 -1
mlrun/data_types/spark.py +2 -2
mlrun/data_types/to_pandas.py +67 -37
mlrun/datastore/__init__.py +6 -8
mlrun/datastore/alibaba_oss.py +131 -0
mlrun/datastore/azure_blob.py +143 -42
mlrun/datastore/base.py +102 -58
mlrun/datastore/datastore.py +34 -13
mlrun/datastore/datastore_profile.py +146 -20
mlrun/datastore/dbfs_store.py +3 -7
mlrun/datastore/filestore.py +1 -4
mlrun/datastore/google_cloud_storage.py +97 -33
mlrun/datastore/hdfs.py +56 -0
mlrun/datastore/inmem.py +6 -3
mlrun/datastore/redis.py +7 -2
mlrun/datastore/s3.py +34 -12
mlrun/datastore/snowflake_utils.py +45 -0
mlrun/datastore/sources.py +303 -111
mlrun/datastore/spark_utils.py +31 -2
mlrun/datastore/store_resources.py +9 -7
mlrun/datastore/storeytargets.py +151 -0
mlrun/datastore/targets.py +453 -176
mlrun/datastore/utils.py +72 -58
mlrun/datastore/v3io.py +6 -1
mlrun/db/base.py +274 -41
mlrun/db/factory.py +1 -1
mlrun/db/httpdb.py +893 -225
mlrun/db/nopdb.py +291 -33
mlrun/errors.py +36 -6
mlrun/execution.py +115 -42
mlrun/feature_store/__init__.py +0 -2
mlrun/feature_store/api.py +65 -73
mlrun/feature_store/common.py +7 -12
mlrun/feature_store/feature_set.py +76 -55
mlrun/feature_store/feature_vector.py +39 -31
mlrun/feature_store/ingestion.py +7 -6
mlrun/feature_store/retrieval/base.py +16 -11
mlrun/feature_store/retrieval/dask_merger.py +2 -0
mlrun/feature_store/retrieval/job.py +13 -4
mlrun/feature_store/retrieval/local_merger.py +2 -0
mlrun/feature_store/retrieval/spark_merger.py +24 -32
mlrun/feature_store/steps.py +45 -34
mlrun/features.py +11 -21
mlrun/frameworks/_common/artifacts_library.py +9 -9
mlrun/frameworks/_common/mlrun_interface.py +5 -5
mlrun/frameworks/_common/model_handler.py +48 -48
mlrun/frameworks/_common/plan.py +5 -6
mlrun/frameworks/_common/producer.py +3 -4
mlrun/frameworks/_common/utils.py +5 -5
mlrun/frameworks/_dl_common/loggers/logger.py +6 -7
mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +9 -9
mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +23 -47
mlrun/frameworks/_ml_common/artifacts_library.py +1 -2
mlrun/frameworks/_ml_common/loggers/logger.py +3 -4
mlrun/frameworks/_ml_common/loggers/mlrun_logger.py +4 -5
mlrun/frameworks/_ml_common/model_handler.py +24 -24
mlrun/frameworks/_ml_common/pkl_model_server.py +2 -2
mlrun/frameworks/_ml_common/plan.py +2 -2
mlrun/frameworks/_ml_common/plans/calibration_curve_plan.py +2 -3
mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +2 -3
mlrun/frameworks/_ml_common/plans/dataset_plan.py +3 -3
mlrun/frameworks/_ml_common/plans/feature_importance_plan.py +3 -3
mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +4 -4
mlrun/frameworks/_ml_common/utils.py +4 -4
mlrun/frameworks/auto_mlrun/auto_mlrun.py +9 -9
mlrun/frameworks/huggingface/model_server.py +4 -4
mlrun/frameworks/lgbm/__init__.py +33 -33
mlrun/frameworks/lgbm/callbacks/callback.py +2 -4
mlrun/frameworks/lgbm/callbacks/logging_callback.py +4 -5
mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +4 -5
mlrun/frameworks/lgbm/mlrun_interfaces/booster_mlrun_interface.py +1 -3
mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +6 -6
mlrun/frameworks/lgbm/model_handler.py +10 -10
mlrun/frameworks/lgbm/model_server.py +6 -6
mlrun/frameworks/lgbm/utils.py +5 -5
mlrun/frameworks/onnx/dataset.py +8 -8
mlrun/frameworks/onnx/mlrun_interface.py +3 -3
mlrun/frameworks/onnx/model_handler.py +6 -6
mlrun/frameworks/onnx/model_server.py +7 -7
mlrun/frameworks/parallel_coordinates.py +6 -6
mlrun/frameworks/pytorch/__init__.py +18 -18
mlrun/frameworks/pytorch/callbacks/callback.py +4 -5
mlrun/frameworks/pytorch/callbacks/logging_callback.py +17 -17
mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +11 -11
mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +23 -29
mlrun/frameworks/pytorch/callbacks_handler.py +38 -38
mlrun/frameworks/pytorch/mlrun_interface.py +20 -20
mlrun/frameworks/pytorch/model_handler.py +17 -17
mlrun/frameworks/pytorch/model_server.py +7 -7
mlrun/frameworks/sklearn/__init__.py +13 -13
mlrun/frameworks/sklearn/estimator.py +4 -4
mlrun/frameworks/sklearn/metrics_library.py +14 -14
mlrun/frameworks/sklearn/mlrun_interface.py +16 -9
mlrun/frameworks/sklearn/model_handler.py +2 -2
mlrun/frameworks/tf_keras/__init__.py +10 -7
mlrun/frameworks/tf_keras/callbacks/logging_callback.py +15 -15
mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +11 -11
mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +19 -23
mlrun/frameworks/tf_keras/mlrun_interface.py +9 -11
mlrun/frameworks/tf_keras/model_handler.py +14 -14
mlrun/frameworks/tf_keras/model_server.py +6 -6
mlrun/frameworks/xgboost/__init__.py +13 -13
mlrun/frameworks/xgboost/model_handler.py +6 -6
mlrun/k8s_utils.py +61 -17
mlrun/launcher/__init__.py +1 -1
mlrun/launcher/base.py +16 -15
mlrun/launcher/client.py +13 -11
mlrun/launcher/factory.py +1 -1
mlrun/launcher/local.py +23 -13
mlrun/launcher/remote.py +17 -10
mlrun/lists.py +7 -6
mlrun/model.py +478 -103
mlrun/model_monitoring/__init__.py +1 -1
mlrun/model_monitoring/api.py +163 -371
mlrun/{runtimes/mpijob/v1alpha1.py → model_monitoring/applications/__init__.py} +9 -15
mlrun/model_monitoring/applications/_application_steps.py +188 -0
mlrun/model_monitoring/applications/base.py +108 -0
mlrun/model_monitoring/applications/context.py +341 -0
mlrun/model_monitoring/{evidently_application.py → applications/evidently_base.py} +27 -22
mlrun/model_monitoring/applications/histogram_data_drift.py +354 -0
mlrun/model_monitoring/applications/results.py +99 -0
mlrun/model_monitoring/controller.py +131 -278
mlrun/model_monitoring/db/__init__.py +18 -0
mlrun/model_monitoring/db/stores/__init__.py +136 -0
mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
mlrun/model_monitoring/db/stores/base/store.py +213 -0
mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +71 -0
mlrun/model_monitoring/db/stores/sqldb/models/base.py +190 -0
mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +103 -0
mlrun/model_monitoring/{stores/models/mysql.py → db/stores/sqldb/models/sqlite.py} +19 -13
mlrun/model_monitoring/db/stores/sqldb/sql_store.py +659 -0
mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +726 -0
mlrun/model_monitoring/db/tsdb/__init__.py +105 -0
mlrun/model_monitoring/db/tsdb/base.py +448 -0
mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +279 -0
mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +42 -0
mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +507 -0
mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +158 -0
mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +849 -0
mlrun/model_monitoring/features_drift_table.py +134 -106
mlrun/model_monitoring/helpers.py +199 -55
mlrun/model_monitoring/metrics/__init__.py +13 -0
mlrun/model_monitoring/metrics/histogram_distance.py +127 -0
mlrun/model_monitoring/model_endpoint.py +3 -2
mlrun/model_monitoring/stream_processing.py +134 -398
mlrun/model_monitoring/tracking_policy.py +9 -2
mlrun/model_monitoring/writer.py +161 -125
mlrun/package/__init__.py +6 -6
mlrun/package/context_handler.py +5 -5
mlrun/package/packager.py +7 -7
mlrun/package/packagers/default_packager.py +8 -8
mlrun/package/packagers/numpy_packagers.py +15 -15
mlrun/package/packagers/pandas_packagers.py +5 -5
mlrun/package/packagers/python_standard_library_packagers.py +10 -10
mlrun/package/packagers_manager.py +19 -23
mlrun/package/utils/_formatter.py +6 -6
mlrun/package/utils/_pickler.py +2 -2
mlrun/package/utils/_supported_format.py +4 -4
mlrun/package/utils/log_hint_utils.py +2 -2
mlrun/package/utils/type_hint_utils.py +4 -9
mlrun/platforms/__init__.py +11 -10
mlrun/platforms/iguazio.py +24 -203
mlrun/projects/operations.py +52 -25
mlrun/projects/pipelines.py +191 -197
mlrun/projects/project.py +1227 -400
mlrun/render.py +16 -19
mlrun/run.py +209 -184
mlrun/runtimes/__init__.py +83 -15
mlrun/runtimes/base.py +51 -35
mlrun/runtimes/daskjob.py +17 -10
mlrun/runtimes/databricks_job/databricks_cancel_task.py +1 -1
mlrun/runtimes/databricks_job/databricks_runtime.py +8 -7
mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
mlrun/runtimes/funcdoc.py +1 -29
mlrun/runtimes/function_reference.py +1 -1
mlrun/runtimes/kubejob.py +34 -128
mlrun/runtimes/local.py +40 -11
mlrun/runtimes/mpijob/__init__.py +0 -20
mlrun/runtimes/mpijob/abstract.py +9 -10
mlrun/runtimes/mpijob/v1.py +1 -1
mlrun/{model_monitoring/stores/models/sqlite.py → runtimes/nuclio/__init__.py} +7 -9
mlrun/runtimes/nuclio/api_gateway.py +769 -0
mlrun/runtimes/nuclio/application/__init__.py +15 -0
mlrun/runtimes/nuclio/application/application.py +758 -0
mlrun/runtimes/nuclio/application/reverse_proxy.go +95 -0
mlrun/runtimes/{function.py → nuclio/function.py} +200 -83
mlrun/runtimes/{nuclio.py → nuclio/nuclio.py} +6 -6
mlrun/runtimes/{serving.py → nuclio/serving.py} +65 -68
mlrun/runtimes/pod.py +281 -101
mlrun/runtimes/remotesparkjob.py +12 -9
mlrun/runtimes/sparkjob/spark3job.py +67 -51
mlrun/runtimes/utils.py +41 -75
mlrun/secrets.py +9 -5
mlrun/serving/__init__.py +8 -1
mlrun/serving/remote.py +2 -7
mlrun/serving/routers.py +85 -69
mlrun/serving/server.py +69 -44
mlrun/serving/states.py +209 -36
mlrun/serving/utils.py +22 -14
mlrun/serving/v1_serving.py +6 -7
mlrun/serving/v2_serving.py +133 -54
mlrun/track/tracker.py +2 -1
mlrun/track/tracker_manager.py +3 -3
mlrun/track/trackers/mlflow_tracker.py +6 -2
mlrun/utils/async_http.py +6 -8
mlrun/utils/azure_vault.py +1 -1
mlrun/utils/clones.py +1 -2
mlrun/utils/condition_evaluator.py +3 -3
mlrun/utils/db.py +21 -3
mlrun/utils/helpers.py +405 -225
mlrun/utils/http.py +3 -6
mlrun/utils/logger.py +112 -16
mlrun/utils/notifications/notification/__init__.py +17 -13
mlrun/utils/notifications/notification/base.py +50 -2
mlrun/utils/notifications/notification/console.py +2 -0
mlrun/utils/notifications/notification/git.py +24 -1
mlrun/utils/notifications/notification/ipython.py +3 -1
mlrun/utils/notifications/notification/slack.py +96 -21
mlrun/utils/notifications/notification/webhook.py +59 -2
mlrun/utils/notifications/notification_pusher.py +149 -30
mlrun/utils/regex.py +9 -0
mlrun/utils/retryer.py +208 -0
mlrun/utils/singleton.py +1 -1
mlrun/utils/v3io_clients.py +4 -6
mlrun/utils/version/version.json +2 -2
mlrun/utils/version/version.py +2 -6
mlrun-1.7.0.dist-info/METADATA +378 -0
mlrun-1.7.0.dist-info/RECORD +351 -0
{mlrun-1.6.4rc8.dist-info → mlrun-1.7.0.dist-info}/WHEEL +1 -1
mlrun/feature_store/retrieval/conversion.py +0 -273
mlrun/kfpops.py +0 -868
mlrun/model_monitoring/application.py +0 -310
mlrun/model_monitoring/batch.py +0 -1095
mlrun/model_monitoring/prometheus.py +0 -219
mlrun/model_monitoring/stores/__init__.py +0 -111
mlrun/model_monitoring/stores/kv_model_endpoint_store.py +0 -576
mlrun/model_monitoring/stores/model_endpoint_store.py +0 -147
mlrun/model_monitoring/stores/models/__init__.py +0 -27
mlrun/model_monitoring/stores/models/base.py +0 -84
mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -384
mlrun/platforms/other.py +0 -306
mlrun-1.6.4rc8.dist-info/METADATA +0 -272
mlrun-1.6.4rc8.dist-info/RECORD +0 -314
{mlrun-1.6.4rc8.dist-info → mlrun-1.7.0.dist-info}/LICENSE +0 -0
{mlrun-1.6.4rc8.dist-info → mlrun-1.7.0.dist-info}/entry_points.txt +0 -0
{mlrun-1.6.4rc8.dist-info → mlrun-1.7.0.dist-info}/top_level.txt +0 -0

mlrun/model_monitoring/controller.py CHANGED Viewed

@@ -11,32 +11,30 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import concurrent.futures
 import datetime
 import json
 import os
 import re
-from typing import Any, Iterator, NamedTuple, Optional, Union, cast
+from collections.abc import Iterator
+from typing import NamedTuple, Optional, Union, cast
-from v3io.dataplane.response import HttpResponseError
+import nuclio
 import mlrun
 import mlrun.common.schemas.model_monitoring.constants as mm_constants
 import mlrun.data_types.infer
 import mlrun.feature_store as fstore
-from mlrun.common.model_monitoring.helpers import FeatureStats, pad_features_hist
+import mlrun.model_monitoring.db.stores
+from mlrun.config import config as mlconf
 from mlrun.datastore import get_stream_pusher
-from mlrun.datastore.targets import ParquetTarget
-from mlrun.model_monitoring.batch import calculate_inputs_statistics
+from mlrun.errors import err_to_str
 from mlrun.model_monitoring.helpers import (
     _BatchDict,
     batch_dict2timedelta,
-    get_monitoring_parquet_path,
     get_stream_path,
 )
-from mlrun.utils import create_logger, datetime_now, logger
-from mlrun.utils.v3io_clients import get_v3io_client
+from mlrun.utils import datetime_now, logger
 class _Interval(NamedTuple):
@@ -45,8 +43,6 @@ class _Interval(NamedTuple):
 class _BatchWindow:
-    V3IO_CONTAINER_FORMAT = "users/pipelines/{project}/monitoring-schedules/functions"
     def __init__(
         self,
         project: str,
@@ -62,27 +58,22 @@ class _BatchWindow:
         All the time values are in seconds.
         The start and stop time are in seconds since the epoch.
         """
+        self.project = project
         self._endpoint = endpoint
         self._application = application
         self._first_request = first_request
-        self._kv_storage = get_v3io_client(
-            endpoint=mlrun.mlconf.v3io_api,
-            # Avoid noisy warning logs before the KV table is created
-            logger=create_logger(name="v3io_client", level="error"),
-        ).kv
-        self._v3io_container = self.V3IO_CONTAINER_FORMAT.format(project=project)
         self._stop = last_updated
         self._step = timedelta_seconds
+        self._db = mlrun.model_monitoring.get_store_object(project=self.project)
         self._start = self._get_last_analyzed()
     def _get_last_analyzed(self) -> Optional[int]:
         try:
-            data = self._kv_storage.get(
-                container=self._v3io_container,
-                table_path=self._endpoint,
-                key=self._application,
+            last_analyzed = self._db.get_last_analyzed(
+                endpoint_id=self._endpoint,
+                application_name=self._application,
             )
-        except HttpResponseError as err:
+        except mlrun.errors.MLRunNotFoundError:
             logger.info(
                 "No last analyzed time was found for this endpoint and "
                 "application, as this is probably the first time this "
@@ -93,7 +84,7 @@ class _BatchWindow:
                 first_request=self._first_request,
                 last_updated=self._stop,
             )
-            logger.debug("Error while getting last analyzed time", err=err)
             if self._first_request and self._stop:
                 # TODO : Change the timedelta according to the policy.
                 first_period_in_seconds = max(
@@ -105,7 +96,6 @@ class _BatchWindow:
                 )
             return self._first_request
-        last_analyzed = data.output.item[mm_constants.SchedulingKeys.LAST_ANALYZED]
         logger.info(
             "Got the last analyzed time for this endpoint and application",
             endpoint=self._endpoint,
@@ -121,11 +111,11 @@ class _BatchWindow:
             application=self._application,
             last_analyzed=last_analyzed,
         )
-        self._kv_storage.put(
-            container=self._v3io_container,
-            table_path=self._endpoint,
-            key=self._application,
-            attributes={mm_constants.SchedulingKeys.LAST_ANALYZED: last_analyzed},
+        self._db.update_last_analyzed(
+            endpoint_id=self._endpoint,
+            application_name=self._application,
+            last_analyzed=last_analyzed,
         )
     def get_intervals(
@@ -224,7 +214,7 @@ class _BatchWindowGenerator:
             # If the endpoint does not have a stream, `last_updated` should be
             # the minimum between the current time and the last updated time.
             # This compensates for the bumping mechanism - see
-            # `bump_model_endpoint_last_request`.
+            # `update_model_endpoint_last_request`.
             last_updated = min(int(datetime_now().timestamp()), last_updated)
             logger.debug(
                 "The endpoint does not have a stream", last_updated=last_updated
@@ -279,44 +269,26 @@ class MonitoringApplicationController:
     Note that the MonitoringApplicationController object requires access keys along with valid project configurations.
     """
-    def __init__(
-        self,
-        context: mlrun.run.MLClientCtx,
-        project: str,
-    ):
-        """
-        Initialize Monitoring Application Processor object.
+    def __init__(self) -> None:
+        """Initialize Monitoring Application Controller"""
+        self.project = cast(str, mlrun.mlconf.default_project)
+        self.project_obj = mlrun.load_project(name=self.project, url=self.project)
-        :param context:                     An MLRun context.
-        :param project:                     Project name.
-        """
-        self.context = context
-        self.project = project
-        self.project_obj = mlrun.get_or_create_project(project)
-        context.logger.debug(f"Initializing {self.__class__.__name__}", project=project)
+        logger.debug(f"Initializing {self.__class__.__name__}", project=self.project)
-        self.db = mlrun.model_monitoring.get_model_endpoint_store(project=project)
+        self.db = mlrun.model_monitoring.get_store_object(project=self.project)
         self._batch_window_generator = _BatchWindowGenerator(
-            batch_dict=context.parameters[
-                mm_constants.EventFieldType.BATCH_INTERVALS_DICT
-            ]
+            batch_dict=json.loads(
+                mlrun.get_secret_or_env(
+                    mm_constants.EventFieldType.BATCH_INTERVALS_DICT
+                )
+            )
         )
-        # If provided, only model endpoints in that that list will be analyzed
-        self.model_endpoints = context.parameters.get(
-            mm_constants.EventFieldType.MODEL_ENDPOINTS, None
-        )
         self.model_monitoring_access_key = self._get_model_monitoring_access_key()
-        self.parquet_directory = get_monitoring_parquet_path(
-            self.project_obj,
-            kind=mm_constants.FileTargetKind.APPS_PARQUET,
-        )
         self.storage_options = None
-        if not mlrun.mlconf.is_ce_mode():
-            self._initialize_v3io_configurations()
-        elif self.parquet_directory.startswith("s3://"):
+        if mlconf.artifact_path.startswith("s3://"):
             self.storage_options = mlrun.mlconf.get_s3_storage_options()
     @staticmethod
@@ -327,39 +299,60 @@ class MonitoringApplicationController:
             access_key = mlrun.mlconf.get_v3io_access_key()
         return access_key
-    def _initialize_v3io_configurations(self) -> None:
-        self.v3io_framesd = mlrun.mlconf.v3io_framesd
-        self.v3io_api = mlrun.mlconf.v3io_api
-        self.storage_options = dict(
-            v3io_access_key=self.model_monitoring_access_key, v3io_api=self.v3io_api
-        )
-    def run(self):
+    def run(self) -> None:
         """
-        Main method for run all the relevant monitoring applications on each endpoint
+        Main method for run all the relevant monitoring applications on each endpoint.
+        This method handles the following:
+        1. List model endpoints
+        2. List applications
+        3. Check model monitoring windows
+        4. Send data to applications
+        5. Delete old parquets
         """
+        logger.info("Start running monitoring controller")
         try:
-            endpoints = self.db.list_model_endpoints(uids=self.model_endpoints)
+            applications_names = []
+            endpoints = self.db.list_model_endpoints(include_stats=True)
+            if not endpoints:
+                logger.info("No model endpoints found", project=self.project)
+                return
             monitoring_functions = self.project_obj.list_model_monitoring_functions()
             if monitoring_functions:
                 applications_names = list(
                     {app.metadata.name for app in monitoring_functions}
                 )
-            else:
-                self.context.logger.info(
-                    "No monitoring functions found", project=self.project
-                )
-                applications_names = []
+            # if monitoring_functions: - TODO : ML-7700
+            #   Gets only application in ready state
+            #   applications_names = list(
+            #       {
+            #           app.metadata.name
+            #           for app in monitoring_functions
+            #           if (
+            #               app.status.state == "ready"
+            #               # workaround for the default app, as its `status.state` is `None`
+            #               or app.metadata.name
+            #               == mm_constants.HistogramDataDriftApplicationConstants.NAME
+            #           )
+            #       }
+            #   )
+            if not applications_names:
+                logger.info("No monitoring functions found", project=self.project)
+                return
+            logger.info(
+                "Starting to iterate over the applications",
+                applications=applications_names,
+            )
         except Exception as e:
-            self.context.logger.error("Failed to list endpoints", exc=e)
-            return
-        if endpoints and applications_names:
-            # Initialize a process pool that will be used to run each endpoint applications on a dedicated process
-            pool = concurrent.futures.ProcessPoolExecutor(
-                max_workers=min(len(endpoints), 10),
+            logger.error(
+                "Failed to list endpoints and monitoring applications",
+                exc=err_to_str(e),
             )
-            futures = []
+            return
+        # Initialize a process pool that will be used to run each endpoint applications on a dedicated process
+        with concurrent.futures.ThreadPoolExecutor(
+            max_workers=min(len(endpoints), 10),
+        ) as pool:
             for endpoint in endpoints:
                 if (
                     endpoint[mm_constants.EventFieldType.ACTIVE]
@@ -373,27 +366,18 @@ class MonitoringApplicationController:
                     ):
                         # Router endpoint has no feature stats
                         logger.info(
-                            f"{endpoint[mm_constants.EventFieldType.UID]} is router skipping"
+                            f"{endpoint[mm_constants.EventFieldType.UID]} is router, skipping"
                         )
                         continue
-                    future = pool.submit(
+                    pool.submit(
                         MonitoringApplicationController.model_endpoint_process,
                         endpoint=endpoint,
                         applications_names=applications_names,
                         batch_window_generator=self._batch_window_generator,
                         project=self.project,
-                        parquet_directory=self.parquet_directory,
-                        storage_options=self.storage_options,
                         model_monitoring_access_key=self.model_monitoring_access_key,
+                        storage_options=self.storage_options,
                     )
-                    futures.append(future)
-            for future in concurrent.futures.as_completed(futures):
-                result = future.result()
-                if result:
-                    self.context.log_results(result)
-            self._delete_old_parquet(endpoints=endpoints)
     @classmethod
     def model_endpoint_process(
@@ -402,10 +386,9 @@ class MonitoringApplicationController:
         applications_names: list[str],
         batch_window_generator: _BatchWindowGenerator,
         project: str,
-        parquet_directory: str,
-        storage_options: dict,
         model_monitoring_access_key: str,
-    ) -> Optional[dict[str, list[str]]]:
+        storage_options: Optional[dict] = None,
+    ) -> None:
         """
         Process a model endpoint and trigger the monitoring applications. This function running on different process
         for each endpoint. In addition, this function will generate a parquet file that includes the relevant data
@@ -415,18 +398,15 @@ class MonitoringApplicationController:
         :param applications_names:          (list[str]) List of application names to push results to.
         :param batch_window_generator:      (_BatchWindowGenerator) An object that generates _BatchWindow objects.
         :param project:                     (str) Project name.
-        :param parquet_directory:           (str) Directory to store application parquet files
-        :param storage_options:             (dict) Storage options for writing ParquetTarget.
         :param model_monitoring_access_key: (str) Access key to apply the model monitoring process.
+        :param storage_options:             (dict) Storage options for reading the infer parquet files.
         """
         endpoint_id = endpoint[mm_constants.EventFieldType.UID]
-        start_times: set[datetime.datetime] = set()
+        has_stream = endpoint[mm_constants.EventFieldType.STREAM_PATH] != ""
+        m_fs = fstore.get_feature_set(
+            endpoint[mm_constants.EventFieldType.FEATURE_SET_URI]
+        )
         try:
-            m_fs = fstore.get_feature_set(
-                endpoint[mm_constants.EventFieldType.FEATURE_SET_URI]
-            )
             for application in applications_names:
                 batch_window = batch_window_generator.get_batch_window(
                     project=project,
@@ -434,171 +414,81 @@ class MonitoringApplicationController:
                     application=application,
                     first_request=endpoint[mm_constants.EventFieldType.FIRST_REQUEST],
                     last_request=endpoint[mm_constants.EventFieldType.LAST_REQUEST],
-                    has_stream=endpoint[mm_constants.EventFieldType.STREAM_PATH] != "",
+                    has_stream=has_stream,
                 )
                 for start_infer_time, end_infer_time in batch_window.get_intervals():
-                    try:
-                        # Get application sample data
-                        offline_response = cls._get_sample_df(
-                            feature_set=m_fs,
+                    df = m_fs.to_dataframe(
+                        start_time=start_infer_time,
+                        end_time=end_infer_time,
+                        time_column=mm_constants.EventFieldType.TIMESTAMP,
+                        storage_options=storage_options,
+                    )
+                    if len(df) == 0:
+                        logger.info(
+                            "No data found for the given interval",
+                            start=start_infer_time,
+                            end=end_infer_time,
+                            endpoint_id=endpoint_id,
+                        )
+                    else:
+                        logger.info(
+                            "Data found for the given interval",
+                            start=start_infer_time,
+                            end=end_infer_time,
                             endpoint_id=endpoint_id,
+                        )
+                        cls._push_to_applications(
                             start_infer_time=start_infer_time,
                             end_infer_time=end_infer_time,
-                            parquet_directory=parquet_directory,
-                            storage_options=storage_options,
-                            application_name=application,
-                        )
-                        df = offline_response.to_dataframe()
-                        parquet_target_path = offline_response.vector.get_target_path()
-                        if len(df) == 0:
-                            logger.info(
-                                "During this time window, the endpoint has not received any data",
-                                endpoint=endpoint[mm_constants.EventFieldType.UID],
-                                start_time=start_infer_time,
-                                end_time=end_infer_time,
-                            )
-                            continue
-                    except FileNotFoundError:
-                        logger.warn(
-                            "No parquets were written yet",
-                            endpoint=endpoint[mm_constants.EventFieldType.UID],
+                            endpoint_id=endpoint_id,
+                            project=project,
+                            applications_names=[application],
+                            model_monitoring_access_key=model_monitoring_access_key,
                         )
-                        continue
-                    # Get the timestamp of the latest request:
-                    latest_request = df[mm_constants.EventFieldType.TIMESTAMP].iloc[-1]
-                    # Get the feature stats from the model endpoint for reference data
-                    feature_stats = json.loads(
-                        endpoint[mm_constants.EventFieldType.FEATURE_STATS]
-                    )
-                    # Pad the original feature stats to accommodate current
-                    # data out of the original range (unless already padded)
-                    pad_features_hist(FeatureStats(feature_stats))
-                    # Get the current stats:
-                    current_stats = calculate_inputs_statistics(
-                        sample_set_statistics=feature_stats,
-                        inputs=df,
-                    )
-                    cls._push_to_applications(
-                        current_stats=current_stats,
-                        feature_stats=feature_stats,
-                        start_infer_time=start_infer_time,
-                        end_infer_time=end_infer_time,
-                        endpoint_id=endpoint_id,
-                        latest_request=latest_request,
-                        project=project,
-                        applications_names=[application],
-                        model_monitoring_access_key=model_monitoring_access_key,
-                        parquet_target_path=parquet_target_path,
-                    )
-                    start_times.add(start_infer_time)
         except Exception:
             logger.exception(
                 "Encountered an exception",
                 endpoint_id=endpoint[mm_constants.EventFieldType.UID],
             )
-        if start_times:
-            return {endpoint_id: [str(t) for t in sorted(list(start_times))]}
-    def _delete_old_parquet(self, endpoints: list[dict[str, Any]], days: int = 1):
-        """
-        Delete application parquets older than the argument days.
-        :param endpoints: A list of dictionaries of model endpoints records.
-        """
-        if self.parquet_directory.startswith("v3io:///"):
-            # create fs with access to the user side (under projects)
-            store, _ = mlrun.store_manager.get_or_create_store(
-                self.parquet_directory,
-                {"V3IO_ACCESS_KEY": self.model_monitoring_access_key},
-            )
-            fs = store.filesystem
-            # calculate time threshold (keep only files from the last 24 hours)
-            time_to_keep = (
-                datetime.datetime.now(tz=datetime.timezone.utc)
-                - datetime.timedelta(days=days)
-            ).timestamp()
-            for endpoint in endpoints:
-                try:
-                    apps_parquet_directories = fs.listdir(
-                        path=f"{self.parquet_directory}"
-                        f"/key={endpoint[mm_constants.EventFieldType.UID]}"
-                    )
-                    for directory in apps_parquet_directories:
-                        if directory["mtime"] < time_to_keep:
-                            # Delete files
-                            fs.rm(path=directory["name"], recursive=True)
-                            # Delete directory
-                            fs.rmdir(path=directory["name"])
-                except FileNotFoundError:
-                    logger.info(
-                        "Application parquet directory is empty, "
-                        "probably parquets have not yet been created for this app",
-                        endpoint=endpoint[mm_constants.EventFieldType.UID],
-                        path=f"{self.parquet_directory}"
-                        f"/key={endpoint[mm_constants.EventFieldType.UID]}",
-                    )
     @staticmethod
     def _push_to_applications(
-        current_stats,
-        feature_stats,
-        start_infer_time,
-        end_infer_time,
-        endpoint_id,
-        latest_request,
-        project,
-        applications_names,
-        model_monitoring_access_key,
-        parquet_target_path,
+        start_infer_time: datetime.datetime,
+        end_infer_time: datetime.datetime,
+        endpoint_id: str,
+        project: str,
+        applications_names: list[str],
+        model_monitoring_access_key: str,
     ):
         """
         Pushes data to multiple stream applications.
-        :param current_stats:       Current statistics of input data.
-        :param feature_stats:       Statistics of train features.
-        :param start_infer_time:    The beginning of the infer interval window.
-        :param end_infer_time:      The end of the infer interval window.
-        :param endpoint_id:         Identifier for the model endpoint.
-        :param latest_request:      Timestamp of the latest model request.
-        :param project: mlrun       Project name.
-        :param applications_names:  List of application names to which data will be pushed.
+        :param start_infer_time:            The beginning of the infer interval window.
+        :param end_infer_time:              The end of the infer interval window.
+        :param endpoint_id:                 Identifier for the model endpoint.
+        :param project: mlrun               Project name.
+        :param applications_names:          List of application names to which data will be pushed.
+        :param model_monitoring_access_key: Access key to apply the model monitoring process.
         """
         data = {
-            mm_constants.ApplicationEvent.CURRENT_STATS: json.dumps(current_stats),
-            mm_constants.ApplicationEvent.FEATURE_STATS: json.dumps(feature_stats),
-            mm_constants.ApplicationEvent.SAMPLE_PARQUET_PATH: parquet_target_path,
             mm_constants.ApplicationEvent.START_INFER_TIME: start_infer_time.isoformat(
                 sep=" ", timespec="microseconds"
             ),
             mm_constants.ApplicationEvent.END_INFER_TIME: end_infer_time.isoformat(
                 sep=" ", timespec="microseconds"
             ),
-            mm_constants.ApplicationEvent.LAST_REQUEST: latest_request.isoformat(
-                sep=" ", timespec="microseconds"
-            ),
             mm_constants.ApplicationEvent.ENDPOINT_ID: endpoint_id,
             mm_constants.ApplicationEvent.OUTPUT_STREAM_URI: get_stream_path(
                 project=project,
-                application_name=mm_constants.MonitoringFunctionNames.WRITER,
+                function_name=mm_constants.MonitoringFunctionNames.WRITER,
             ),
         }
         for app_name in applications_names:
             data.update({mm_constants.ApplicationEvent.APPLICATION_NAME: app_name})
-            stream_uri = get_stream_path(project=project, application_name=app_name)
+            stream_uri = get_stream_path(project=project, function_name=app_name)
             logger.info(
                 f"push endpoint_id {endpoint_id} to {app_name} by stream :{stream_uri}"
@@ -607,49 +497,12 @@ class MonitoringApplicationController:
                 [data]
             )
-    @staticmethod
-    def _get_sample_df(
-        feature_set: mlrun.common.schemas.FeatureSet,
-        endpoint_id: str,
-        start_infer_time: datetime.datetime,
-        end_infer_time: datetime.datetime,
-        parquet_directory: str,
-        storage_options: dict,
-        application_name: str,
-    ) -> mlrun.feature_store.OfflineVectorResponse:
-        """
-        Retrieves a sample DataFrame of the current input according to the provided infer interval window.
-        :param feature_set:         The main feature set.
-        :param endpoint_id:         Identifier for the model endpoint.
-        :param start_infer_time:    The beginning of the infer interval window.
-        :param end_infer_time:      The end of the infer interval window.
-        :param parquet_directory:   Directory where Parquet files are stored.
-        :param storage_options:     Storage options for accessing the data.
-        :param application_name:    Current application name.
-        :return: OfflineVectorResponse that can be used for generating a sample DataFrame for the specified endpoint.
+def handler(context: nuclio.Context, event: nuclio.Event) -> None:
+    """
+    Run model monitoring application processor
-        """
-        features = [f"{feature_set.metadata.name}.*"]
-        vector = fstore.FeatureVector(
-            name=f"{endpoint_id}_vector",
-            features=features,
-            with_indexes=True,
-        )
-        vector.metadata.tag = application_name
-        vector.feature_set_objects = {feature_set.metadata.name: feature_set}
-        # get offline features based on application start and end time.
-        # store the result parquet by partitioning by controller end processing time
-        offline_response = vector.get_offline_features(
-            start_time=start_infer_time,
-            end_time=end_infer_time,
-            timestamp_for_filtering=mm_constants.EventFieldType.TIMESTAMP,
-            target=ParquetTarget(
-                path=parquet_directory
-                + f"/key={endpoint_id}/{int(start_infer_time.timestamp())}/{application_name}.parquet",
-                storage_options=storage_options,
-            ),
-        )
-        return offline_response
+    :param context: the Nuclio context
+    :param event:   trigger event
+    """
+    MonitoringApplicationController().run()

mlrun/model_monitoring/db/__init__.py ADDED Viewed

@@ -0,0 +1,18 @@
+# Copyright 2024 Iguazio
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from .stores import ObjectStoreFactory, get_store_object
+from .stores.base import StoreBase
+from .tsdb import get_tsdb_connector
+from .tsdb.base import TSDBConnector

mlrun 1.6.4rc8__py3-none-any.whl → 1.7.0__py3-none-any.whl

Potentially problematic release.

mlrun 1.6.4rc8__py3-none-any.whl → 1.7.0__py3-none-any.whl