mlrun 1.8.0rc45__py3-none-any.whl → 1.8.0rc47__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mlrun/alerts/alert.py +1 -1
- mlrun/common/schemas/model_monitoring/constants.py +5 -0
- mlrun/config.py +2 -0
- mlrun/data_types/__init__.py +5 -1
- mlrun/datastore/targets.py +7 -5
- mlrun/model_monitoring/api.py +31 -18
- mlrun/model_monitoring/applications/context.py +14 -1
- mlrun/model_monitoring/applications/evidently/base.py +38 -0
- mlrun/model_monitoring/controller.py +208 -84
- mlrun/model_monitoring/db/_schedules.py +110 -32
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +6 -1
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +11 -5
- mlrun/model_monitoring/helpers.py +46 -53
- mlrun/projects/project.py +29 -24
- mlrun/runtimes/function_reference.py +3 -0
- mlrun/runtimes/nuclio/function.py +48 -0
- mlrun/runtimes/nuclio/serving.py +16 -1
- mlrun/serving/states.py +48 -27
- mlrun/serving/v2_serving.py +51 -1
- mlrun/utils/helpers.py +5 -2
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.8.0rc45.dist-info → mlrun-1.8.0rc47.dist-info}/METADATA +5 -5
- {mlrun-1.8.0rc45.dist-info → mlrun-1.8.0rc47.dist-info}/RECORD +27 -27
- {mlrun-1.8.0rc45.dist-info → mlrun-1.8.0rc47.dist-info}/WHEEL +1 -1
- {mlrun-1.8.0rc45.dist-info → mlrun-1.8.0rc47.dist-info}/entry_points.txt +0 -0
- {mlrun-1.8.0rc45.dist-info → mlrun-1.8.0rc47.dist-info}/licenses/LICENSE +0 -0
- {mlrun-1.8.0rc45.dist-info → mlrun-1.8.0rc47.dist-info}/top_level.txt +0 -0
mlrun/alerts/alert.py
CHANGED

@@ -112,7 +112,7 @@ class AlertConfig(ModelObj):
                             complex trigger which is based on a prometheus alert
     :param criteria:        When the alert will be triggered based on the specified number of events within the
                             defined time period.
-    :param reset_policy:    When to clear the alert.
+    :param reset_policy:    When to clear the alert. Either "manual" for manual reset of the alert, or
                             "auto" if the criteria contains a time period
     :param notifications:   List of notifications to invoke once the alert is triggered
     :param entities:        Entities that the event relates to. The entity object will contain fields that
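To make the clarified `reset_policy` doc concrete, a hedged construction sketch: the two policy values come from the docstring above, while every other argument value is a placeholder, and further `AlertConfig` arguments (criteria, notifications, entities, trigger) are omitted here.

    from mlrun.alerts.alert import AlertConfig

    alert = AlertConfig(
        project="my-project",   # placeholder
        name="drift-alert",     # placeholder
        reset_policy="auto",    # or "manual"; "auto" needs a time period in the criteria
    )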
mlrun/common/schemas/model_monitoring/constants.py
CHANGED

@@ -289,6 +289,11 @@ class ModelMonitoringMode(StrEnum):
     disabled = "disabled"


+class ScheduleChiefFields(StrEnum):
+    LAST_REQUEST = "last_request"
+    LAST_ANALYZED = "last_analyzed"
+
+
 class EndpointType(IntEnum):
     NODE_EP = 1  # end point that is not a child of a router
     ROUTER = 2  # endpoint that is router
mlrun/config.py
CHANGED

@@ -631,6 +631,8 @@ default_config = {
         "parquet_batching_max_events": 10_000,
         "parquet_batching_timeout_secs": timedelta(minutes=1).total_seconds(),
         "tdengine": {
+            "run_directly": True,
+            # timeout and retry are ignored when run_directly is set to True
             "timeout": 10,
             "retries": 1,
         },
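As a rough sketch, mlrun defaults like these can be overridden at runtime through `mlrun.mlconf`; the exact attribute path below is an assumption (the `tdengine` block sits next to the parquet-batching keys, which belong to the model-monitoring config section), not something this diff confirms.

    import mlrun

    # Assumed path; adjust to wherever the "tdengine" block actually lives.
    mlrun.mlconf.model_endpoint_monitoring.tdengine.run_directly = False
    # Per the new comment, timeout/retries only apply when run_directly is False.
    mlrun.mlconf.model_endpoint_monitoring.tdengine.timeout = 30
    mlrun.mlconf.model_endpoint_monitoring.tdengine.retries = 3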
mlrun/data_types/__init__.py
CHANGED

@@ -27,8 +27,12 @@ class BaseDataInfer:
     get_stats = None


+def is_spark_dataframe(df) -> bool:
+    return "rdd" in dir(df)
+
+
 def get_infer_interface(df) -> BaseDataInfer:
-    if
+    if is_spark_dataframe(df):
         from .spark import SparkDataInfer

         return SparkDataInfer
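The new helper duck-types Spark DataFrames instead of importing pyspark: any object exposing an `rdd` attribute is treated as a Spark frame. A minimal sketch of its behavior:

    import pandas as pd

    def is_spark_dataframe(df) -> bool:
        # Same check as the helper above: Spark DataFrames expose `rdd`,
        # pandas DataFrames normally do not.
        return "rdd" in dir(df)

    print(is_spark_dataframe(pd.DataFrame({"a": [1]})))  # False
    # A pyspark.sql.DataFrame would print True.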
mlrun/datastore/targets.py
CHANGED

@@ -40,7 +40,7 @@ from mlrun.utils.helpers import to_parquet
 from mlrun.utils.v3io_clients import get_frames_client

 from .. import errors
-from ..data_types import ValueType
+from ..data_types import ValueType, is_spark_dataframe
 from ..platforms.iguazio import parse_path, split_path
 from .datastore_profile import datastore_profile_read
 from .spark_utils import spark_session_update_hadoop_options

@@ -86,8 +86,10 @@ def generate_target_run_id():


 def write_spark_dataframe_with_options(spark_options, df, mode, write_format=None):
+    # TODO: Replace with just df.sparkSession when Spark 3.2 support is dropped
+    spark_session = getattr(df, "sparkSession", None) or df.sql_ctx.sparkSession
     non_hadoop_spark_options = spark_session_update_hadoop_options(
-
+        spark_session, spark_options
     )
     if write_format:
         df.write.format(write_format).mode(mode).save(**non_hadoop_spark_options)
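The new `getattr(..., None) or ...` line is a compatibility fallback: prefer the newer `df.sparkSession` attribute and fall back to the legacy `df.sql_ctx.sparkSession` accessor on older Spark versions (hence the Spark 3.2 TODO). A generic sketch of the pattern, with illustrative stand-in classes rather than real Spark objects:

    class ModernDF:
        sparkSession = "session-from-attribute"

    class LegacyDF:
        class sql_ctx:  # stand-in for the legacy SQLContext accessor
            sparkSession = "session-from-sql-ctx"

    def resolve_session(df):
        # Try the new attribute first; fall back to the old accessor.
        return getattr(df, "sparkSession", None) or df.sql_ctx.sparkSession

    print(resolve_session(ModernDF()))  # session-from-attribute
    print(resolve_session(LegacyDF()))  # session-from-sql-ctx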
@@ -510,7 +512,7 @@ class BaseStoreTarget(DataTargetBase):
         chunk_id=0,
         **kwargs,
     ) -> Optional[int]:
-        if
+        if is_spark_dataframe(df):
             options = self.get_spark_options(key_column, timestamp_key)
             options.update(kwargs)
             df = self.prepare_spark_df(df, key_column, timestamp_key, options)

@@ -1376,7 +1378,7 @@ class NoSqlBaseTarget(BaseStoreTarget):
     def write_dataframe(
         self, df, key_column=None, timestamp_key=None, chunk_id=0, **kwargs
     ):
-        if
+        if is_spark_dataframe(df):
             options = self.get_spark_options(key_column, timestamp_key)
             options.update(kwargs)
             df = self.prepare_spark_df(df)

@@ -2108,7 +2110,7 @@ class SQLTarget(BaseStoreTarget):

         self._create_sql_table()

-        if
+        if is_spark_dataframe(df):
             raise ValueError("Spark is not supported")
         else:
             (
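All three hunks above install the same guard, replacing inline checks with the shared helper: Spark frames take the Spark write path (or are rejected outright, as in `SQLTarget`). A condensed, self-contained sketch of the dispatch shape:

    import pandas as pd

    class FakeSparkDF:
        rdd = None  # anything exposing `rdd` is routed to the Spark path

    def is_spark_dataframe(df) -> bool:
        return "rdd" in dir(df)

    def write_dataframe(df) -> str:
        # Condensed version of the guard now shared by the targets above.
        return "spark path" if is_spark_dataframe(df) else "pandas path"

    print(write_dataframe(FakeSparkDF()))   # spark path
    print(write_dataframe(pd.DataFrame()))  # pandas path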
mlrun/model_monitoring/api.py
CHANGED

@@ -50,8 +50,8 @@ DatasetType = typing.Union[

 def get_or_create_model_endpoint(
     project: str,
+    model_endpoint_name: str,
     model_path: str = "",
-    model_endpoint_name: str = "",
     endpoint_id: str = "",
     function_name: str = "",
     function_tag: str = "latest",

@@ -59,6 +59,7 @@ def get_or_create_model_endpoint(
     sample_set_statistics: typing.Optional[dict[str, typing.Any]] = None,
     monitoring_mode: mm_constants.ModelMonitoringMode = mm_constants.ModelMonitoringMode.enabled,
     db_session=None,
+    feature_analysis: bool = False,
 ) -> ModelEndpoint:
     """
     Get a single model endpoint object. If not exist, generate a new model endpoint with the provided parameters. Note

@@ -66,9 +67,9 @@ def get_or_create_model_endpoint(
     features, set `monitoring_mode=enabled`.

     :param project:             Project name.
-    :param model_path:          The model store path (applicable only to new endpoint_id).
     :param model_endpoint_name: If a new model endpoint is created, the model endpoint name will be presented
                                 under this endpoint (applicable only to new endpoint_id).
+    :param model_path:          The model store path (applicable only to new endpoint_id).
     :param endpoint_id:         Model endpoint unique ID. If not exist in DB, will generate a new record based
                                 on the provided `endpoint_id`.
     :param function_name:       If a new model endpoint is created, use this function name.

@@ -80,6 +81,7 @@ def get_or_create_model_endpoint(
     :param monitoring_mode:     If enabled, apply model monitoring features on the provided endpoint id
                                 (applicable only to new endpoint_id).
     :param db_session:          A runtime session that manages the current dialog with the database.
+    :param feature_analysis:    If True, the model endpoint will be retrieved with the feature analysis mode.

     :return: A ModelEndpoint object
     """

@@ -99,6 +101,7 @@ def get_or_create_model_endpoint(
         endpoint_id=endpoint_id,
         function_name=function_name,
         function_tag=function_tag or "latest",
+        feature_analysis=feature_analysis,
     )
     # If other fields provided, validate that they are correspond to the existing model endpoint data
     _model_endpoint_validations(

@@ -157,7 +160,8 @@ def record_results(
     :param context:               MLRun context. Note that the context is required generating the model endpoint.
     :param infer_results_df:      DataFrame that will be stored under the model endpoint parquet target. Will be
                                   used for doing the drift analysis. Please make sure that the dataframe includes
-                                  both feature names and label columns.
+                                  both feature names and label columns. If you are recording results for existing
+                                  model endpoint, the endpoint should be a batch endpoint.
     :param sample_set_statistics: Dictionary of sample set statistics that will be used as a reference data for
                                   the current model endpoint.
     :param monitoring_mode:       If enabled, apply model monitoring features on the provided endpoint id. Enabled

@@ -218,23 +222,32 @@ def record_results(
     )
     logger.debug("Model endpoint", endpoint=model_endpoint)

-    timestamp = datetime_now()
     if infer_results_df is not None:
-        # Write the monitoring parquet to the relevant model endpoint context
-        write_monitoring_df(
-            feature_set_uri=model_endpoint.spec.monitoring_feature_set_uri,
-            infer_datetime=timestamp,
-            endpoint_id=model_endpoint.metadata.uid,
-            infer_results_df=infer_results_df,
-        )
+        if (
+            model_endpoint.metadata.endpoint_type
+            != mlrun.common.schemas.model_monitoring.EndpointType.BATCH_EP
+        ):
+            logger.warning(
+                "Inference results can be recorded only for batch endpoints. "
+                "Therefore the current results won't be monitored."
+            )
+        else:
+            timestamp = datetime_now()
+            # Write the monitoring parquet to the relevant model endpoint context
+            write_monitoring_df(
+                feature_set_uri=model_endpoint.spec.monitoring_feature_set_uri,
+                infer_datetime=timestamp,
+                endpoint_id=model_endpoint.metadata.uid,
+                infer_results_df=infer_results_df,
+            )

-    # Update the last request time
-    update_model_endpoint_last_request(
-        project=project,
-        model_endpoint=model_endpoint,
-        current_request=timestamp,
-        db=db,
-    )
+            # Update the last request time
+            update_model_endpoint_last_request(
+                project=project,
+                model_endpoint=model_endpoint,
+                current_request=timestamp,
+                db=db,
+            )

     return model_endpoint
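Two hedged usage sketches follow. First, the reordered signature makes `model_endpoint_name` a required parameter of `get_or_create_model_endpoint`; the argument values here are placeholders:

    from mlrun.model_monitoring.api import get_or_create_model_endpoint

    endpoint = get_or_create_model_endpoint(
        project="my-project",            # placeholder
        model_endpoint_name="my-model",  # now required: no default value
        feature_analysis=True,           # new flag introduced in this diff
    )

Second, `record_results` now persists results and bumps the last-request timestamp only for batch endpoints (`EndpointType.BATCH_EP`) and merely logs a warning otherwise. An abridged call sketch; the keyword names are taken or inferred from the docstring hunks above, and any parameters not shown are assumed to keep their defaults:

    import mlrun
    import pandas as pd
    from mlrun.model_monitoring.api import record_results

    results_df = pd.DataFrame({"feature_1": [0.1], "label": [1]})  # placeholder data

    record_results(
        project="my-project",                        # placeholder
        model_endpoint_name="my-model",              # placeholder; must be a batch endpoint
        context=mlrun.get_or_create_ctx("monitor"),  # an MLRun context
        infer_results_df=results_df,
    )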
mlrun/model_monitoring/applications/context.py
CHANGED

@@ -76,7 +76,6 @@ class MonitoringApplicationContext:
     :param sample_df:        (pd.DataFrame) The new sample DataFrame.
     :param start_infer_time: (pd.Timestamp) Start time of the monitoring schedule.
     :param end_infer_time:   (pd.Timestamp) End time of the monitoring schedule.
-    :param latest_request:   (pd.Timestamp) Timestamp of the latest request on this endpoint_id.
     :param endpoint_id:      (str) ID of the monitored model endpoint
     :param feature_set:      (FeatureSet) the model endpoint feature set
     :param endpoint_name:    (str) Name of the monitored model endpoint

@@ -208,6 +207,20 @@ class MonitoringApplicationContext:
     @property
     def sample_df(self) -> pd.DataFrame:
         if self._sample_df is None:
+            if (
+                self.endpoint_name is None
+                or self.endpoint_id is None
+                or pd.isnull(self.start_infer_time)
+                or pd.isnull(self.end_infer_time)
+            ):
+                raise mlrun.errors.MLRunValueError(
+                    "You have tried to access `monitoring_context.sample_df`, but have not provided it directly "
+                    "through `sample_data`, nor have you provided the model endpoint's name, ID, and the start and "
+                    f"end times: `endpoint_name`={self.endpoint_name}, `endpoint_uid`={self.endpoint_id}, "
+                    f"`start`={self.start_infer_time}, and `end`={self.end_infer_time}. "
+                    "You can either provide the sample dataframe directly, the model endpoint's details and times, "
+                    "or adapt the application's logic to not access the sample dataframe."
+                )
             feature_set = self.feature_set
             features = [f"{feature_set.metadata.name}.*"]
             vector = fstore.FeatureVector(
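A hedged sketch of the new failure mode from an application's perspective; `app_context` is a placeholder name for the `MonitoringApplicationContext` instance the framework passes to the application:

    import mlrun.errors

    try:
        df = app_context.sample_df  # app_context is a placeholder
    except mlrun.errors.MLRunValueError:
        # Raised by this release when the sample dataframe was not passed in
        # via `sample_data` and the endpoint name/ID or start/end infer times
        # are missing, so the frame cannot be fetched from the feature set.
        df = None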
mlrun/model_monitoring/applications/evidently/base.py
CHANGED

@@ -12,12 +12,15 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+import json
+import posixpath
 import uuid
 import warnings
 from abc import ABC

 import pandas as pd
 import semver
+from evidently.ui.storage.local.base import METADATA_PATH, FSLocation

 import mlrun.model_monitoring.applications.base as mm_base
 import mlrun.model_monitoring.applications.context as mm_context

@@ -81,12 +84,47 @@ class EvidentlyModelMonitoringApplicationBase(
         # TODO : more then one project (mep -> project)
         if not _HAS_EVIDENTLY:
             raise ModuleNotFoundError("Evidently is not installed - the app cannot run")
+        self._log_location(evidently_workspace_path)
         self.evidently_workspace = Workspace.create(evidently_workspace_path)
         self.evidently_project_id = evidently_project_id
         self.evidently_project = self.evidently_workspace.get_project(
             evidently_project_id
         )

+    @staticmethod
+    def _log_location(evidently_workspace_path):
+        # TODO remove function + usage after solving issue ML-9530
+        location = FSLocation(base_path=evidently_workspace_path)
+        location.invalidate_cache("")
+        paths = [p for p in location.listdir("") if location.isdir(p)]
+
+        for path in paths:
+            metadata_path = posixpath.join(path, METADATA_PATH)
+            full_path = posixpath.join(location.path, metadata_path)
+            print(f"evidently json issue, working on path: {full_path}")
+            try:
+                with location.open(metadata_path) as f:
+                    content = json.load(f)
+                print(
+                    f"evidently json issue, successful load path: {full_path}, content: {content}"
+                )
+            except FileNotFoundError:
+                print(f"evidently json issue, path not found: {full_path}")
+                continue
+            except json.decoder.JSONDecodeError as json_error:
+                print(
+                    f"evidently json issue, path got json error, path:{full_path}, error: {json_error}"
+                )
+                print("evidently json issue, file content:")
+                with location.open(metadata_path) as f:
+                    print(f.read())
+                continue
+            except Exception as error:
+                print(
+                    f"evidently json issue, path got general error, path:{full_path}, error: {error}"
+                )
+                continue
+
     @staticmethod
     def log_evidently_object(
         monitoring_context: mm_context.MonitoringApplicationContext,
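`_log_location` is a temporary diagnostic for issue ML-9530: it walks each project directory in the Evidently workspace and reports whether its metadata file parses as JSON. Below is a simplified, standard-library-only re-implementation of the same scan; the `metadata.json` file name and flat directory layout are assumptions based on the code above, not confirmed Evidently API facts:

    import json
    import os
    import posixpath

    def scan_workspace(base_path: str, metadata_name: str = "metadata.json"):
        # Mirror _log_location: visit first-level directories and try to
        # parse each one's metadata file, reporting the three outcomes.
        for entry in os.listdir(base_path):
            project_dir = posixpath.join(base_path, entry)
            if not os.path.isdir(project_dir):
                continue
            metadata_path = posixpath.join(project_dir, metadata_name)
            try:
                with open(metadata_path) as f:
                    json.load(f)
                print(f"ok: {metadata_path}")
            except FileNotFoundError:
                print(f"missing: {metadata_path}")
            except json.JSONDecodeError as err:
                print(f"corrupt: {metadata_path}: {err}")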