mlrun 1.7.0rc6__py3-none-any.whl → 1.7.0rc8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic.
- mlrun/__main__.py +2 -0
- mlrun/common/constants.py +6 -0
- mlrun/common/schemas/__init__.py +3 -0
- mlrun/common/schemas/api_gateway.py +8 -1
- mlrun/common/schemas/model_monitoring/__init__.py +4 -0
- mlrun/common/schemas/model_monitoring/constants.py +35 -18
- mlrun/common/schemas/project.py +1 -0
- mlrun/common/types.py +7 -1
- mlrun/config.py +34 -10
- mlrun/data_types/data_types.py +4 -0
- mlrun/datastore/alibaba_oss.py +130 -0
- mlrun/datastore/azure_blob.py +4 -5
- mlrun/datastore/base.py +22 -16
- mlrun/datastore/datastore.py +4 -0
- mlrun/datastore/datastore_profile.py +7 -0
- mlrun/datastore/google_cloud_storage.py +1 -1
- mlrun/datastore/sources.py +2 -3
- mlrun/datastore/targets.py +6 -1
- mlrun/db/base.py +14 -6
- mlrun/db/httpdb.py +61 -56
- mlrun/db/nopdb.py +3 -0
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +6 -1
- mlrun/frameworks/tf_keras/mlrun_interface.py +20 -8
- mlrun/kfpops.py +2 -5
- mlrun/model.py +1 -0
- mlrun/model_monitoring/__init__.py +1 -1
- mlrun/model_monitoring/api.py +104 -295
- mlrun/model_monitoring/controller.py +25 -25
- mlrun/model_monitoring/db/__init__.py +16 -0
- mlrun/model_monitoring/{stores → db/stores}/__init__.py +43 -34
- mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
- mlrun/model_monitoring/{stores/model_endpoint_store.py → db/stores/base/store.py} +47 -6
- mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
- mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +49 -0
- mlrun/model_monitoring/{stores → db/stores/sqldb}/models/base.py +76 -3
- mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +68 -0
- mlrun/model_monitoring/{stores → db/stores/sqldb}/models/sqlite.py +13 -1
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +662 -0
- mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
- mlrun/model_monitoring/{stores/kv_model_endpoint_store.py → db/stores/v3io_kv/kv_store.py} +134 -3
- mlrun/model_monitoring/helpers.py +3 -3
- mlrun/model_monitoring/stream_processing.py +41 -9
- mlrun/model_monitoring/tracking_policy.py +7 -1
- mlrun/model_monitoring/writer.py +4 -36
- mlrun/projects/pipelines.py +14 -2
- mlrun/projects/project.py +118 -103
- mlrun/run.py +5 -1
- mlrun/runtimes/base.py +6 -0
- mlrun/runtimes/nuclio/api_gateway.py +218 -65
- mlrun/runtimes/nuclio/function.py +3 -0
- mlrun/runtimes/nuclio/serving.py +28 -32
- mlrun/runtimes/pod.py +26 -0
- mlrun/serving/routers.py +4 -3
- mlrun/serving/server.py +4 -6
- mlrun/serving/states.py +34 -14
- mlrun/serving/v2_serving.py +4 -3
- mlrun/utils/helpers.py +34 -0
- mlrun/utils/http.py +1 -1
- mlrun/utils/retryer.py +1 -0
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.0rc6.dist-info → mlrun-1.7.0rc8.dist-info}/METADATA +25 -16
- {mlrun-1.7.0rc6.dist-info → mlrun-1.7.0rc8.dist-info}/RECORD +66 -62
- mlrun/model_monitoring/batch.py +0 -933
- mlrun/model_monitoring/stores/models/__init__.py +0 -27
- mlrun/model_monitoring/stores/models/mysql.py +0 -34
- mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -382
- {mlrun-1.7.0rc6.dist-info → mlrun-1.7.0rc8.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc6.dist-info → mlrun-1.7.0rc8.dist-info}/WHEEL +0 -0
- {mlrun-1.7.0rc6.dist-info → mlrun-1.7.0rc8.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc6.dist-info → mlrun-1.7.0rc8.dist-info}/top_level.txt +0 -0
mlrun/model_monitoring/{stores/kv_model_endpoint_store.py → db/stores/v3io_kv/kv_store.py}
CHANGED
@@ -16,17 +16,18 @@
 import json
 import os
 import typing
+from http import HTTPStatus

 import v3io.dataplane
+import v3io.dataplane.response
 import v3io_frames

 import mlrun.common.model_monitoring.helpers
 import mlrun.common.schemas.model_monitoring
+import mlrun.model_monitoring.db
 import mlrun.utils.v3io_clients
 from mlrun.utils import logger

-from .model_endpoint_store import ModelEndpointStore
-
 # Fields to encode before storing in the KV table or to decode after retrieving
 fields_to_encode_decode = [
     mlrun.common.schemas.model_monitoring.EventFieldType.FEATURE_STATS,
@@ -34,7 +35,7 @@ fields_to_encode_decode = [
 ]


-class KVModelEndpointStore(ModelEndpointStore):
+class KVStoreBase(mlrun.model_monitoring.db.StoreBase):
     """
     Handles the DB operations when the DB target is from type KV. For the KV operations, we use an instance of V3IO
     client and usually the KV table can be found under v3io:///users/pipelines/project-name/model-endpoints/endpoints/.
@@ -394,6 +395,128 @@ class KVModelEndpointStore(ModelEndpointStore):

         return metrics_mapping

+    def write_application_result(self, event: dict[str, typing.Any]):
+        """
+        Write a new application result event in the target table.
+
+        :param event: An event dictionary that represents the application result, should be corresponded to the
+                      schema defined in the :py:class:`~mlrun.common.schemas.model_monitoring.constants.WriterEvent`
+                      object.
+        """
+        endpoint_id = event.pop(
+            mlrun.common.schemas.model_monitoring.WriterEvent.ENDPOINT_ID
+        )
+        app_name = event.pop(
+            mlrun.common.schemas.model_monitoring.WriterEvent.APPLICATION_NAME
+        )
+        metric_name = event.pop(
+            mlrun.common.schemas.model_monitoring.WriterEvent.RESULT_NAME
+        )
+        attributes = {metric_name: json.dumps(event)}
+
+        v3io_monitoring_apps_container = self.get_v3io_monitoring_apps_container(
+            project_name=self.project
+        )
+
+        self.client.kv.update(
+            container=v3io_monitoring_apps_container,
+            table_path=endpoint_id,
+            key=app_name,
+            attributes=attributes,
+        )
+
+        schema_file = self.client.kv.new_cursor(
+            container=v3io_monitoring_apps_container,
+            table_path=endpoint_id,
+            filter_expression='__name==".#schema"',
+        )
+
+        if not schema_file.all():
+            logger.info(
+                "Generate a new V3IO KV schema file",
+                container=v3io_monitoring_apps_container,
+                endpoint_id=endpoint_id,
+            )
+            self._generate_kv_schema(endpoint_id, v3io_monitoring_apps_container)
+        logger.info("Updated V3IO KV successfully", key=app_name)
+
+    def _generate_kv_schema(
+        self, endpoint_id: str, v3io_monitoring_apps_container: str
+    ):
+        """Generate V3IO KV schema file which will be used by the model monitoring applications dashboard in Grafana."""
+        fields = [
+            {
+                "name": mlrun.common.schemas.model_monitoring.WriterEvent.RESULT_NAME,
+                "type": "string",
+                "nullable": False,
+            }
+        ]
+        res = self.client.kv.create_schema(
+            container=v3io_monitoring_apps_container,
+            table_path=endpoint_id,
+            key=mlrun.common.schemas.model_monitoring.WriterEvent.APPLICATION_NAME,
+            fields=fields,
+        )
+        if res.status_code != HTTPStatus.OK:
+            raise mlrun.errors.MLRunBadRequestError(
+                f"Couldn't infer schema for endpoint {endpoint_id} which is required for Grafana dashboards"
+            )
+        else:
+            logger.info(
+                "Generated V3IO KV schema successfully", endpoint_id=endpoint_id
+            )
+
+    def get_last_analyzed(self, endpoint_id: str, application_name: str) -> int:
+        """
+        Get the last analyzed time for the provided model endpoint and application.
+
+        :param endpoint_id:      The unique id of the model endpoint.
+        :param application_name: Registered application name.
+
+        :return: Timestamp as a Unix time.
+        :raise:  MLRunNotFoundError if last analyzed value is not found.
+
+        """
+        try:
+            data = self.client.kv.get(
+                container=self._get_monitoring_schedules_container(
+                    project_name=self.project
+                ),
+                table_path=endpoint_id,
+                key=application_name,
+            )
+            return data.output.item[
+                mlrun.common.schemas.model_monitoring.SchedulingKeys.LAST_ANALYZED
+            ]
+        except v3io.dataplane.response.HttpResponseError as err:
+            logger.debug("Error while getting last analyzed time", err=err)
+            raise mlrun.errors.MLRunNotFoundError(
+                f"No last analyzed value has been found for {application_name} "
+                f"that processes model endpoint {endpoint_id}",
+            )
+
+    def update_last_analyzed(
+        self, endpoint_id: str, application_name: str, last_analyzed: int
+    ):
+        """
+        Update the last analyzed time for the provided model endpoint and application.
+
+        :param endpoint_id:      The unique id of the model endpoint.
+        :param application_name: Registered application name.
+        :param last_analyzed:    Timestamp as a Unix time that represents the last analyzed time of a certain
+                                 application and model endpoint.
+        """
+        self.client.kv.put(
+            container=self._get_monitoring_schedules_container(
+                project_name=self.project
+            ),
+            table_path=endpoint_id,
+            key=application_name,
+            attributes={
+                mlrun.common.schemas.model_monitoring.SchedulingKeys.LAST_ANALYZED: last_analyzed
+            },
+        )
+
     def _generate_tsdb_paths(self) -> tuple[str, str]:
         """Generate a short path to the TSDB resources and a filtered path for the frames object
         :return: A tuple of:
@@ -572,3 +695,11 @@ class KVModelEndpointStore(ModelEndpointStore):
         if isinstance(field, bytes):
             return field.decode()
         return field
+
+    @staticmethod
+    def get_v3io_monitoring_apps_container(project_name: str) -> str:
+        return f"users/pipelines/{project_name}/monitoring-apps"
+
+    @staticmethod
+    def _get_monitoring_schedules_container(project_name: str) -> str:
+        return f"users/pipelines/{project_name}/monitoring-schedules/functions"
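Usage sketch (not taken from the diff): the `last_analyzed` methods added above give each monitoring application a per-endpoint bookmark in V3IO KV. The sketch assumes an Iguazio/V3IO-backed project and that `mlrun.model_monitoring.get_store_object` (referenced later in this diff) returns this `KVStoreBase`; the project name, endpoint id, and application name are placeholders.

import time

import mlrun.errors
import mlrun.model_monitoring

store = mlrun.model_monitoring.get_store_object(project="my-project")  # placeholder project

endpoint_id = "endpoint-uid-placeholder"
app_name = "my-monitoring-app"

try:
    last = store.get_last_analyzed(endpoint_id=endpoint_id, application_name=app_name)
except mlrun.errors.MLRunNotFoundError:
    last = 0  # first run for this endpoint/application pair

# ... run the monitoring application over the window (last, now] ...

store.update_last_analyzed(
    endpoint_id=endpoint_id,
    application_name=app_name,
    last_analyzed=int(time.time()),
)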
mlrun/model_monitoring/helpers.py
CHANGED
@@ -42,7 +42,7 @@ class _BatchDict(typing.TypedDict):
 def get_stream_path(
     project: str = None,
     function_name: str = mm_constants.MonitoringFunctionNames.STREAM,
-):
+) -> str:
     """
     Get stream path from the project secret. If wasn't set, take it from the system configurations

@@ -54,8 +54,6 @@ def get_stream_path(

     stream_uri = mlrun.get_secret_or_env(
         mlrun.common.schemas.model_monitoring.ProjectSecretKeys.STREAM_PATH
-        if function_name is mm_constants.MonitoringFunctionNames.STREAM
-        else ""
     ) or mlrun.mlconf.get_model_monitoring_file_target_path(
         project=project,
         kind=mlrun.common.schemas.model_monitoring.FileTargetKind.STREAM,
@@ -63,6 +61,8 @@ def get_stream_path(
         function_name=function_name,
     )

+    if isinstance(stream_uri, list):  # ML-6043 - user side gets only the new stream uri
+        stream_uri = stream_uri[1]  # get new stream path, under projects
     return mlrun.common.model_monitoring.helpers.parse_monitoring_stream_path(
         stream_uri=stream_uri, project=project, function_name=function_name
     )
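The removed condition means the STREAM_PATH secret is now resolved for every function name, and the new guard handles the case where the resolved value is a list. A standalone restatement of that guard (illustration only, not mlrun code):

# Stand-in for the guard added above; "stream_uri" is whatever value mlrun
# resolved from the project secret or the system configuration.
def _pick_stream_uri(stream_uri):
    if isinstance(stream_uri, list):  # ML-6043 - user side gets only the new stream uri
        return stream_uri[1]  # the new stream path, under projects
    return stream_uri


assert _pick_stream_uri("v3io:///projects/demo/stream-v1") == "v3io:///projects/demo/stream-v1"
assert _pick_stream_uri(["old-stream-uri", "new-stream-uri"]) == "new-stream-uri"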
mlrun/model_monitoring/stream_processing.py
CHANGED
@@ -24,7 +24,9 @@ import mlrun
 import mlrun.common.model_monitoring.helpers
 import mlrun.config
 import mlrun.datastore.targets
+import mlrun.feature_store as fstore
 import mlrun.feature_store.steps
+import mlrun.model_monitoring.db
 import mlrun.model_monitoring.prometheus
 import mlrun.serving.states
 import mlrun.utils
@@ -36,6 +38,7 @@ from mlrun.common.schemas.model_monitoring.constants import (
     FileTargetKind,
     ModelEndpointTarget,
     ProjectSecretKeys,
+    PrometheusEndpoints,
 )
 from mlrun.utils import logger

@@ -183,11 +186,11 @@ class EventStreamProcessor:
         # Step 2 - Filter out events with '-' in the path basename from going forward
         # through the next steps of the stream graph
         def apply_storey_filter_stream_events():
-            #
+            # Filter events with Prometheus endpoints path
             graph.add_step(
                 "storey.Filter",
                 "filter_stream_event",
-                _fn="(
+                _fn=f"(event.path not in {PrometheusEndpoints.list()})",
                 full_event=True,
             )

@@ -933,6 +936,8 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
     def do(self, event: dict):
         endpoint_id = event[EventFieldType.ENDPOINT_ID]

+        feature_values = event[EventFieldType.FEATURES]
+        label_values = event[EventFieldType.PREDICTION]
         # Get feature names and label columns
         if endpoint_id not in self.feature_names:
             endpoint_record = get_endpoint_record(
@@ -968,6 +973,12 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
                 },
             )

+            update_monitoring_feature_set(
+                endpoint_record=endpoint_record,
+                feature_names=feature_names,
+                feature_values=feature_values,
+            )
+
             # Similar process with label columns
             if not label_columns and self._infer_columns_from_data:
                 label_columns = self._infer_label_columns_from_data(event)
@@ -986,6 +997,11 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
                 endpoint_id=endpoint_id,
                 attributes={EventFieldType.LABEL_NAMES: json.dumps(label_columns)},
             )
+            update_monitoring_feature_set(
+                endpoint_record=endpoint_record,
+                feature_names=label_columns,
+                feature_values=label_values,
+            )

             self.label_columns[endpoint_id] = label_columns
             self.feature_names[endpoint_id] = feature_names
@@ -1003,7 +1019,6 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):

         # Add feature_name:value pairs along with a mapping dictionary of all of these pairs
         feature_names = self.feature_names[endpoint_id]
-        feature_values = event[EventFieldType.FEATURES]
         self._map_dictionary_values(
             event=event,
             named_iters=feature_names,
@@ -1013,7 +1028,6 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):

         # Add label_name:value pairs along with a mapping dictionary of all of these pairs
         label_names = self.label_columns[endpoint_id]
-        label_values = event[EventFieldType.PREDICTION]
         self._map_dictionary_values(
             event=event,
             named_iters=label_names,
@@ -1139,10 +1153,10 @@ class EventRouting(mlrun.feature_store.steps.MapClass):
         self.project: str = project

     def do(self, event):
-        if event.path ==
+        if event.path == PrometheusEndpoints.MODEL_MONITORING_METRICS:
             # Return a parsed Prometheus registry file
             event.body = mlrun.model_monitoring.prometheus.get_registry()
-        elif event.path ==
+        elif event.path == PrometheusEndpoints.MONITORING_BATCH_METRICS:
             # Update statistical metrics
             for event_metric in event.body:
                 mlrun.model_monitoring.prometheus.write_drift_metrics(
@@ -1151,7 +1165,7 @@ class EventRouting(mlrun.feature_store.steps.MapClass):
                     metric=event_metric[EventFieldType.METRIC],
                     value=event_metric[EventFieldType.VALUE],
                 )
-        elif event.path ==
+        elif event.path == PrometheusEndpoints.MONITORING_DRIFT_STATUS:
             # Update drift status
             mlrun.model_monitoring.prometheus.write_drift_status(
                 project=self.project,
@@ -1211,7 +1225,7 @@ def update_endpoint_record(
     endpoint_id: str,
     attributes: dict,
 ):
-    model_endpoint_store = mlrun.model_monitoring.
+    model_endpoint_store = mlrun.model_monitoring.get_store_object(
         project=project,
     )

@@ -1221,7 +1235,25 @@ def update_endpoint_record(


 def get_endpoint_record(project: str, endpoint_id: str):
-    model_endpoint_store = mlrun.model_monitoring.
+    model_endpoint_store = mlrun.model_monitoring.get_store_object(
         project=project,
     )
     return model_endpoint_store.get_model_endpoint(endpoint_id=endpoint_id)
+
+
+def update_monitoring_feature_set(
+    endpoint_record: dict[str, typing.Any],
+    feature_names: list[str],
+    feature_values: list[typing.Any],
+):
+    monitoring_feature_set = fstore.get_feature_set(
+        endpoint_record[
+            mlrun.common.schemas.model_monitoring.EventFieldType.FEATURE_SET_URI
+        ]
+    )
+    for name, val in zip(feature_names, feature_values):
+        monitoring_feature_set.add_feature(
+            fstore.Feature(name=name, value_type=type(val))
+        )
+
+    monitoring_feature_set.save()
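For context on the filter expression above: `PrometheusEndpoints` (added to the model-monitoring constants in this release) is used both to keep Prometheus scrape requests out of the regular event stream and to route them in `EventRouting`. The sketch below uses a stand-in enum with made-up path values, since the real values are not shown in this diff; it only illustrates how the `storey.Filter` expression string is produced.

import enum


class PrometheusEndpoints(str, enum.Enum):  # stand-in, not the real mlrun class
    MODEL_MONITORING_METRICS = "/monitoring/metrics"        # placeholder value
    MONITORING_BATCH_METRICS = "/monitoring/batch-metrics"  # placeholder value
    MONITORING_DRIFT_STATUS = "/monitoring/drift-status"    # placeholder value

    @classmethod
    def list(cls):
        return [member.value for member in cls]


# Mirrors the _fn argument passed to storey.Filter in the hunk above.
filter_expression = f"(event.path not in {PrometheusEndpoints.list()})"
print(filter_expression)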
mlrun/model_monitoring/tracking_policy.py
CHANGED
@@ -11,8 +11,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#

+import warnings
 from typing import Union

 import mlrun.common.schemas.schedule
@@ -55,6 +55,12 @@ class TrackingPolicy(mlrun.model.ModelObj):
         writer function, which is a real time nuclio functino, will be deployed
         with the same image. By default, the image is mlrun/mlrun.
         """
+        warnings.warn(
+            "The `TrackingPolicy` class is deprecated from version 1.7.0 and is not "
+            "used anymore. It will be removed in 1.9.0.",
+            FutureWarning,
+        )
+
         if isinstance(default_batch_intervals, str):
             default_batch_intervals = (
                 mlrun.common.schemas.schedule.ScheduleCronTrigger.from_crontab(
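As shown above, constructing `TrackingPolicy` now emits a `FutureWarning`. A quick way to observe it, assuming the class can still be imported from `mlrun.model_monitoring.tracking_policy` and constructed with default arguments:

import warnings

from mlrun.model_monitoring.tracking_policy import TrackingPolicy

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    TrackingPolicy()  # assumed to be constructible with defaults

assert any(issubclass(w.category, FutureWarning) for w in caught)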
mlrun/model_monitoring/writer.py
CHANGED
@@ -14,7 +14,6 @@

 import datetime
 import json
-from http import HTTPStatus
 from typing import Any, NewType

 import pandas as pd
@@ -25,6 +24,7 @@ from v3io_frames.frames_pb2 import IGNORE

 import mlrun.common.model_monitoring
 import mlrun.model_monitoring
+import mlrun.model_monitoring.db.stores
 import mlrun.utils.v3io_clients
 from mlrun.common.schemas.model_monitoring.constants import ResultStatusApp, WriterEvent
 from mlrun.common.schemas.notification import NotificationKind, NotificationSeverity
@@ -106,13 +106,11 @@ class ModelMonitoringWriter(StepToDict):
         self.project = project
         self.name = project  # required for the deployment process
         self._v3io_container = self.get_v3io_container(self.name)
-        self._kv_client = self._get_v3io_client().kv
         self._tsdb_client = self._get_v3io_frames_client(self._v3io_container)
         self._custom_notifier = CustomNotificationPusher(
             notification_types=[NotificationKind.slack]
         )
         self._create_tsdb_table()
-        self._kv_schemas = []

     @staticmethod
     def get_v3io_container(project_name: str) -> str:
@@ -141,40 +139,10 @@ class ModelMonitoringWriter(StepToDict):

     def _update_kv_db(self, event: _AppResultEvent) -> None:
         event = _AppResultEvent(event.copy())
-
-
-        metric_name = event.pop(WriterEvent.RESULT_NAME)
-        attributes = {metric_name: json.dumps(event)}
-        self._kv_client.update(
-            container=self._v3io_container,
-            table_path=endpoint_id,
-            key=app_name,
-            attributes=attributes,
+        application_result_store = mlrun.model_monitoring.get_store_object(
+            project=self.project
         )
-
-        self._generate_kv_schema(endpoint_id)
-        logger.info("Updated V3IO KV successfully", key=app_name)
-
-    def _generate_kv_schema(self, endpoint_id: str):
-        """Generate V3IO KV schema file which will be used by the model monitoring applications dashboard in Grafana."""
-        fields = [
-            {"name": WriterEvent.RESULT_NAME, "type": "string", "nullable": False}
-        ]
-        res = self._kv_client.create_schema(
-            container=self._v3io_container,
-            table_path=endpoint_id,
-            key=WriterEvent.APPLICATION_NAME,
-            fields=fields,
-        )
-        if res.status_code != HTTPStatus.OK.value:
-            raise mlrun.errors.MLRunBadRequestError(
-                f"Couldn't infer schema for endpoint {endpoint_id} which is required for Grafana dashboards"
-            )
-        else:
-            logger.info(
-                "Generated V3IO KV schema successfully", endpoint_id=endpoint_id
-            )
-        self._kv_schemas.append(endpoint_id)
+        application_result_store.write_application_result(event=event)

     def _update_tsdb(self, event: _AppResultEvent) -> None:
         event = _AppResultEvent(event.copy())
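Net effect of the writer change: `_update_kv_db` no longer talks to V3IO KV directly but hands the event to whatever store `mlrun.model_monitoring.get_store_object` returns for the project. A hedged sketch of the equivalent call path, using placeholder values and only the `WriterEvent` fields that appear in this diff (real writer events carry additional fields):

import mlrun.model_monitoring
from mlrun.common.schemas.model_monitoring.constants import WriterEvent

# Placeholder application-result event.
event = {
    WriterEvent.ENDPOINT_ID: "endpoint-uid-placeholder",
    WriterEvent.APPLICATION_NAME: "my-monitoring-app",
    WriterEvent.RESULT_NAME: "data_drift",
}

store = mlrun.model_monitoring.get_store_object(project="my-project")  # placeholder project
store.write_application_result(event=event)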
mlrun/projects/pipelines.py
CHANGED
@@ -412,6 +412,11 @@ def enrich_function_object(
     if decorator:
         decorator(f)

+    if project.spec.default_function_node_selector:
+        f.enrich_runtime_spec(
+            project.spec.default_function_node_selector,
+        )
+
     if try_auto_mount:
         if (
             decorator and AutoMountType.is_auto_modifier(decorator)
@@ -608,6 +613,7 @@ class _KFPRunner(_PipelineRunner):
             namespace=namespace,
             artifact_path=artifact_path,
             cleanup_ttl=workflow_spec.cleanup_ttl,
+            timeout=int(mlrun.mlconf.workflows.timeouts.kfp),
         )

         # The user provided workflow code might have made changes to function specs that require cleanup
@@ -865,15 +871,21 @@ class _RemoteRunner(_PipelineRunner):
             )
             return

+        get_workflow_id_timeout = max(
+            int(mlrun.mlconf.workflows.timeouts.remote),
+            int(getattr(mlrun.mlconf.workflows.timeouts, inner_engine.engine)),
+        )
+
         logger.debug(
             "Workflow submitted, waiting for pipeline run to start",
             workflow_name=workflow_response.name,
+            get_workflow_id_timeout=get_workflow_id_timeout,
         )

         # Getting workflow id from run:
         response = retry_until_successful(
             1,
-
+            get_workflow_id_timeout,
             logger,
             False,
             run_db.get_workflow_id,
@@ -1059,7 +1071,7 @@ def load_and_run(
         )

     except Exception as exc:
-        logger.error("Failed to send slack notification", exc=exc)
+        logger.error("Failed to send slack notification", exc=err_to_str(exc))

         raise error

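The new `get_workflow_id_timeout` is driven by the `mlrun.mlconf.workflows.timeouts` configuration node (also touched in `mlrun/config.py` in this release). A sketch of how the remote runner's wait time is derived; the numeric values are illustrative only, not the shipped defaults, and overriding them via attribute assignment is an assumption about the config object:

import mlrun

# Illustrative overrides; the real defaults live in mlrun/config.py.
mlrun.mlconf.workflows.timeouts.kfp = 600     # seconds to wait for a KFP workflow id
mlrun.mlconf.workflows.timeouts.remote = 300  # baseline for the remote runner

engine = "kfp"  # stands in for inner_engine.engine in the hunk above
get_workflow_id_timeout = max(
    int(mlrun.mlconf.workflows.timeouts.remote),
    int(getattr(mlrun.mlconf.workflows.timeouts, engine)),
)
print(get_workflow_id_timeout)  # 600 with the values above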