PyPI - mlrun - Versions diffs - 1.8.0rc21__py3-none-any.whl → 1.8.0rc26__py3-none-any.whl - Mend

mlrun 1.8.0rc21__py3-none-any.whl → 1.8.0rc26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mlrun might be problematic. Click here for more details.

Files changed (47)

mlrun/__init__.py +37 -3
mlrun/alerts/alert.py +1 -0
mlrun/artifacts/document.py +78 -36
mlrun/common/formatters/feature_set.py +1 -0
mlrun/common/schemas/alert.py +3 -0
mlrun/common/schemas/client_spec.py +0 -1
mlrun/common/schemas/model_monitoring/constants.py +27 -9
mlrun/common/schemas/workflow.py +1 -0
mlrun/config.py +39 -6
mlrun/datastore/datastore_profile.py +58 -16
mlrun/datastore/sources.py +7 -1
mlrun/datastore/vectorstore.py +20 -1
mlrun/db/base.py +11 -0
mlrun/db/httpdb.py +21 -9
mlrun/db/nopdb.py +10 -0
mlrun/errors.py +4 -0
mlrun/execution.py +15 -6
mlrun/launcher/client.py +2 -2
mlrun/launcher/local.py +5 -1
mlrun/model_monitoring/applications/_application_steps.py +3 -1
mlrun/model_monitoring/controller.py +266 -103
mlrun/model_monitoring/db/tsdb/__init__.py +11 -23
mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +5 -2
mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +8 -8
mlrun/model_monitoring/helpers.py +16 -10
mlrun/model_monitoring/stream_processing.py +85 -35
mlrun/package/context_handler.py +1 -1
mlrun/package/packagers_manager.py +4 -18
mlrun/projects/pipelines.py +2 -2
mlrun/projects/project.py +123 -38
mlrun/runtimes/nuclio/serving.py +2 -2
mlrun/runtimes/sparkjob/spark3job.py +1 -1
mlrun/secrets.py +1 -1
mlrun/serving/server.py +11 -3
mlrun/serving/states.py +65 -8
mlrun/serving/v2_serving.py +16 -8
mlrun/utils/helpers.py +81 -21
mlrun/utils/notifications/notification/base.py +6 -1
mlrun/utils/notifications/notification/slack.py +5 -1
mlrun/utils/notifications/notification_pusher.py +13 -4
mlrun/utils/version/version.json +2 -2
{mlrun-1.8.0rc21.dist-info → mlrun-1.8.0rc26.dist-info}/METADATA +33 -16
{mlrun-1.8.0rc21.dist-info → mlrun-1.8.0rc26.dist-info}/RECORD +47 -47
{mlrun-1.8.0rc21.dist-info → mlrun-1.8.0rc26.dist-info}/WHEEL +1 -1
{mlrun-1.8.0rc21.dist-info → mlrun-1.8.0rc26.dist-info}/LICENSE +0 -0
{mlrun-1.8.0rc21.dist-info → mlrun-1.8.0rc26.dist-info}/entry_points.txt +0 -0
{mlrun-1.8.0rc21.dist-info → mlrun-1.8.0rc26.dist-info}/top_level.txt +0 -0

mlrun/model_monitoring/db/tsdb/__init__.py CHANGED Viewed

@@ -67,43 +67,31 @@ class ObjectTSDBFactory(enum.Enum):
 def get_tsdb_connector(
     project: str,
     secret_provider: typing.Optional[typing.Callable[[str], str]] = None,
-    tsdb_connection_string: typing.Optional[str] = None,
-    **kwargs,
+    profile: typing.Optional[mlrun.datastore.datastore_profile.DatastoreProfile] = None,
 ) -> TSDBConnector:
     """
     Get TSDB connector object.
     :param project:                 The name of the project.
     :param secret_provider:         An optional secret provider to get the connection string secret.
-    :param tsdb_connection_string:  An optional explicit connection string to the TSDB.
+    :param profile:                 An optional profile to initialize the TSDB connector from.
     :return: `TSDBConnector` object. The main goal of this object is to handle different operations on the
              TSDB connector such as updating drift metrics or write application record result.
     :raise: `MLRunInvalidMMStoreTypeError` if the user didn't provide TSDB connection
             or the provided TSDB connection is invalid.
     """
-    try:
-        profile = mlrun.model_monitoring.helpers._get_tsdb_profile(
-            project=project, secret_provider=secret_provider
-        )
-    except mlrun.errors.MLRunNotFoundError:
-        profile = None
-    tsdb_connection_string = (
-        tsdb_connection_string
-        or mlrun.model_monitoring.helpers.get_tsdb_connection_string(
-            secret_provider=secret_provider
-        )
+    profile = profile or mlrun.model_monitoring.helpers._get_tsdb_profile(
+        project=project, secret_provider=secret_provider
     )
-    if tsdb_connection_string and tsdb_connection_string.startswith("taosws"):
-        tsdb_connector_type = mlrun.common.schemas.model_monitoring.TSDBTarget.TDEngine
-        kwargs["connection_string"] = tsdb_connection_string
-    elif tsdb_connection_string and tsdb_connection_string == "v3io":
-        tsdb_connector_type = mlrun.common.schemas.model_monitoring.TSDBTarget.V3IO_TSDB
-    elif isinstance(profile, mlrun.datastore.datastore_profile.DatastoreProfileV3io):
+    kwargs = {}
+    if isinstance(profile, mlrun.datastore.datastore_profile.DatastoreProfileV3io):
         tsdb_connector_type = mlrun.common.schemas.model_monitoring.TSDBTarget.V3IO_TSDB
         kwargs["v3io_access_key"] = profile.v3io_access_key
+    elif isinstance(
+        profile, mlrun.datastore.datastore_profile.TDEngineDatastoreProfile
+    ):
+        tsdb_connector_type = mlrun.common.schemas.model_monitoring.TSDBTarget.TDEngine
+        kwargs["connection_string"] = profile.dsn()
     else:
         raise mlrun.errors.MLRunInvalidMMStoreTypeError(
             "You must provide a valid tsdb store connection by using "

mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py CHANGED Viewed

@@ -145,8 +145,11 @@ class TDEngineConnector(TSDBConnector):
         create_table_sql = table._create_subtable_sql(subtable=table_name, values=event)
+        # we need the string values to be sent to the connection, not the enum
+        columns = {str(key): str(val) for key, val in table.columns.items()}
         insert_statement = Statement(
-            columns=table.columns,
+            columns=columns,
             subtable=table_name,
             values=event,
         )
@@ -188,7 +191,7 @@ class TDEngineConnector(TSDBConnector):
             graph.add_step(
                 "mlrun.model_monitoring.db.tsdb.tdengine.stream_graph_steps.ProcessBeforeTDEngine",
                 name="ProcessBeforeTDEngine",
-                after="MapFeatureNames",
+                after="FilterNOP",
             )
         def apply_tdengine_target(name, after):

mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py CHANGED Viewed

@@ -135,7 +135,7 @@ class V3IOTSDBConnector(TSDBConnector):
         monitoring_predictions_full_path = (
             mlrun.mlconf.get_model_monitoring_file_target_path(
                 project=self.project,
-                kind=mm_schemas.FileTargetKind.PREDICTIONS,
+                kind=mm_schemas.V3IOTSDBTables.PREDICTIONS,
             )
         )
         (
@@ -145,7 +145,7 @@ class V3IOTSDBConnector(TSDBConnector):
         ) = mlrun.common.model_monitoring.helpers.parse_model_endpoint_store_prefix(
             monitoring_predictions_full_path
         )
-        self.tables[mm_schemas.FileTargetKind.PREDICTIONS] = monitoring_predictions_path
+        self.tables[mm_schemas.V3IOTSDBTables.PREDICTIONS] = monitoring_predictions_path
     def create_tables(self) -> None:
         """
@@ -204,7 +204,7 @@ class V3IOTSDBConnector(TSDBConnector):
                     }
                 ],
                 name=EventFieldType.LATENCY,
-                after="MapFeatureNames",
+                after="FilterNOP",
                 step_name="Aggregates",
                 table=".",
                 key_field=EventFieldType.ENDPOINT_ID,
@@ -225,8 +225,8 @@ class V3IOTSDBConnector(TSDBConnector):
         graph.add_step(
             "storey.TSDBTarget",
             name="tsdb_predictions",
-            after="MapFeatureNames",
-            path=f"{self.container}/{self.tables[mm_schemas.FileTargetKind.PREDICTIONS]}",
+            after="FilterNOP",
+            path=f"{self.container}/{self.tables[mm_schemas.V3IOTSDBTables.PREDICTIONS]}",
             rate="1/s",
             time_col=mm_schemas.EventFieldType.TIMESTAMP,
             container=self.container,
@@ -740,7 +740,7 @@ class V3IOTSDBConnector(TSDBConnector):
                 "both or neither of `aggregation_window` and `agg_funcs` must be provided"
             )
         df = self._get_records(
-            table=mm_schemas.FileTargetKind.PREDICTIONS,
+            table=mm_schemas.V3IOTSDBTables.PREDICTIONS,
             start=start,
             end=end,
             columns=[mm_schemas.EventFieldType.ESTIMATED_PREDICTION_COUNT],
@@ -782,7 +782,7 @@ class V3IOTSDBConnector(TSDBConnector):
         filter_query = self._get_endpoint_filter(endpoint_id=endpoint_ids)
         start, end = self._get_start_end(start, end)
         df = self._get_records(
-            table=mm_schemas.FileTargetKind.PREDICTIONS,
+            table=mm_schemas.V3IOTSDBTables.PREDICTIONS,
             start=start,
             end=end,
             filter_query=filter_query,
@@ -919,7 +919,7 @@ class V3IOTSDBConnector(TSDBConnector):
         start = start or (mlrun.utils.datetime_now() - timedelta(hours=24))
         start, end = self._get_start_end(start, end)
         df = self._get_records(
-            table=mm_schemas.FileTargetKind.PREDICTIONS,
+            table=mm_schemas.V3IOTSDBTables.PREDICTIONS,
             start=start,
             end=end,
             columns=[mm_schemas.EventFieldType.LATENCY],

mlrun/model_monitoring/helpers.py CHANGED Viewed

@@ -109,7 +109,7 @@ def filter_results_by_regex(
             result_name_filters=validated_filters,
         ):
             filtered_metrics_names.append(existing_result_name)
-    return filtered_metrics_names
+    return list(set(filtered_metrics_names))
 def get_stream_path(
@@ -117,6 +117,7 @@ def get_stream_path(
     function_name: str = mm_constants.MonitoringFunctionNames.STREAM,
     stream_uri: Optional[str] = None,
     secret_provider: Optional[Callable[[str], str]] = None,
+    profile: Optional[mlrun.datastore.datastore_profile.DatastoreProfile] = None,
 ) -> str:
     """
     Get stream path from the project secret. If wasn't set, take it from the system configurations
@@ -126,20 +127,25 @@ def get_stream_path(
     :param stream_uri:          Stream URI. If provided, it will be used instead of the one from the project's secret.
     :param secret_provider:     Optional secret provider to get the connection string secret.
                                 If not set, the env vars are used.
+    :param profile:             Optional datastore profile of the stream (V3IO/KafkaSource profile).
     :return:                    Monitoring stream path to the relevant application.
     """
-    try:
-        profile = _get_stream_profile(project=project, secret_provider=secret_provider)
-    except mlrun.errors.MLRunNotFoundError:
-        profile = None
+    profile = profile or _get_stream_profile(
+        project=project, secret_provider=secret_provider
+    )
     if isinstance(profile, mlrun.datastore.datastore_profile.DatastoreProfileV3io):
         stream_uri = "v3io"
-    stream_uri = stream_uri or mlrun.get_secret_or_env(
-        key=mm_constants.ProjectSecretKeys.STREAM_PATH, secret_provider=secret_provider
-    )
+    elif isinstance(
+        profile, mlrun.datastore.datastore_profile.DatastoreProfileKafkaSource
+    ):
+        stream_uri = f"kafka://{profile.brokers[0]}"
+    else:
+        raise mlrun.errors.MLRunValueError(
+            f"Received an unexpected stream profile type: {type(profile)}\n"
+            "Expects `DatastoreProfileV3io` or `DatastoreProfileKafkaSource`."
+        )
     if not stream_uri or stream_uri == "v3io":
         stream_uri = mlrun.mlconf.get_model_monitoring_file_target_path(
@@ -273,7 +279,7 @@ def _get_profile(
     )
     if not profile_name:
         raise mlrun.errors.MLRunNotFoundError(
-            f"Not found `{profile_name_key}` profile name"
+            f"Not found `{profile_name_key}` profile name for project '{project}'"
         )
     return mlrun.datastore.datastore_profile.datastore_profile_read(
         url=f"ds://{profile_name}", project_name=project, secrets=secret_provider

mlrun/model_monitoring/stream_processing.py CHANGED Viewed

@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import collections
 import datetime
 import os
 import typing
@@ -29,11 +28,14 @@ import mlrun.model_monitoring.db
 import mlrun.serving.states
 import mlrun.utils
 from mlrun.common.schemas.model_monitoring.constants import (
+    ControllerEvent,
+    ControllerEventKind,
     EndpointType,
     EventFieldType,
     FileTargetKind,
     ProjectSecretKeys,
 )
+from mlrun.datastore import parse_kafka_url
 from mlrun.model_monitoring.db import TSDBConnector
 from mlrun.utils import logger
@@ -88,7 +90,9 @@ class EventStreamProcessor:
         self.v3io_framesd = v3io_framesd or mlrun.mlconf.v3io_framesd
         self.v3io_api = v3io_api or mlrun.mlconf.v3io_api
-        self.v3io_access_key = v3io_access_key or os.environ.get("V3IO_ACCESS_KEY")
+        self.v3io_access_key = v3io_access_key or mlrun.get_secret_or_env(
+            "V3IO_ACCESS_KEY"
+        )
         self.model_monitoring_access_key = (
             model_monitoring_access_key
             or os.environ.get(ProjectSecretKeys.ACCESS_KEY)
@@ -118,6 +122,7 @@ class EventStreamProcessor:
         self,
         fn: mlrun.runtimes.ServingRuntime,
         tsdb_connector: TSDBConnector,
+        controller_stream_uri: str,
     ) -> None:
         """
         Apply monitoring serving graph to a given serving function. The following serving graph includes about 4 main
@@ -146,6 +151,8 @@ class EventStreamProcessor:
         :param fn: A serving function.
         :param tsdb_connector: Time series database connector.
+        :param controller_stream_uri: The controller stream URI. Runs on server api pod so needed to be provided as
+        input
         """
         graph = typing.cast(
@@ -209,6 +216,20 @@ class EventStreamProcessor:
             )
         apply_map_feature_names()
+        # split the graph between event with error vs valid event
+        graph.add_step(
+            "storey.Filter",
+            "FilterNOP",
+            after="MapFeatureNames",
+            _fn="(event.get('kind', " ") != 'nop_event')",
+        )
+        graph.add_step(
+            "storey.Filter",
+            "ForwardNOP",
+            after="MapFeatureNames",
+            _fn="(event.get('kind', " ") == 'nop_event')",
+        )
         tsdb_connector.apply_monitoring_stream_steps(
             graph=graph,
             aggregate_windows=self.aggregate_windows,
@@ -221,7 +242,7 @@ class EventStreamProcessor:
             graph.add_step(
                 "ProcessBeforeParquet",
                 name="ProcessBeforeParquet",
-                after="MapFeatureNames",
+                after="FilterNOP",
                 _fn="(event)",
             )
@@ -248,6 +269,44 @@ class EventStreamProcessor:
         apply_parquet_target()
+        # controller branch
+        def apply_push_controller_stream(stream_uri: str):
+            if stream_uri.startswith("v3io://"):
+                graph.add_step(
+                    ">>",
+                    "controller_stream_v3io",
+                    path=stream_uri,
+                    sharding_func=ControllerEvent.ENDPOINT_ID,
+                    access_key=self.v3io_access_key,
+                    after="ForwardNOP",
+                )
+            elif stream_uri.startswith("kafka://"):
+                topic, brokers = parse_kafka_url(stream_uri)
+                logger.info(
+                    "Controller stream uri for kafka",
+                    stream_uri=stream_uri,
+                    topic=topic,
+                    brokers=brokers,
+                )
+                if isinstance(brokers, list):
+                    path = f"kafka://{brokers[0]}/{topic}"
+                elif isinstance(brokers, str):
+                    path = f"kafka://{brokers}/{topic}"
+                else:
+                    raise mlrun.errors.MLRunInvalidArgumentError(
+                        "Brokers must be a list or str check controller stream uri"
+                    )
+                graph.add_step(
+                    ">>",
+                    "controller_stream_kafka",
+                    path=path,
+                    kafka_brokers=brokers,
+                    _sharding_func=ControllerEvent.ENDPOINT_ID,
+                    after="ForwardNOP",
+                )
+        apply_push_controller_stream(controller_stream_uri)
 class ProcessBeforeParquet(mlrun.feature_store.steps.MapClass):
     def __init__(self, **kwargs):
@@ -313,14 +372,14 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
         self.first_request: dict[str, str] = dict()
         self.last_request: dict[str, str] = dict()
-        # Number of errors (value) per endpoint (key)
-        self.error_count: dict[str, int] = collections.defaultdict(int)
         # Set of endpoints in the current events
         self.endpoints: set[str] = set()
     def do(self, full_event):
         event = full_event.body
+        if event.get(ControllerEvent.KIND, "") == ControllerEventKind.NOP_EVENT:
+            logger.info("Skipped nop event inside of ProcessEndpointEvent", event=event)
+            return storey.Event(body=[event])
         # Getting model version and function uri from event
         # and use them for retrieving the endpoint_id
         function_uri = full_event.body.get(EventFieldType.FUNCTION_URI)
@@ -354,10 +413,9 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
         predictions = event.get("resp", {}).get("outputs")
         if not self.is_valid(
-            endpoint_id,
-            is_not_none,
-            timestamp,
-            ["when"],
+            validation_function=is_not_none,
+            field=timestamp,
+            dict_path=["when"],
         ):
             return None
@@ -369,31 +427,27 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
         self.last_request[endpoint_id] = timestamp
         if not self.is_valid(
-            endpoint_id,
-            is_not_none,
-            request_id,
-            ["request", "id"],
+            validation_function=is_not_none,
+            field=request_id,
+            dict_path=["request", "id"],
         ):
             return None
         if not self.is_valid(
-            endpoint_id,
-            is_not_none,
-            latency,
-            ["microsec"],
+            validation_function=is_not_none,
+            field=latency,
+            dict_path=["microsec"],
         ):
             return None
         if not self.is_valid(
-            endpoint_id,
-            is_not_none,
-            features,
-            ["request", "inputs"],
+            validation_function=is_not_none,
+            field=features,
+            dict_path=["request", "inputs"],
         ):
             return None
         if not self.is_valid(
-            endpoint_id,
-            is_not_none,
-            predictions,
-            ["resp", "outputs"],
+            validation_function=is_not_none,
+            field=predictions,
+            dict_path=["resp", "outputs"],
         ):
             return None
@@ -451,7 +505,6 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
                     EventFieldType.LAST_REQUEST_TIMESTAMP: mlrun.utils.enrich_datetime_with_tz_info(
                         self.last_request[endpoint_id]
                     ).timestamp(),
-                    EventFieldType.ERROR_COUNT: self.error_count[endpoint_id],
                     EventFieldType.LABELS: event.get(EventFieldType.LABELS, {}),
                     EventFieldType.METRICS: event.get(EventFieldType.METRICS, {}),
                     EventFieldType.ENTITIES: event.get("request", {}).get(
@@ -482,7 +535,7 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
                 .flat_dict()
             )
-            # If model endpoint found, get first_request, last_request and error_count values
+            # If model endpoint found, get first_request & last_request values
             if endpoint_record:
                 first_request = endpoint_record.get(EventFieldType.FIRST_REQUEST)
@@ -493,24 +546,18 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
                 if last_request:
                     self.last_request[endpoint_id] = last_request
-                error_count = endpoint_record.get(EventFieldType.ERROR_COUNT)
-                if error_count:
-                    self.error_count[endpoint_id] = int(error_count)
             # add endpoint to endpoints set
             self.endpoints.add(endpoint_id)
     def is_valid(
         self,
-        endpoint_id: str,
         validation_function,
         field: typing.Any,
         dict_path: list[str],
     ):
         if validation_function(field, dict_path):
             return True
-        self.error_count[endpoint_id] += 1
         return False
     @staticmethod
@@ -589,6 +636,9 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
         return None
     def do(self, event: dict):
+        if event.get(ControllerEvent.KIND, "") == ControllerEventKind.NOP_EVENT:
+            logger.info("Skipped nop event inside of MapFeatureNames", event=event)
+            return event
         endpoint_id = event[EventFieldType.ENDPOINT_ID]
         feature_values = event[EventFieldType.FEATURES]

mlrun/package/context_handler.py CHANGED Viewed

@@ -50,7 +50,7 @@ class ContextHandler:
         "numpy",
     ]
     # Optional packagers to be collected at initialization time:
-    _EXTENDED_PACKAGERS = []  # TODO: Create "matplotlib", "plotly", "bokeh" packagers.
+    _EXTENDED_PACKAGERS = []  # TODO: Create "matplotlib", "plotly", packagers.
     # Optional packagers from the `mlrun.frameworks` package:
     _MLRUN_FRAMEWORKS_PACKAGERS = []  # TODO: Create frameworks packagers.
     # Default priority values for packagers:

mlrun/package/packagers_manager.py CHANGED Viewed

@@ -667,16 +667,9 @@ class PackagersManager:
                         data_item=data_item,
                         instructions={},
                     )
-                except Exception as exception:
+                except Exception:
                     # Could not unpack as the reduced type hint, collect the exception and go to the next one:
-                    exception_string = "".join(
-                        traceback.format_exception(
-                            etype=type(exception),
-                            value=exception,
-                            tb=exception.__traceback__,
-                        )
-                    )
-                    found_packagers.append((packager, exception_string))
+                    found_packagers.append((packager, traceback.format_exc()))
             # Reduce the type hint list and continue:
             possible_type_hints = TypeHintUtils.reduce_type_hint(
                 type_hint=possible_type_hints
@@ -692,15 +685,8 @@ class PackagersManager:
                 artifact_type=None,
                 instructions={},
             )
-        except Exception as exception:
-            exception_string = "".join(
-                traceback.format_exception(
-                    etype=type(exception),
-                    value=exception,
-                    tb=exception.__traceback__,
-                )
-            )
-            found_packagers.append((self._default_packager, exception_string))
+        except Exception:
+            found_packagers.append((self._default_packager, traceback.format_exc()))
         # The method did not return until this point, raise an error:
         raise MLRunPackageUnpackingError(

mlrun/projects/pipelines.py CHANGED Viewed

@@ -31,7 +31,7 @@ import mlrun_pipelines.patcher
 import mlrun_pipelines.utils
 from mlrun.errors import err_to_str
 from mlrun.utils import (
-    get_ui_url,
+    get_workflow_url,
     logger,
     normalize_workflow_name,
     retry_until_successful,
@@ -1225,7 +1225,7 @@ def notify_scheduled_workflow_failure(
         notification_pusher = mlrun.utils.notifications.CustomNotificationPusher(
             ["slack"]
         )
-        url = get_ui_url(project_name, context_uid)
+        url = get_workflow_url(project_name, context_uid)
         link = f"<{url}|*view workflow job details*>"
         message = (
             f":x: Failed to run scheduled workflow {workflow_name} "

mlrun 1.8.0rc21__py3-none-any.whl → 1.8.0rc26__py3-none-any.whl

Potentially problematic release.

mlrun 1.8.0rc21__py3-none-any.whl → 1.8.0rc26__py3-none-any.whl