mlrun 1.8.0rc19__py3-none-any.whl → 1.8.0rc26__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mlrun might be problematic.
- mlrun/__init__.py +37 -3
- mlrun/__main__.py +5 -0
- mlrun/alerts/alert.py +1 -0
- mlrun/artifacts/document.py +78 -36
- mlrun/common/formatters/feature_set.py +1 -0
- mlrun/common/runtimes/constants.py +17 -0
- mlrun/common/schemas/alert.py +3 -0
- mlrun/common/schemas/client_spec.py +0 -1
- mlrun/common/schemas/model_monitoring/constants.py +32 -9
- mlrun/common/schemas/model_monitoring/model_endpoints.py +2 -0
- mlrun/common/schemas/workflow.py +1 -0
- mlrun/config.py +39 -6
- mlrun/datastore/datastore_profile.py +58 -16
- mlrun/datastore/sources.py +7 -1
- mlrun/datastore/vectorstore.py +20 -1
- mlrun/db/base.py +20 -0
- mlrun/db/httpdb.py +97 -10
- mlrun/db/nopdb.py +19 -0
- mlrun/errors.py +4 -0
- mlrun/execution.py +15 -6
- mlrun/frameworks/_common/model_handler.py +0 -2
- mlrun/launcher/client.py +2 -2
- mlrun/launcher/local.py +5 -1
- mlrun/model_monitoring/applications/_application_steps.py +3 -1
- mlrun/model_monitoring/controller.py +266 -103
- mlrun/model_monitoring/db/tsdb/__init__.py +11 -23
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +2 -0
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +20 -21
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +35 -34
- mlrun/model_monitoring/helpers.py +16 -10
- mlrun/model_monitoring/stream_processing.py +106 -35
- mlrun/package/context_handler.py +1 -1
- mlrun/package/packagers_manager.py +4 -18
- mlrun/projects/pipelines.py +18 -5
- mlrun/projects/project.py +156 -39
- mlrun/runtimes/nuclio/serving.py +22 -13
- mlrun/runtimes/sparkjob/spark3job.py +1 -1
- mlrun/secrets.py +1 -1
- mlrun/serving/server.py +11 -3
- mlrun/serving/states.py +65 -8
- mlrun/serving/v2_serving.py +67 -44
- mlrun/utils/helpers.py +111 -23
- mlrun/utils/notifications/notification/base.py +6 -1
- mlrun/utils/notifications/notification/slack.py +5 -1
- mlrun/utils/notifications/notification_pusher.py +67 -36
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.8.0rc19.dist-info → mlrun-1.8.0rc26.dist-info}/METADATA +33 -16
- {mlrun-1.8.0rc19.dist-info → mlrun-1.8.0rc26.dist-info}/RECORD +52 -52
- {mlrun-1.8.0rc19.dist-info → mlrun-1.8.0rc26.dist-info}/WHEEL +1 -1
- {mlrun-1.8.0rc19.dist-info → mlrun-1.8.0rc26.dist-info}/LICENSE +0 -0
- {mlrun-1.8.0rc19.dist-info → mlrun-1.8.0rc26.dist-info}/entry_points.txt +0 -0
- {mlrun-1.8.0rc19.dist-info → mlrun-1.8.0rc26.dist-info}/top_level.txt +0 -0
mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py CHANGED

@@ -33,7 +33,7 @@ _TSDB_BE = "tsdb"
 _TSDB_RATE = "1/s"
 _CONTAINER = "users"
 
-V3IO_MEPS_LIMIT =
+V3IO_MEPS_LIMIT = 200
 
 
 def _is_no_schema_error(exc: v3io_frames.Error) -> bool:
@@ -135,7 +135,7 @@ class V3IOTSDBConnector(TSDBConnector):
         monitoring_predictions_full_path = (
             mlrun.mlconf.get_model_monitoring_file_target_path(
                 project=self.project,
-                kind=mm_schemas.
+                kind=mm_schemas.V3IOTSDBTables.PREDICTIONS,
             )
         )
         (
@@ -145,7 +145,7 @@ class V3IOTSDBConnector(TSDBConnector):
         ) = mlrun.common.model_monitoring.helpers.parse_model_endpoint_store_prefix(
             monitoring_predictions_full_path
         )
-        self.tables[mm_schemas.
+        self.tables[mm_schemas.V3IOTSDBTables.PREDICTIONS] = monitoring_predictions_path
 
     def create_tables(self) -> None:
         """
@@ -204,7 +204,7 @@ class V3IOTSDBConnector(TSDBConnector):
                 }
             ],
             name=EventFieldType.LATENCY,
-            after="
+            after="FilterNOP",
             step_name="Aggregates",
             table=".",
             key_field=EventFieldType.ENDPOINT_ID,
@@ -225,8 +225,8 @@ class V3IOTSDBConnector(TSDBConnector):
         graph.add_step(
             "storey.TSDBTarget",
             name="tsdb_predictions",
-            after="
-            path=f"{self.container}/{self.tables[mm_schemas.
+            after="FilterNOP",
+            path=f"{self.container}/{self.tables[mm_schemas.V3IOTSDBTables.PREDICTIONS]}",
             rate="1/s",
             time_col=mm_schemas.EventFieldType.TIMESTAMP,
             container=self.container,
@@ -234,6 +234,8 @@ class V3IOTSDBConnector(TSDBConnector):
             columns=[
                 mm_schemas.EventFieldType.LATENCY,
                 mm_schemas.EventFieldType.LAST_REQUEST_TIMESTAMP,
+                mm_schemas.EventFieldType.ESTIMATED_PREDICTION_COUNT,
+                mm_schemas.EventFieldType.EFFECTIVE_SAMPLE_COUNT,
             ],
             index_cols=[
                 mm_schemas.EventFieldType.ENDPOINT_ID,
@@ -580,14 +582,18 @@ class V3IOTSDBConnector(TSDBConnector):
         )
 
     @staticmethod
-    def _get_endpoint_filter(endpoint_id: Union[str, list[str]]):
+    def _get_endpoint_filter(endpoint_id: Union[str, list[str]]) -> Optional[str]:
         if isinstance(endpoint_id, str):
             return f"endpoint_id=='{endpoint_id}'"
         elif isinstance(endpoint_id, list):
             if len(endpoint_id) > V3IO_MEPS_LIMIT:
-
-
+                logger.info(
+                    "The number of endpoint ids exceeds the v3io-engine filter-expression limit, "
+                    "retrieving all the model endpoints from the db.",
+                    limit=V3IO_MEPS_LIMIT,
+                    amount=len(endpoint_id),
                 )
+                return None
             return f"endpoint_id IN({str(endpoint_id)[1:-1]}) "
         else:
             raise mlrun.errors.MLRunInvalidArgumentError(
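The new `_get_endpoint_filter` contract is easiest to see in isolation. A minimal standalone sketch (a hypothetical module-level function; the real method lives on `V3IOTSDBConnector` and logs through mlrun's structured logger):

    from typing import Optional, Union

    V3IO_MEPS_LIMIT = 200  # mirrors the new module constant above

    def get_endpoint_filter(endpoint_id: Union[str, list[str]]) -> Optional[str]:
        if isinstance(endpoint_id, str):
            # a single endpoint id becomes an equality filter expression
            return f"endpoint_id=='{endpoint_id}'"
        if isinstance(endpoint_id, list):
            if len(endpoint_id) > V3IO_MEPS_LIMIT:
                # too many ids for one v3io filter expression; return None so
                # the caller queries without a filter and gets all endpoints
                return None
            return f"endpoint_id IN({str(endpoint_id)[1:-1]}) "
        raise ValueError(f"Unsupported endpoint_id type: {type(endpoint_id)}")

    assert get_endpoint_filter("ep-1") == "endpoint_id=='ep-1'"
    assert get_endpoint_filter(["a", "b"]) == "endpoint_id IN('a', 'b') "
    assert get_endpoint_filter(["x"] * 201) is None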
@@ -734,10 +740,10 @@ class V3IOTSDBConnector(TSDBConnector):
                 "both or neither of `aggregation_window` and `agg_funcs` must be provided"
             )
         df = self._get_records(
-            table=mm_schemas.
+            table=mm_schemas.V3IOTSDBTables.PREDICTIONS,
             start=start,
             end=end,
-            columns=[mm_schemas.EventFieldType.
+            columns=[mm_schemas.EventFieldType.ESTIMATED_PREDICTION_COUNT],
             filter_query=f"endpoint_id=='{endpoint_id}'",
             agg_funcs=agg_funcs,
             sliding_window_step=aggregation_window,
@@ -751,10 +757,10 @@ class V3IOTSDBConnector(TSDBConnector):
                 type=mm_schemas.ModelEndpointMonitoringMetricType.METRIC,
             )
 
-
-            f"{agg_funcs[0]}({mm_schemas.EventFieldType.
+        estimated_prediction_count = (
+            f"{agg_funcs[0]}({mm_schemas.EventFieldType.ESTIMATED_PREDICTION_COUNT})"
             if agg_funcs
-            else mm_schemas.EventFieldType.
+            else mm_schemas.EventFieldType.ESTIMATED_PREDICTION_COUNT
         )
 
         return mm_schemas.ModelEndpointMonitoringMetricValues(
@@ -762,7 +768,7 @@ class V3IOTSDBConnector(TSDBConnector):
             values=list(
                 zip(
                     df.index,
-                    df[
+                    df[estimated_prediction_count],
                 )
             ),  # pyright: ignore[reportArgumentType]
         )
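As the hunk above shows, the frame read back from the TSDB is keyed by the aggregation wrapper whenever `agg_funcs` is given. A small sketch of that naming rule (the field name is assumed to be the literal value of `EventFieldType.ESTIMATED_PREDICTION_COUNT`):

    from typing import Optional

    def result_column(agg_funcs: Optional[list],
                      field: str = "estimated_prediction_count") -> str:
        # with aggregations the column is e.g. "count(estimated_prediction_count)";
        # without, the raw field name is used as-is
        return f"{agg_funcs[0]}({field})" if agg_funcs else field

    print(result_column(["count"]))  # -> count(estimated_prediction_count)
    print(result_column(None))       # -> estimated_prediction_count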
@@ -773,15 +779,13 @@ class V3IOTSDBConnector(TSDBConnector):
         start: Optional[datetime] = None,
         end: Optional[datetime] = None,
     ) -> pd.DataFrame:
-
-            endpoint_ids if isinstance(endpoint_ids, list) else [endpoint_ids]
-        )
+        filter_query = self._get_endpoint_filter(endpoint_id=endpoint_ids)
         start, end = self._get_start_end(start, end)
         df = self._get_records(
-            table=mm_schemas.
+            table=mm_schemas.V3IOTSDBTables.PREDICTIONS,
             start=start,
             end=end,
-            filter_query=
+            filter_query=filter_query,
             agg_funcs=["last"],
         )
         if not df.empty:
@@ -808,9 +812,7 @@ class V3IOTSDBConnector(TSDBConnector):
         start: Optional[datetime] = None,
         end: Optional[datetime] = None,
     ) -> pd.DataFrame:
-
-            endpoint_ids if isinstance(endpoint_ids, list) else [endpoint_ids]
-        )
+        filter_query = self._get_endpoint_filter(endpoint_id=endpoint_ids)
         start = start or (mlrun.utils.datetime_now() - timedelta(hours=24))
         start, end = self._get_start_end(start, end)
         df = self._get_records(
@@ -818,7 +820,7 @@ class V3IOTSDBConnector(TSDBConnector):
             start=start,
             end=end,
             columns=[mm_schemas.ResultData.RESULT_STATUS],
-            filter_query=
+            filter_query=filter_query,
             agg_funcs=["max"],
             group_by="endpoint_id",
         )
@@ -883,17 +885,18 @@ class V3IOTSDBConnector(TSDBConnector):
         start: Optional[datetime] = None,
         end: Optional[datetime] = None,
     ) -> pd.DataFrame:
-
-
-
+        filter_query = self._get_endpoint_filter(endpoint_id=endpoint_ids)
+        if filter_query:
+            filter_query += f"AND {mm_schemas.EventFieldType.ERROR_TYPE} == '{mm_schemas.EventFieldType.INFER_ERROR}'"
+        else:
+            filter_query = f"{mm_schemas.EventFieldType.ERROR_TYPE} == '{mm_schemas.EventFieldType.INFER_ERROR}' z"
         start, end = self._get_start_end(start, end)
         df = self._get_records(
             table=mm_schemas.FileTargetKind.ERRORS,
             start=start,
             end=end,
             columns=[mm_schemas.EventFieldType.ERROR_COUNT],
-            filter_query=
-            f"AND {mm_schemas.EventFieldType.ERROR_TYPE} == '{mm_schemas.EventFieldType.INFER_ERROR}'",
+            filter_query=filter_query,
             agg_funcs=["count"],
         )
         if not df.empty:
@@ -912,17 +915,15 @@ class V3IOTSDBConnector(TSDBConnector):
         start: Optional[datetime] = None,
         end: Optional[datetime] = None,
     ) -> pd.DataFrame:
-
-            endpoint_ids if isinstance(endpoint_ids, list) else [endpoint_ids]
-        )
+        filter_query = self._get_endpoint_filter(endpoint_id=endpoint_ids)
         start = start or (mlrun.utils.datetime_now() - timedelta(hours=24))
         start, end = self._get_start_end(start, end)
         df = self._get_records(
-            table=mm_schemas.
+            table=mm_schemas.V3IOTSDBTables.PREDICTIONS,
             start=start,
             end=end,
             columns=[mm_schemas.EventFieldType.LATENCY],
-            filter_query=
+            filter_query=filter_query,
             agg_funcs=["avg"],
         )
         if not df.empty:
mlrun/model_monitoring/helpers.py CHANGED

@@ -109,7 +109,7 @@ def filter_results_by_regex(
             result_name_filters=validated_filters,
         ):
             filtered_metrics_names.append(existing_result_name)
-    return filtered_metrics_names
+    return list(set(filtered_metrics_names))
 
 
 def get_stream_path(
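One behavioral note on the `filter_results_by_regex` change: `list(set(...))` deduplicates the collected names but does not preserve their insertion order, e.g.:

    names = ["drift", "latency", "drift"]
    unique = list(set(names))
    print(sorted(unique))  # ['drift', 'latency'] -- duplicates gone, order not guaranteed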
@@ -117,6 +117,7 @@ def get_stream_path(
     function_name: str = mm_constants.MonitoringFunctionNames.STREAM,
     stream_uri: Optional[str] = None,
     secret_provider: Optional[Callable[[str], str]] = None,
+    profile: Optional[mlrun.datastore.datastore_profile.DatastoreProfile] = None,
 ) -> str:
     """
     Get stream path from the project secret. If wasn't set, take it from the system configurations
@@ -126,20 +127,25 @@ def get_stream_path(
     :param stream_uri:      Stream URI. If provided, it will be used instead of the one from the project's secret.
     :param secret_provider: Optional secret provider to get the connection string secret.
                             If not set, the env vars are used.
+    :param profile:         Optional datastore profile of the stream (V3IO/KafkaSource profile).
     :return:                Monitoring stream path to the relevant application.
     """
 
-
-
-
-    profile = None
+    profile = profile or _get_stream_profile(
+        project=project, secret_provider=secret_provider
+    )
 
     if isinstance(profile, mlrun.datastore.datastore_profile.DatastoreProfileV3io):
         stream_uri = "v3io"
-
-
-
-
+    elif isinstance(
+        profile, mlrun.datastore.datastore_profile.DatastoreProfileKafkaSource
+    ):
+        stream_uri = f"kafka://{profile.brokers[0]}"
+    else:
+        raise mlrun.errors.MLRunValueError(
+            f"Received an unexpected stream profile type: {type(profile)}\n"
+            "Expects `DatastoreProfileV3io` or `DatastoreProfileKafkaSource`."
+        )
 
     if not stream_uri or stream_uri == "v3io":
         stream_uri = mlrun.mlconf.get_model_monitoring_file_target_path(
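A minimal sketch of the profile-to-URI resolution the new branch implements, using stand-in dataclasses for the two accepted profile types (the real classes are `DatastoreProfileV3io` and `DatastoreProfileKafkaSource` from `mlrun.datastore.datastore_profile`):

    from dataclasses import dataclass, field

    @dataclass
    class V3ioProfile:  # stand-in for DatastoreProfileV3io
        pass

    @dataclass
    class KafkaSourceProfile:  # stand-in for DatastoreProfileKafkaSource
        brokers: list = field(default_factory=list)

    def resolve_stream_uri(profile) -> str:
        if isinstance(profile, V3ioProfile):
            return "v3io"
        if isinstance(profile, KafkaSourceProfile):
            # only the first broker is baked into the stream URI
            return f"kafka://{profile.brokers[0]}"
        raise ValueError(f"Received an unexpected stream profile type: {type(profile)}")

    print(resolve_stream_uri(V3ioProfile()))                                # -> v3io
    print(resolve_stream_uri(KafkaSourceProfile(brokers=["b1:9092"])))      # -> kafka://b1:9092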
@@ -273,7 +279,7 @@ def _get_profile(
     )
     if not profile_name:
         raise mlrun.errors.MLRunNotFoundError(
-            f"Not found `{profile_name_key}` profile name"
+            f"Not found `{profile_name_key}` profile name for project '{project}'"
         )
     return mlrun.datastore.datastore_profile.datastore_profile_read(
         url=f"ds://{profile_name}", project_name=project, secrets=secret_provider
mlrun/model_monitoring/stream_processing.py CHANGED

@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import collections
 import datetime
 import os
 import typing
@@ -29,11 +28,14 @@ import mlrun.model_monitoring.db
 import mlrun.serving.states
 import mlrun.utils
 from mlrun.common.schemas.model_monitoring.constants import (
+    ControllerEvent,
+    ControllerEventKind,
     EndpointType,
     EventFieldType,
     FileTargetKind,
     ProjectSecretKeys,
 )
+from mlrun.datastore import parse_kafka_url
 from mlrun.model_monitoring.db import TSDBConnector
 from mlrun.utils import logger
 
@@ -88,7 +90,9 @@ class EventStreamProcessor:
         self.v3io_framesd = v3io_framesd or mlrun.mlconf.v3io_framesd
         self.v3io_api = v3io_api or mlrun.mlconf.v3io_api
 
-        self.v3io_access_key = v3io_access_key or
+        self.v3io_access_key = v3io_access_key or mlrun.get_secret_or_env(
+            "V3IO_ACCESS_KEY"
+        )
         self.model_monitoring_access_key = (
             model_monitoring_access_key
             or os.environ.get(ProjectSecretKeys.ACCESS_KEY)
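`mlrun.get_secret_or_env` resolves a key from available secret stores and falls back to environment variables; a rough standalone equivalent of the fallback the new line relies on (simplified, explicit-mapping-then-env only):

    import os
    from typing import Optional

    def get_secret_or_env(key: str, secrets: Optional[dict] = None, default=None):
        # check an explicit secrets mapping first, then the process environment
        if secrets and secrets.get(key):
            return secrets[key]
        return os.environ.get(key, default)

    os.environ["V3IO_ACCESS_KEY"] = "demo-key"
    print(get_secret_or_env("V3IO_ACCESS_KEY"))  # -> demo-key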
@@ -118,6 +122,7 @@ class EventStreamProcessor:
         self,
         fn: mlrun.runtimes.ServingRuntime,
         tsdb_connector: TSDBConnector,
+        controller_stream_uri: str,
     ) -> None:
         """
         Apply monitoring serving graph to a given serving function. The following serving graph includes about 4 main
@@ -146,6 +151,8 @@ class EventStreamProcessor:
 
         :param fn:                    A serving function.
         :param tsdb_connector:        Time series database connector.
+        :param controller_stream_uri: The controller stream URI. Runs on server api pod so needed to be provided as
+                                      input
         """
 
         graph = typing.cast(
@@ -209,6 +216,20 @@ class EventStreamProcessor:
         )
 
         apply_map_feature_names()
+        # split the graph between event with error vs valid event
+        graph.add_step(
+            "storey.Filter",
+            "FilterNOP",
+            after="MapFeatureNames",
+            _fn="(event.get('kind', " ") != 'nop_event')",
+        )
+        graph.add_step(
+            "storey.Filter",
+            "ForwardNOP",
+            after="MapFeatureNames",
+            _fn="(event.get('kind', " ") == 'nop_event')",
+        )
+
         tsdb_connector.apply_monitoring_stream_steps(
             graph=graph,
             aggregate_windows=self.aggregate_windows,
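The two new steps route each event down exactly one branch: `FilterNOP` passes regular events on to the aggregation/parquet path, while `ForwardNOP` passes only `nop_event` controller heartbeats. A sketch of the complementary predicates, with plain functions standing in for the `storey.Filter` `_fn` expressions:

    def is_regular_event(event: dict) -> bool:
        # FilterNOP: everything except controller nop events
        return event.get("kind", "") != "nop_event"

    def is_nop_event(event: dict) -> bool:
        # ForwardNOP: only controller nop events
        return event.get("kind", "") == "nop_event"

    events = [{"kind": "nop_event"}, {"request": {"id": "1"}}]
    print([e for e in events if is_regular_event(e)])  # [{'request': {'id': '1'}}]
    print([e for e in events if is_nop_event(e)])      # [{'kind': 'nop_event'}]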
@@ -221,7 +242,7 @@ class EventStreamProcessor:
         graph.add_step(
             "ProcessBeforeParquet",
             name="ProcessBeforeParquet",
-            after="
+            after="FilterNOP",
             _fn="(event)",
         )
 
@@ -248,6 +269,44 @@ class EventStreamProcessor:
 
         apply_parquet_target()
 
+        # controller branch
+        def apply_push_controller_stream(stream_uri: str):
+            if stream_uri.startswith("v3io://"):
+                graph.add_step(
+                    ">>",
+                    "controller_stream_v3io",
+                    path=stream_uri,
+                    sharding_func=ControllerEvent.ENDPOINT_ID,
+                    access_key=self.v3io_access_key,
+                    after="ForwardNOP",
+                )
+            elif stream_uri.startswith("kafka://"):
+                topic, brokers = parse_kafka_url(stream_uri)
+                logger.info(
+                    "Controller stream uri for kafka",
+                    stream_uri=stream_uri,
+                    topic=topic,
+                    brokers=brokers,
+                )
+                if isinstance(brokers, list):
+                    path = f"kafka://{brokers[0]}/{topic}"
+                elif isinstance(brokers, str):
+                    path = f"kafka://{brokers}/{topic}"
+                else:
+                    raise mlrun.errors.MLRunInvalidArgumentError(
+                        "Brokers must be a list or str check controller stream uri"
+                    )
+                graph.add_step(
+                    ">>",
+                    "controller_stream_kafka",
+                    path=path,
+                    kafka_brokers=brokers,
+                    _sharding_func=ControllerEvent.ENDPOINT_ID,
+                    after="ForwardNOP",
+                )
+
+        apply_push_controller_stream(controller_stream_uri)
+
 
 class ProcessBeforeParquet(mlrun.feature_store.steps.MapClass):
     def __init__(self, **kwargs):
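The Kafka branch normalizes the brokers value returned by `parse_kafka_url` (which, per the new code, may be either a list or a single string) into one stream path. That normalization in isolation:

    def build_kafka_path(topic: str, brokers) -> str:
        if isinstance(brokers, list):
            # only the first broker is used in the path
            return f"kafka://{brokers[0]}/{topic}"
        if isinstance(brokers, str):
            return f"kafka://{brokers}/{topic}"
        raise ValueError("Brokers must be a list or str")

    print(build_kafka_path("monitoring", ["b1:9092", "b2:9092"]))  # kafka://b1:9092/monitoring
    print(build_kafka_path("monitoring", "b1:9092"))               # kafka://b1:9092/monitoring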
@@ -313,14 +372,14 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
         self.first_request: dict[str, str] = dict()
         self.last_request: dict[str, str] = dict()
 
-        # Number of errors (value) per endpoint (key)
-        self.error_count: dict[str, int] = collections.defaultdict(int)
-
         # Set of endpoints in the current events
         self.endpoints: set[str] = set()
 
     def do(self, full_event):
         event = full_event.body
+        if event.get(ControllerEvent.KIND, "") == ControllerEventKind.NOP_EVENT:
+            logger.info("Skipped nop event inside of ProcessEndpointEvent", event=event)
+            return storey.Event(body=[event])
         # Getting model version and function uri from event
         # and use them for retrieving the endpoint_id
         function_uri = full_event.body.get(EventFieldType.FUNCTION_URI)
@@ -354,10 +413,9 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
         predictions = event.get("resp", {}).get("outputs")
 
         if not self.is_valid(
-
-
-
-            ["when"],
+            validation_function=is_not_none,
+            field=timestamp,
+            dict_path=["when"],
         ):
             return None
 
@@ -369,31 +427,27 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
         self.last_request[endpoint_id] = timestamp
 
         if not self.is_valid(
-
-
-
-            ["request", "id"],
+            validation_function=is_not_none,
+            field=request_id,
+            dict_path=["request", "id"],
         ):
             return None
         if not self.is_valid(
-
-
-
-            ["microsec"],
+            validation_function=is_not_none,
+            field=latency,
+            dict_path=["microsec"],
         ):
             return None
         if not self.is_valid(
-
-
-
-            ["request", "inputs"],
+            validation_function=is_not_none,
+            field=features,
+            dict_path=["request", "inputs"],
         ):
             return None
         if not self.is_valid(
-
-
-
-            ["resp", "outputs"],
+            validation_function=is_not_none,
+            field=predictions,
+            dict_path=["resp", "outputs"],
         ):
             return None
 
@@ -430,6 +484,10 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
         if not isinstance(feature, list):
             feature = [feature]
 
+        effective_sample_count, estimated_prediction_count = (
+            self._get_effective_and_estimated_counts(event=event)
+        )
+
         events.append(
             {
                 EventFieldType.FUNCTION_URI: function_uri,
@@ -447,12 +505,13 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
                 EventFieldType.LAST_REQUEST_TIMESTAMP: mlrun.utils.enrich_datetime_with_tz_info(
                     self.last_request[endpoint_id]
                 ).timestamp(),
-                EventFieldType.ERROR_COUNT: self.error_count[endpoint_id],
                 EventFieldType.LABELS: event.get(EventFieldType.LABELS, {}),
                 EventFieldType.METRICS: event.get(EventFieldType.METRICS, {}),
                 EventFieldType.ENTITIES: event.get("request", {}).get(
                     EventFieldType.ENTITIES, {}
                 ),
+                EventFieldType.EFFECTIVE_SAMPLE_COUNT: effective_sample_count,
+                EventFieldType.ESTIMATED_PREDICTION_COUNT: estimated_prediction_count,
             }
         )
 
@@ -476,7 +535,7 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
             .flat_dict()
         )
 
-        # If model endpoint found, get first_request
+        # If model endpoint found, get first_request & last_request values
         if endpoint_record:
             first_request = endpoint_record.get(EventFieldType.FIRST_REQUEST)
 
@@ -487,26 +546,34 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
         if last_request:
             self.last_request[endpoint_id] = last_request
 
-        error_count = endpoint_record.get(EventFieldType.ERROR_COUNT)
-
-        if error_count:
-            self.error_count[endpoint_id] = int(error_count)
-
         # add endpoint to endpoints set
         self.endpoints.add(endpoint_id)
 
     def is_valid(
         self,
-        endpoint_id: str,
         validation_function,
         field: typing.Any,
         dict_path: list[str],
     ):
         if validation_function(field, dict_path):
             return True
-
+
         return False
 
+    @staticmethod
+    def _get_effective_and_estimated_counts(event):
+        """
+        Calculate the `effective_sample_count` and the `estimated_prediction_count` based on the event's
+        sampling percentage. These values will be stored in the TSDB target.
+        Note that In non-batch serving, the `effective_sample_count` is always set to 1. In addition, when the sampling
+        percentage is 100%, the `estimated_prediction_count` is equal to the `effective_sample_count`.
+        """
+        effective_sample_count = event.get(EventFieldType.EFFECTIVE_SAMPLE_COUNT, 1)
+        estimated_prediction_count = effective_sample_count * (
+            100 / event.get(EventFieldType.SAMPLING_PERCENTAGE, 100)
+        )
+        return effective_sample_count, estimated_prediction_count
+
 
 def is_not_none(field: typing.Any, dict_path: list[str]):
     if field is not None:
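The estimation arithmetic added in `_get_effective_and_estimated_counts` scales each sampled record by the inverse of the sampling percentage. Reproduced standalone, with plain string keys assumed in place of the `EventFieldType` constants:

    def estimate(event: dict) -> tuple:
        effective_sample_count = event.get("effective_sample_count", 1)
        estimated_prediction_count = effective_sample_count * (
            100 / event.get("sampling_percentage", 100)
        )
        return effective_sample_count, estimated_prediction_count

    print(estimate({}))  # (1, 1.0): no sampling info, one event counts as one prediction
    print(estimate({"effective_sample_count": 3, "sampling_percentage": 25}))  # (3, 12.0)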
@@ -569,6 +636,9 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
             return None
 
     def do(self, event: dict):
+        if event.get(ControllerEvent.KIND, "") == ControllerEventKind.NOP_EVENT:
+            logger.info("Skipped nop event inside of MapFeatureNames", event=event)
+            return event
         endpoint_id = event[EventFieldType.ENDPOINT_ID]
 
         feature_values = event[EventFieldType.FEATURES]
@@ -672,6 +742,7 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
             )
         )
         self.first_request[endpoint_id] = True
+
         if attributes_to_update:
             logger.info(
                 "Updating endpoint record",
mlrun/package/context_handler.py CHANGED

@@ -50,7 +50,7 @@ class ContextHandler:
         "numpy",
     ]
     # Optional packagers to be collected at initialization time:
-    _EXTENDED_PACKAGERS = []  # TODO: Create "matplotlib", "plotly",
+    _EXTENDED_PACKAGERS = []  # TODO: Create "matplotlib", "plotly", packagers.
     # Optional packagers from the `mlrun.frameworks` package:
     _MLRUN_FRAMEWORKS_PACKAGERS = []  # TODO: Create frameworks packagers.
     # Default priority values for packagers:
mlrun/package/packagers_manager.py CHANGED

@@ -667,16 +667,9 @@ class PackagersManager:
                     data_item=data_item,
                     instructions={},
                 )
-            except Exception
+            except Exception:
                 # Could not unpack as the reduced type hint, collect the exception and go to the next one:
-
-                    traceback.format_exception(
-                        etype=type(exception),
-                        value=exception,
-                        tb=exception.__traceback__,
-                    )
-                )
-                found_packagers.append((packager, exception_string))
+                found_packagers.append((packager, traceback.format_exc()))
             # Reduce the type hint list and continue:
             possible_type_hints = TypeHintUtils.reduce_type_hint(
                 type_hint=possible_type_hints
@@ -692,15 +685,8 @@ class PackagersManager:
             artifact_type=None,
             instructions={},
         )
-        except Exception
-
-            traceback.format_exception(
-                etype=type(exception),
-                value=exception,
-                tb=exception.__traceback__,
-            )
-            )
-        found_packagers.append((self._default_packager, exception_string))
+        except Exception:
+            found_packagers.append((self._default_packager, traceback.format_exc()))
 
         # The method did not return until this point, raise an error:
         raise MLRunPackageUnpackingError(
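Context for the packagers change: `traceback.format_exception` made its first parameter positional-only in Python 3.10 (the `etype=` keyword no longer works), while `traceback.format_exc()` formats the in-flight exception directly inside an `except` block, which is what the new one-liner relies on:

    import traceback

    try:
        1 / 0
    except Exception:
        # same formatted traceback text the old hand-built call produced,
        # with no need for a bound `as exception` variable
        formatted = traceback.format_exc()

    print(formatted.splitlines()[-1])  # -> ZeroDivisionError: division by zero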
mlrun/projects/pipelines.py CHANGED

@@ -31,7 +31,7 @@ import mlrun_pipelines.patcher
 import mlrun_pipelines.utils
 from mlrun.errors import err_to_str
 from mlrun.utils import (
-
+    get_workflow_url,
     logger,
     normalize_workflow_name,
     retry_until_successful,
@@ -523,11 +523,12 @@ class _PipelineRunner(abc.ABC):
         text = _PipelineRunner._generate_workflow_finished_message(
             run.run_id, errors_counter, run._state
         )
-
         notifiers = notifiers or project.notifiers
         if notifiers:
             notifiers.push(text, "info", runs)
 
+        project.push_pipeline_notification_kfp_runner(run.run_id, run._state, text)
+
         if raise_error:
             raise raise_error
         return state or run._state, errors_counter, text
@@ -620,6 +621,8 @@ class _KFPRunner(_PipelineRunner):
             params.update(notification.secret_params)
         project.notifiers.add_notification(notification.kind, params)
 
+        project.spec.notifications = notifications
+
         run_id = _run_pipeline(
             workflow_handler,
             project=project.metadata.name,
@@ -647,13 +650,23 @@ class _KFPRunner(_PipelineRunner):
                 exc_info=err_to_str(exc),
             )
 
-        #
-
+        # Pushing only relevant notification for the client (ipython and console)
+        project.notifiers.push_pipeline_start_message_from_client(
+            project.metadata.name, pipeline_id=run_id
+        )
+
         if context:
             project.notifiers.push_pipeline_start_message(
                 project.metadata.name,
                 context.uid,
             )
+        else:
+            project.push_pipeline_notification_kfp_runner(
+                run_id,
+                mlrun_pipelines.common.models.RunStatuses.running,
+                f"Workflow {run_id} started in project {project.metadata.name}",
+                notifications,
+            )
         pipeline_context.clear()
         return _PipelineRunStatus(run_id, cls, project=project, workflow=workflow_spec)
@@ -1212,7 +1225,7 @@ def notify_scheduled_workflow_failure(
     notification_pusher = mlrun.utils.notifications.CustomNotificationPusher(
         ["slack"]
     )
-    url =
+    url = get_workflow_url(project_name, context_uid)
     link = f"<{url}|*view workflow job details*>"
     message = (
         f":x: Failed to run scheduled workflow {workflow_name} "