mlrun 1.7.0rc3__py3-none-any.whl → 1.7.0rc4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic; consult the package registry listing for more details.

Files changed (69)
  1. mlrun/artifacts/manager.py +6 -1
  2. mlrun/common/constants.py +1 -0
  3. mlrun/common/model_monitoring/helpers.py +12 -6
  4. mlrun/common/schemas/__init__.py +1 -0
  5. mlrun/common/schemas/client_spec.py +1 -0
  6. mlrun/common/schemas/common.py +40 -0
  7. mlrun/common/schemas/model_monitoring/constants.py +4 -1
  8. mlrun/common/schemas/project.py +2 -0
  9. mlrun/config.py +20 -15
  10. mlrun/datastore/azure_blob.py +22 -9
  11. mlrun/datastore/base.py +15 -25
  12. mlrun/datastore/datastore.py +19 -8
  13. mlrun/datastore/datastore_profile.py +47 -5
  14. mlrun/datastore/google_cloud_storage.py +10 -6
  15. mlrun/datastore/hdfs.py +51 -0
  16. mlrun/datastore/redis.py +4 -0
  17. mlrun/datastore/s3.py +4 -0
  18. mlrun/datastore/sources.py +29 -43
  19. mlrun/datastore/targets.py +58 -48
  20. mlrun/datastore/utils.py +2 -49
  21. mlrun/datastore/v3io.py +4 -0
  22. mlrun/db/base.py +34 -0
  23. mlrun/db/httpdb.py +71 -42
  24. mlrun/execution.py +3 -3
  25. mlrun/feature_store/feature_vector.py +2 -2
  26. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +3 -3
  27. mlrun/frameworks/tf_keras/model_handler.py +7 -7
  28. mlrun/k8s_utils.py +10 -5
  29. mlrun/kfpops.py +19 -10
  30. mlrun/model.py +5 -0
  31. mlrun/model_monitoring/api.py +3 -3
  32. mlrun/model_monitoring/application.py +1 -1
  33. mlrun/model_monitoring/applications/__init__.py +13 -0
  34. mlrun/model_monitoring/applications/histogram_data_drift.py +218 -0
  35. mlrun/model_monitoring/batch.py +9 -111
  36. mlrun/model_monitoring/controller.py +73 -55
  37. mlrun/model_monitoring/controller_handler.py +13 -5
  38. mlrun/model_monitoring/features_drift_table.py +62 -53
  39. mlrun/model_monitoring/helpers.py +30 -21
  40. mlrun/model_monitoring/metrics/__init__.py +13 -0
  41. mlrun/model_monitoring/metrics/histogram_distance.py +127 -0
  42. mlrun/model_monitoring/stores/kv_model_endpoint_store.py +14 -14
  43. mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -1
  44. mlrun/package/packagers/pandas_packagers.py +3 -3
  45. mlrun/package/utils/_archiver.py +3 -1
  46. mlrun/platforms/iguazio.py +8 -65
  47. mlrun/projects/pipelines.py +21 -11
  48. mlrun/projects/project.py +121 -42
  49. mlrun/runtimes/base.py +21 -2
  50. mlrun/runtimes/kubejob.py +5 -3
  51. mlrun/runtimes/local.py +2 -2
  52. mlrun/runtimes/mpijob/abstract.py +6 -6
  53. mlrun/runtimes/nuclio/function.py +9 -9
  54. mlrun/runtimes/nuclio/serving.py +3 -3
  55. mlrun/runtimes/pod.py +3 -3
  56. mlrun/runtimes/sparkjob/spark3job.py +3 -3
  57. mlrun/serving/remote.py +4 -2
  58. mlrun/serving/server.py +2 -8
  59. mlrun/utils/async_http.py +3 -3
  60. mlrun/utils/helpers.py +27 -5
  61. mlrun/utils/http.py +3 -3
  62. mlrun/utils/notifications/notification_pusher.py +6 -6
  63. mlrun/utils/version/version.json +2 -2
  64. {mlrun-1.7.0rc3.dist-info → mlrun-1.7.0rc4.dist-info}/METADATA +13 -16
  65. {mlrun-1.7.0rc3.dist-info → mlrun-1.7.0rc4.dist-info}/RECORD +69 -63
  66. {mlrun-1.7.0rc3.dist-info → mlrun-1.7.0rc4.dist-info}/LICENSE +0 -0
  67. {mlrun-1.7.0rc3.dist-info → mlrun-1.7.0rc4.dist-info}/WHEEL +0 -0
  68. {mlrun-1.7.0rc3.dist-info → mlrun-1.7.0rc4.dist-info}/entry_points.txt +0 -0
  69. {mlrun-1.7.0rc3.dist-info → mlrun-1.7.0rc4.dist-info}/top_level.txt +0 -0
@@ -11,8 +11,10 @@
11
11
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
- #
15
- from typing import Union
14
+
15
+ import functools
16
+ import sys
17
+ from typing import Callable, Union
16
18
 
17
19
  import numpy as np
18
20
  import plotly.graph_objects as go
@@ -27,7 +29,7 @@ DriftResultType = tuple[mlrun.common.schemas.model_monitoring.DriftStatus, float
27
29
  class FeaturesDriftTablePlot:
28
30
  """
29
31
  Class for producing a features drift table. The plot is a table with columns of all the statistics and metrics
30
- provided with two additional plot columns of the histograms and drift notification. The rows content will be drawn
32
+ provided with two additional plot columns of the histograms and drift status. The rows content will be drawn
31
33
  per feature.
32
34
 
33
35
  For example, if the statistics are 'mean', 'min', 'max' and one metric of 'tvd', for 3 features the table will be:
@@ -47,7 +49,7 @@ class FeaturesDriftTablePlot:
47
49
  70 # The width for the values of all the statistics and metrics columns.
48
50
  )
49
51
  _HISTOGRAMS_COLUMN_WIDTH = 180
50
- _NOTIFICATIONS_COLUMN_WIDTH = 20
52
+ _STATUS_COLUMN_WIDTH = 20
51
53
 
52
54
  # Table rows heights:
53
55
  _HEADER_ROW_HEIGHT = 25
@@ -56,9 +58,10 @@ class FeaturesDriftTablePlot:
56
58
  # Histograms configurations:
57
59
  _SAMPLE_SET_HISTOGRAM_COLOR = "rgb(0,112,192)" # Blue
58
60
  _INPUTS_HISTOGRAM_COLOR = "rgb(208,0,106)" # Magenta
61
+ _HISTOGRAM_OPACITY = 0.75
59
62
 
60
- # Notification configurations:
61
- _NOTIFICATION_COLORS = {
63
+ # Status configurations:
64
+ _STATUS_COLORS = {
62
65
  mlrun.common.schemas.model_monitoring.DriftStatus.NO_DRIFT: "rgb(0,176,80)", # Green
63
66
  mlrun.common.schemas.model_monitoring.DriftStatus.POSSIBLE_DRIFT: "rgb(255,192,0)", # Orange
64
67
  mlrun.common.schemas.model_monitoring.DriftStatus.DRIFT_DETECTED: "rgb(208,0,106)", # Magenta
@@ -78,9 +81,6 @@ class FeaturesDriftTablePlot:
78
81
  _BACKGROUND_COLOR = "rgb(255,255,255)" # White
79
82
  _SEPARATORS_COLOR = "rgb(240,240,240)" # Light grey
80
83
 
81
- # File name:
82
- _FILE_NAME = "table_plot.html"
83
-
84
84
  def __init__(self):
85
85
  """
86
86
  Initialize the plot producer for later calling the `produce` method.
@@ -198,7 +198,7 @@ class FeaturesDriftTablePlot:
198
198
  self._FEATURE_NAME_COLUMN_WIDTH,
199
199
  *self._value_columns_widths,
200
200
  self._HISTOGRAMS_COLUMN_WIDTH,
201
- self._NOTIFICATIONS_COLUMN_WIDTH,
201
+ self._STATUS_COLUMN_WIDTH,
202
202
  ],
203
203
  header_fill_color=self._BACKGROUND_COLOR,
204
204
  )
@@ -222,7 +222,7 @@ class FeaturesDriftTablePlot:
222
222
  [self._FEATURE_NAME_COLUMN_WIDTH]
223
223
  + [self._VALUE_COLUMN_WIDTH]
224
224
  * (2 * len(self._statistics_columns) + len(self._metrics_columns))
225
- + [self._HISTOGRAMS_COLUMN_WIDTH, self._NOTIFICATIONS_COLUMN_WIDTH]
225
+ + [self._HISTOGRAMS_COLUMN_WIDTH, self._STATUS_COLUMN_WIDTH]
226
226
  ),
227
227
  header_fill_color=self._BACKGROUND_COLOR,
228
228
  )
@@ -332,25 +332,25 @@ class FeaturesDriftTablePlot:
332
332
 
333
333
  return feature_row_table
334
334
 
335
- def _plot_histogram_scatters(
336
- self, sample_hist: tuple[list, list], input_hist: tuple[list, list]
337
- ) -> tuple[go.Scatter, go.Scatter]:
335
+ def _plot_histogram_bars(
336
+ self,
337
+ figure_add_trace: Callable,
338
+ sample_hist: tuple[list, list],
339
+ input_hist: tuple[list, list],
340
+ showlegend: bool = False,
341
+ ) -> None:
338
342
  """
339
- Plot the feature's histograms to include in the "histograms" column. Both histograms are returned to later be
340
- added in the same figure, so they will be on top of each other and not separated. Both histograms are rescaled
343
+ Plot the feature's histograms to include in the "histograms" column. Both histograms are rescaled
341
344
  to be from 0.0 to 1.0, so they will be drawn in the same scale regardless the amount of elements they were
342
345
  calculated upon.
343
346
 
344
- :param sample_hist: The sample set histogram data.
345
- :param input_hist: The input histogram data.
347
+ :param figure_add_trace: The figure's method that get the histogram and adds it to the figure.
348
+ :param sample_hist: The sample set histogram data.
349
+ :param input_hist: The input histogram data.
350
+ :param showlegend: Show the legend for each histogram or not.
346
351
 
347
- :return: A tuple with both histograms - `Scatter` traces:
348
- [0] - Sample set histogram.
349
- [1] - Input histogram.
352
+ :return: None
350
353
  """
351
- # Initialize a list to collect the scatters:
352
- scatters = []
353
-
354
354
  # Plot the histograms:
355
355
  for name, color, histogram in zip(
356
356
  ["sample", "input"],
@@ -361,23 +361,29 @@ class FeaturesDriftTablePlot:
361
361
  counts, bins = histogram
362
362
  # Rescale the counts to be in percentages (between 0.0 to 1.0):
363
363
  counts = np.array(counts) / sum(counts)
364
+ hovertext = [""] * len(counts)
364
365
  # Convert to NumPy for vectorization:
365
366
  bins = np.array(bins)
367
+ if bins[0] == -sys.float_info.max:
368
+ bins[0] = bins[1] - (bins[2] - bins[1])
369
+ hovertext[0] = f"(-∞, {bins[1]})"
370
+ if bins[-1] == sys.float_info.max:
371
+ bins[-1] = bins[-2] + (bins[-2] - bins[-3])
372
+ hovertext[-1] = f"({bins[-2]}, ∞)"
366
373
  # Center the bins (leave the first one):
367
374
  bins = 0.5 * (bins[:-1] + bins[1:])
368
375
  # Plot the histogram as a line with filled background below it:
369
- histogram_scatter = go.Scatter(
376
+ histogram_bar = go.Bar(
370
377
  x=bins,
371
378
  y=counts,
372
- fill="tozeroy",
373
379
  name=name,
374
- line_shape="spline", # Make the line rounder.
375
- line={"color": color},
380
+ marker_color=color,
381
+ opacity=self._HISTOGRAM_OPACITY,
376
382
  legendgroup=name,
383
+ hovertext=hovertext,
384
+ showlegend=showlegend,
377
385
  )
378
- scatters.append(histogram_scatter)
379
-
380
- return scatters[0], scatters[1]
386
+ figure_add_trace(histogram_bar)
381
387
 
382
388
  def _calculate_row_height(self, features: list[str]) -> int:
383
389
  """
@@ -399,7 +405,7 @@ class FeaturesDriftTablePlot:
399
405
  self._FEATURE_ROW_HEIGHT, 1.5 * self._FONT_SIZE * feature_name_seperations
400
406
  )
401
407
 
402
- def _plot_notification_circle(
408
+ def _plot_status_circle(
403
409
  self,
404
410
  figure: go.Figure,
405
411
  row: int,
@@ -407,8 +413,8 @@ class FeaturesDriftTablePlot:
407
413
  drift_result: DriftResultType,
408
414
  ):
409
415
  """
410
- Plot the drift notification - a little circle with color as configured in the class. The color will beb chosen
411
- according to the drift status given.
416
+ Plot the drift status - a little circle with color as configured in the
417
+ class. The color will be chosen according to the drift status given.
412
418
 
413
419
  :param figure: The figure (feature row cell) to draw the circle in.
414
420
  :param row: The row number.
@@ -420,12 +426,12 @@ class FeaturesDriftTablePlot:
420
426
  # row 3) times the plot columns (2 columns has axes in each row) + 2 (to get to the column of the notification):
421
427
  axis_number = (row - 3) * 2 + 2
422
428
  figure["layout"][f"xaxis{axis_number}"].update(
423
- range=[0, self._NOTIFICATIONS_COLUMN_WIDTH]
429
+ range=[0, self._STATUS_COLUMN_WIDTH]
424
430
  )
425
431
  figure["layout"][f"yaxis{axis_number}"].update(range=[0, row_height])
426
432
 
427
433
  # Get the color:
428
- notification_color = self._NOTIFICATION_COLORS[drift_result[0]]
434
+ notification_color = self._STATUS_COLORS[drift_result[0]]
429
435
  half_transparent_notification_color = notification_color.replace(
430
436
  "rgb", "rgba"
431
437
  ).replace(")", ",0.5)")
@@ -434,8 +440,8 @@ class FeaturesDriftTablePlot:
434
440
  # size of the text as well):
435
441
  y0 = 36 + (row_height - self._FEATURE_ROW_HEIGHT)
436
442
  y1 = y0 + self._FONT_SIZE
437
- x0 = (self._NOTIFICATIONS_COLUMN_WIDTH / 2) - ((y1 - y0) / 2)
438
- x1 = (self._NOTIFICATIONS_COLUMN_WIDTH / 2) + ((y1 - y0) / 2)
443
+ x0 = (self._STATUS_COLUMN_WIDTH / 2) - ((y1 - y0) / 2)
444
+ x1 = (self._STATUS_COLUMN_WIDTH / 2) + ((y1 - y0) / 2)
439
445
 
440
446
  # Draw the circle on top of the figure:
441
447
  figure.add_shape(
@@ -486,7 +492,7 @@ class FeaturesDriftTablePlot:
486
492
  self._FEATURE_NAME_COLUMN_WIDTH
487
493
  + sum(self._value_columns_widths)
488
494
  + self._HISTOGRAMS_COLUMN_WIDTH
489
- + self._NOTIFICATIONS_COLUMN_WIDTH
495
+ + self._STATUS_COLUMN_WIDTH
490
496
  )
491
497
  height = 2 * self._HEADER_ROW_HEIGHT + len(features) * row_height
492
498
 
@@ -507,7 +513,7 @@ class FeaturesDriftTablePlot:
507
513
  (self._FEATURE_NAME_COLUMN_WIDTH + sum(self._value_columns_widths))
508
514
  / width,
509
515
  self._HISTOGRAMS_COLUMN_WIDTH / width,
510
- self._NOTIFICATIONS_COLUMN_WIDTH / width,
516
+ self._STATUS_COLUMN_WIDTH / width,
511
517
  ],
512
518
  horizontal_spacing=0,
513
519
  vertical_spacing=0,
@@ -518,9 +524,11 @@ class FeaturesDriftTablePlot:
518
524
  main_figure.add_trace(header_trace, row=1, col=1)
519
525
  main_figure.add_trace(sub_header_trace, row=2, col=1)
520
526
 
521
- # Start going over the features and plot each row, histogram and notification:
522
- row = 3 # We are currently at row 3 counting the headers.
523
- for feature in features:
527
+ # Start going over the features and plot each row, histogram and status
528
+ for row, feature in enumerate(
529
+ features,
530
+ start=3, # starting from row 3 after the headers
531
+ ):
524
532
  try:
525
533
  # Add the feature values:
526
534
  main_figure.add_trace(
@@ -543,23 +551,22 @@ class FeaturesDriftTablePlot:
543
551
  f"{inputs_statistics.keys() = }\n"
544
552
  )
545
553
  # Add the histograms (both traces are added to the same subplot figure):
546
- sample_hist, input_hist = self._plot_histogram_scatters(
554
+ self._plot_histogram_bars(
555
+ figure_add_trace=functools.partial(
556
+ main_figure.add_trace, row=row, col=2
557
+ ),
547
558
  sample_hist=sample_set_statistics[feature]["hist"],
548
559
  input_hist=inputs_statistics[feature]["hist"],
560
+ # Only the first row should have its legend visible
561
+ showlegend=(row == 3),
549
562
  )
550
- if row != 3: # Only the first row should have its legend visible:
551
- sample_hist.showlegend = False
552
- input_hist.showlegend = False
553
- main_figure.add_trace(sample_hist, row=row, col=2)
554
- main_figure.add_trace(input_hist, row=row, col=2)
555
- # Add the notification (a circle with color according to the drift alert):
556
- self._plot_notification_circle(
563
+ # Add the status (a circle with color according to the drift status)
564
+ self._plot_status_circle(
557
565
  figure=main_figure,
558
566
  row=row,
559
567
  row_height=row_height,
560
568
  drift_result=drift_results[feature],
561
569
  )
562
- row += 1
563
570
 
564
571
  # Configure the layout and axes for height and widths:
565
572
  main_figure.update_layout(
@@ -576,9 +583,11 @@ class FeaturesDriftTablePlot:
576
583
  "yanchor": "top",
577
584
  "y": 1.0 - (self._HEADER_ROW_HEIGHT / height) + 0.002,
578
585
  "xanchor": "right",
579
- "x": 1.0 - (self._NOTIFICATIONS_COLUMN_WIDTH / width) - 0.01,
586
+ "x": 1.0 - (self._STATUS_COLUMN_WIDTH / width) - 0.01,
580
587
  "bgcolor": "rgba(0,0,0,0)",
581
588
  },
589
+ barmode="overlay",
590
+ bargap=0,
582
591
  )
583
592
  main_figure.update_xaxes(
584
593
  showticklabels=False,
@@ -20,15 +20,14 @@ import mlrun.common.model_monitoring.helpers
20
20
  import mlrun.common.schemas
21
21
  from mlrun.common.schemas.model_monitoring import (
22
22
  EventFieldType,
23
- MonitoringFunctionNames,
24
23
  )
25
- from mlrun.errors import MLRunValueError
26
24
  from mlrun.model_monitoring.model_endpoint import ModelEndpoint
27
25
  from mlrun.utils import logger
28
26
 
29
27
  if typing.TYPE_CHECKING:
30
28
  from mlrun.db.base import RunDBInterface
31
29
  from mlrun.projects import MlrunProject
30
+ import mlrun.common.schemas.model_monitoring.constants as mm_constants
32
31
 
33
32
 
34
33
  class _BatchDict(typing.TypedDict):
@@ -41,29 +40,32 @@ class _MLRunNoRunsFoundError(Exception):
41
40
  pass
42
41
 
43
42
 
44
- def get_stream_path(project: str = None, application_name: str = None):
43
+ def get_stream_path(
44
+ project: str = None,
45
+ function_name: str = mm_constants.MonitoringFunctionNames.STREAM,
46
+ ):
45
47
  """
46
48
  Get stream path from the project secret. If wasn't set, take it from the system configurations
47
49
 
48
50
  :param project: Project name.
49
- :param application_name: Application name, None for model_monitoring_stream.
51
+ :param function_name: Application name. Default is model_monitoring_stream.
50
52
 
51
53
  :return: Monitoring stream path to the relevant application.
52
54
  """
53
55
 
54
56
  stream_uri = mlrun.get_secret_or_env(
55
57
  mlrun.common.schemas.model_monitoring.ProjectSecretKeys.STREAM_PATH
56
- if application_name is None
58
+ if function_name is mm_constants.MonitoringFunctionNames.STREAM
57
59
  else ""
58
60
  ) or mlrun.mlconf.get_model_monitoring_file_target_path(
59
61
  project=project,
60
62
  kind=mlrun.common.schemas.model_monitoring.FileTargetKind.STREAM,
61
63
  target="online",
62
- application_name=application_name,
64
+ function_name=function_name,
63
65
  )
64
66
 
65
67
  return mlrun.common.model_monitoring.helpers.parse_monitoring_stream_path(
66
- stream_uri=stream_uri, project=project, application_name=application_name
68
+ stream_uri=stream_uri, project=project, function_name=function_name
67
69
  )
68
70
 
69
71
 
@@ -125,24 +127,31 @@ def _get_monitoring_time_window_from_controller_run(
125
127
  project: str, db: "RunDBInterface"
126
128
  ) -> datetime.timedelta:
127
129
  """
128
- Get timedelta for the controller to run.
130
+ Get the base period form the controller.
129
131
 
130
132
  :param project: Project name.
131
133
  :param db: DB interface.
132
134
 
133
135
  :return: Timedelta for the controller to run.
136
+ :raise: MLRunNotFoundError if the controller isn't deployed yet
134
137
  """
135
- run_name = MonitoringFunctionNames.APPLICATION_CONTROLLER
136
- runs = db.list_runs(project=project, name=run_name, sort=True)
137
- if not runs:
138
- raise _MLRunNoRunsFoundError(f"No {run_name} runs were found")
139
- last_run = runs[0]
140
- try:
141
- batch_dict = last_run["spec"]["parameters"]["batch_intervals_dict"]
142
- except KeyError:
143
- raise MLRunValueError(
144
- f"Could not find `batch_intervals_dict` in {run_name} run"
145
- )
138
+
139
+ controller = db.get_function(
140
+ name=mm_constants.MonitoringFunctionNames.APPLICATION_CONTROLLER,
141
+ project=project,
142
+ )
143
+ if isinstance(controller, dict):
144
+ controller = mlrun.runtimes.RemoteRuntime.from_dict(controller)
145
+ elif not hasattr(controller, "to_dict"):
146
+ raise mlrun.errors.MLRunNotFoundError()
147
+ base_period = controller.spec.config["spec.triggers.cron_interval"]["attributes"][
148
+ "interval"
149
+ ]
150
+ batch_dict = {
151
+ mm_constants.EventFieldType.MINUTES: int(base_period[:-1]),
152
+ mm_constants.EventFieldType.HOURS: 0,
153
+ mm_constants.EventFieldType.DAYS: 0,
154
+ }
146
155
  return batch_dict2timedelta(batch_dict)
147
156
 
148
157
 
@@ -177,9 +186,9 @@ def update_model_endpoint_last_request(
177
186
  else:
178
187
  try:
179
188
  time_window = _get_monitoring_time_window_from_controller_run(project, db)
180
- except _MLRunNoRunsFoundError:
189
+ except mlrun.errors.MLRunNotFoundError:
181
190
  logger.debug(
182
- "Not bumping model endpoint last request time - no controller runs were found"
191
+ "Not bumping model endpoint last request time - the monitoring controller isn't deployed yet"
183
192
  )
184
193
  return
185
194
 
@@ -0,0 +1,13 @@
1
+ # Copyright 2024 Iguazio
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
@@ -0,0 +1,127 @@
1
+ # Copyright 2024 Iguazio
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import abc
16
+ import dataclasses
17
+ from typing import ClassVar, Optional
18
+
19
+ import numpy as np
20
+
21
+
22
+ @dataclasses.dataclass
23
+ class HistogramDistanceMetric(abc.ABC):
24
+ """
25
+ An abstract base class for distance metrics between histograms.
26
+
27
+ :args distrib_t: array of distribution t (usually the latest dataset distribution)
28
+ :args distrib_u: array of distribution u (usually the sample dataset distribution)
29
+
30
+ Each distribution must contain nonnegative floats that sum up to 1.0.
31
+ """
32
+
33
+ distrib_t: np.ndarray
34
+ distrib_u: np.ndarray
35
+
36
+ NAME: ClassVar[str]
37
+
38
+ # noinspection PyMethodOverriding
39
+ def __init_subclass__(cls, *, metric_name: str, **kwargs) -> None:
40
+ super().__init_subclass__(**kwargs)
41
+ cls.NAME = metric_name
42
+
43
+ @abc.abstractmethod
44
+ def compute(self) -> float:
45
+ raise NotImplementedError
46
+
47
+
48
+ class TotalVarianceDistance(HistogramDistanceMetric, metric_name="tvd"):
49
+ """
50
+ Provides a symmetric drift distance between two periods t and u
51
+ Z - vector of random variables
52
+ Pt - Probability distribution over time span t
53
+ """
54
+
55
+ def compute(self) -> float:
56
+ """
57
+ Calculate Total Variance distance.
58
+
59
+ :returns: Total Variance Distance.
60
+ """
61
+ return np.sum(np.abs(self.distrib_t - self.distrib_u)) / 2
62
+
63
+
64
+ class HellingerDistance(HistogramDistanceMetric, metric_name="hellinger"):
65
+ """
66
+ Hellinger distance is an f divergence measure, similar to the Kullback-Leibler (KL) divergence.
67
+ It used to quantify the difference between two probability distributions.
68
+ However, unlike KL Divergence the Hellinger divergence is symmetric and bounded over a probability space.
69
+ The output range of Hellinger distance is [0,1]. The closer to 0, the more similar the two distributions.
70
+ """
71
+
72
+ def compute(self) -> float:
73
+ """
74
+ Calculate Hellinger Distance
75
+
76
+ :returns: Hellinger Distance
77
+ """
78
+ return np.sqrt(
79
+ max(
80
+ 1 - np.sum(np.sqrt(self.distrib_u * self.distrib_t)),
81
+ 0, # numerical errors may produce small negative numbers, e.g. -1e-16.
82
+ # However, Cauchy-Schwarz inequality assures this number is in the range [0, 1]
83
+ )
84
+ )
85
+
86
+
87
+ class KullbackLeiblerDivergence(HistogramDistanceMetric, metric_name="kld"):
88
+ """
89
+ KL Divergence (or relative entropy) is a measure of how one probability distribution differs from another.
90
+ It is an asymmetric measure (thus it's not a metric) and it doesn't satisfy the triangle inequality.
91
+ KL Divergence of 0, indicates two identical distributions.
92
+ """
93
+
94
+ @staticmethod
95
+ def _calc_kl_div(
96
+ actual_dist: np.ndarray, expected_dist: np.ndarray, zero_scaling: float
97
+ ) -> float:
98
+ """Return the asymmetric KL divergence"""
99
+ # We take 0*log(0) == 0 for this calculation
100
+ mask = actual_dist != 0
101
+ actual_dist = actual_dist[mask]
102
+ expected_dist = expected_dist[mask]
103
+ with np.errstate(over="ignore"):
104
+ # Ignore overflow warnings when dividing by small numbers,
105
+ # resulting in inf:
106
+ # RuntimeWarning: overflow encountered in true_divide
107
+ relative_prob = actual_dist / np.where(
108
+ expected_dist != 0, expected_dist, zero_scaling
109
+ )
110
+ return np.sum(actual_dist * np.log(relative_prob))
111
+
112
+ def compute(
113
+ self, capping: Optional[float] = None, zero_scaling: float = 1e-4
114
+ ) -> float:
115
+ """
116
+ :param capping: A bounded value for the KL Divergence. For infinite distance, the result is replaced with
117
+ the capping value which indicates a huge differences between the distributions.
118
+ :param zero_scaling: Will be used to replace 0 values for executing the logarithmic operation.
119
+
120
+ :returns: symmetric KL Divergence
121
+ """
122
+ t_u = self._calc_kl_div(self.distrib_t, self.distrib_u, zero_scaling)
123
+ u_t = self._calc_kl_div(self.distrib_u, self.distrib_t, zero_scaling)
124
+ result = t_u + u_t
125
+ if capping and result == float("inf"):
126
+ return capping
127
+ return result
@@ -302,7 +302,7 @@ class KVModelEndpointStore(ModelEndpointStore):
302
302
  )
303
303
  # Final cleanup of tsdb path
304
304
  tsdb_path.replace("://u", ":///u")
305
- store, _ = mlrun.store_manager.get_or_create_store(tsdb_path)
305
+ store, _, _ = mlrun.store_manager.get_or_create_store(tsdb_path)
306
306
  store.rm(tsdb_path, recursive=True)
307
307
 
308
308
  def get_endpoint_real_time_metrics(
@@ -538,24 +538,24 @@ class KVModelEndpointStore(ModelEndpointStore):
538
538
  and endpoint[mlrun.common.schemas.model_monitoring.EventFieldType.METRICS]
539
539
  == "null"
540
540
  ):
541
- endpoint[
542
- mlrun.common.schemas.model_monitoring.EventFieldType.METRICS
543
- ] = json.dumps(
544
- {
545
- mlrun.common.schemas.model_monitoring.EventKeyMetrics.GENERIC: {
546
- mlrun.common.schemas.model_monitoring.EventLiveStats.LATENCY_AVG_1H: 0,
547
- mlrun.common.schemas.model_monitoring.EventLiveStats.PREDICTIONS_PER_SECOND: 0,
541
+ endpoint[mlrun.common.schemas.model_monitoring.EventFieldType.METRICS] = (
542
+ json.dumps(
543
+ {
544
+ mlrun.common.schemas.model_monitoring.EventKeyMetrics.GENERIC: {
545
+ mlrun.common.schemas.model_monitoring.EventLiveStats.LATENCY_AVG_1H: 0,
546
+ mlrun.common.schemas.model_monitoring.EventLiveStats.PREDICTIONS_PER_SECOND: 0,
547
+ }
548
548
  }
549
- }
549
+ )
550
550
  )
551
551
  # Validate key `uid` instead of `endpoint_id`
552
552
  # For backwards compatibility reasons, we replace the `endpoint_id` with `uid` which is the updated key name
553
553
  if mlrun.common.schemas.model_monitoring.EventFieldType.ENDPOINT_ID in endpoint:
554
- endpoint[
555
- mlrun.common.schemas.model_monitoring.EventFieldType.UID
556
- ] = endpoint[
557
- mlrun.common.schemas.model_monitoring.EventFieldType.ENDPOINT_ID
558
- ]
554
+ endpoint[mlrun.common.schemas.model_monitoring.EventFieldType.UID] = (
555
+ endpoint[
556
+ mlrun.common.schemas.model_monitoring.EventFieldType.ENDPOINT_ID
557
+ ]
558
+ )
559
559
 
560
560
  @staticmethod
561
561
  def _encode_field(field: typing.Union[str, bytes]) -> bytes:
@@ -31,7 +31,6 @@ from .models import get_model_endpoints_table
31
31
 
32
32
 
33
33
  class SQLModelEndpointStore(ModelEndpointStore):
34
-
35
34
  """
36
35
  Handles the DB operations when the DB target is from type SQL. For the SQL operations, we use SQLAlchemy, a Python
37
36
  SQL toolkit that handles the communication with the database. When using SQL for storing the model endpoints
@@ -838,9 +838,9 @@ class PandasDataFramePackager(DefaultPackager):
838
838
  """
839
839
  if isinstance(obj, dict):
840
840
  for key, value in obj.items():
841
- obj[
842
- PandasDataFramePackager._prepare_result(obj=key)
843
- ] = PandasDataFramePackager._prepare_result(obj=value)
841
+ obj[PandasDataFramePackager._prepare_result(obj=key)] = (
842
+ PandasDataFramePackager._prepare_result(obj=value)
843
+ )
844
844
  elif isinstance(obj, list):
845
845
  for i, value in enumerate(obj):
846
846
  obj[i] = PandasDataFramePackager._prepare_result(obj=value)
@@ -179,7 +179,9 @@ class _TarArchiver(_Archiver):
179
179
 
180
180
  # Extract:
181
181
  with tarfile.open(archive_path, f"r:{cls._MODE_STRING}") as tar_file:
182
- tar_file.extractall(directory_path)
182
+ # use 'data' to ensure no security risks are imposed by the archive files
183
+ # see: https://docs.python.org/3/library/tarfile.html#tarfile.TarFile.extractall
184
+ tar_file.extractall(directory_path, filter="data")
183
185
 
184
186
  return str(directory_path)
185
187