mlrun 1.6.0rc20__py3-none-any.whl → 1.6.0rc22__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mlrun/artifacts/base.py +6 -6
- mlrun/artifacts/dataset.py +15 -8
- mlrun/artifacts/manager.py +6 -3
- mlrun/artifacts/model.py +2 -2
- mlrun/artifacts/plots.py +8 -8
- mlrun/config.py +1 -1
- mlrun/data_types/to_pandas.py +1 -1
- mlrun/datastore/azure_blob.py +12 -16
- mlrun/datastore/base.py +32 -10
- mlrun/datastore/datastore_profile.py +4 -4
- mlrun/datastore/dbfs_store.py +12 -11
- mlrun/datastore/filestore.py +2 -1
- mlrun/datastore/google_cloud_storage.py +11 -10
- mlrun/datastore/redis.py +2 -1
- mlrun/datastore/s3.py +12 -15
- mlrun/datastore/sources.py +16 -11
- mlrun/datastore/targets.py +2 -13
- mlrun/datastore/v3io.py +18 -20
- mlrun/db/httpdb.py +76 -7
- mlrun/errors.py +4 -0
- mlrun/execution.py +13 -4
- mlrun/feature_store/api.py +3 -4
- mlrun/launcher/base.py +4 -4
- mlrun/lists.py +0 -6
- mlrun/model.py +8 -1
- mlrun/model_monitoring/api.py +9 -31
- mlrun/model_monitoring/batch.py +14 -13
- mlrun/model_monitoring/controller.py +100 -70
- mlrun/model_monitoring/controller_handler.py +1 -3
- mlrun/model_monitoring/helpers.py +65 -20
- mlrun/model_monitoring/stream_processing.py +0 -3
- mlrun/projects/operations.py +1 -1
- mlrun/projects/project.py +10 -4
- mlrun/runtimes/base.py +6 -1
- mlrun/runtimes/constants.py +11 -0
- mlrun/runtimes/databricks_job/databricks_runtime.py +7 -9
- mlrun/runtimes/kubejob.py +1 -1
- mlrun/runtimes/local.py +64 -53
- mlrun/runtimes/serving.py +8 -1
- mlrun/serving/routers.py +7 -20
- mlrun/serving/server.py +4 -14
- mlrun/serving/utils.py +0 -3
- mlrun/utils/helpers.py +10 -2
- mlrun/utils/logger.py +5 -5
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.6.0rc20.dist-info → mlrun-1.6.0rc22.dist-info}/METADATA +5 -3
- {mlrun-1.6.0rc20.dist-info → mlrun-1.6.0rc22.dist-info}/RECORD +51 -51
- {mlrun-1.6.0rc20.dist-info → mlrun-1.6.0rc22.dist-info}/LICENSE +0 -0
- {mlrun-1.6.0rc20.dist-info → mlrun-1.6.0rc22.dist-info}/WHEEL +0 -0
- {mlrun-1.6.0rc20.dist-info → mlrun-1.6.0rc22.dist-info}/entry_points.txt +0 -0
- {mlrun-1.6.0rc20.dist-info → mlrun-1.6.0rc22.dist-info}/top_level.txt +0 -0
mlrun/datastore/s3.py
CHANGED
@@ -33,7 +33,7 @@ class S3Store(DataStore):
 
         self.headers = None
 
-
+        access_key_id = self._get_secret_or_env("AWS_ACCESS_KEY_ID")
         secret_key = self._get_secret_or_env("AWS_SECRET_ACCESS_KEY")
         endpoint_url = self._get_secret_or_env("S3_ENDPOINT_URL")
         force_non_anonymous = self._get_secret_or_env("S3_NON_ANONYMOUS")
@@ -43,7 +43,7 @@ class S3Store(DataStore):
         # If user asks to assume a role, this needs to go through the STS client and retrieve temporary creds
         if assume_role_arn:
             client = boto3.client(
-                "sts", aws_access_key_id=
+                "sts", aws_access_key_id=access_key_id, aws_secret_access_key=secret_key
             )
             self._temp_credentials = client.assume_role(
                 RoleArn=assume_role_arn, RoleSessionName="assumeRoleSession"
@@ -74,11 +74,11 @@ class S3Store(DataStore):
             )
             return
 
-        if
+        if access_key_id or secret_key or force_non_anonymous:
             self.s3 = boto3.resource(
                 "s3",
                 region_name=region,
-                aws_access_key_id=
+                aws_access_key_id=access_key_id,
                 aws_secret_access_key=secret_key,
                 endpoint_url=endpoint_url,
             )
@@ -109,18 +109,15 @@ class S3Store(DataStore):
             res["spark.hadoop.fs.s3a.aws.profile"] = st.get("profile")
         return res
 
-
+    @property
+    def filesystem(self):
         """return fsspec file system object, if supported"""
         if self._filesystem:
             return self._filesystem
         try:
             import s3fs  # noqa
         except ImportError as exc:
-
-            raise ImportError(
-                "AWS s3fs not installed, run pip install s3fs"
-            ) from exc
-            return None
+            raise ImportError("AWS s3fs not installed") from exc
         filesystem_class = get_filesystem_class(protocol=self.kind)
         self._filesystem = makeDatastoreSchemaSanitizer(
             filesystem_class,
@@ -133,19 +130,19 @@ class S3Store(DataStore):
         force_non_anonymous = self._get_secret_or_env("S3_NON_ANONYMOUS")
         profile = self._get_secret_or_env("AWS_PROFILE")
         endpoint_url = self._get_secret_or_env("S3_ENDPOINT_URL")
-
+        access_key_id = self._get_secret_or_env("AWS_ACCESS_KEY_ID")
        secret = self._get_secret_or_env("AWS_SECRET_ACCESS_KEY")
 
         if self._temp_credentials:
-
+            access_key_id = self._temp_credentials["AccessKeyId"]
             secret = self._temp_credentials["SecretAccessKey"]
             token = self._temp_credentials["SessionToken"]
         else:
             token = None
 
         storage_options = dict(
-            anon=not (force_non_anonymous or (
-            key=
+            anon=not (force_non_anonymous or (access_key_id and secret)),
+            key=access_key_id,
             secret=secret,
             token=token,
         )
@@ -157,7 +154,7 @@ class S3Store(DataStore):
         if profile:
             storage_options["profile"] = profile
 
-        return storage_options
+        return self._sanitize_storage_options(storage_options)
 
     def get_bucket_and_key(self, key):
         path = self._join(key)[1:]
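The net effect of this change is that the AWS access key is resolved once (from secrets or environment) and reused for the STS client, the boto3 resource, and the fsspec storage options, which are now passed through _sanitize_storage_options. Below is a minimal, self-contained sketch of that credential-resolution logic; build_s3_storage_options is a hypothetical helper for illustration, not the S3Store method itself.

# Sketch of the storage-option logic shown above (hypothetical helper):
# access is anonymous only when neither a key pair nor the
# S3_NON_ANONYMOUS flag is present.
from typing import Optional


def build_s3_storage_options(
    access_key_id: Optional[str],
    secret: Optional[str],
    token: Optional[str] = None,
    force_non_anonymous: bool = False,
    profile: Optional[str] = None,
) -> dict:
    options = {
        "anon": not (force_non_anonymous or (access_key_id and secret)),
        "key": access_key_id,
        "secret": secret,
        "token": token,
    }
    if profile:
        options["profile"] = profile
    # The real code additionally calls self._sanitize_storage_options();
    # here we simply drop empty values.
    return {k: v for k, v in options.items() if v is not None}


print(build_s3_storage_options("AKIA-EXAMPLE", "secret"))  # authenticated: anon=False
print(build_s3_storage_options(None, None))                # anonymous: anon=True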
mlrun/datastore/sources.py
CHANGED
@@ -20,6 +20,7 @@ from datetime import datetime
 from typing import Dict, List, Optional, Union
 
 import pandas as pd
+import semver
 import v3io
 import v3io.dataplane
 from nuclio import KafkaTrigger
@@ -1018,6 +1019,20 @@ class KafkaSource(OnlineSource):
             max_workers=extra_attributes.pop("max_workers", 4),
         )
         function = function.add_trigger("kafka", trigger)
+
+        # ML-5499
+        bug_fix_version = "1.12.10"
+        if config.nuclio_version and semver.VersionInfo.parse(
+            config.nuclio_version
+        ) < semver.VersionInfo.parse(bug_fix_version):
+            warnings.warn(
+                f"Detected nuclio version {config.nuclio_version}, which is older "
+                f"than {bug_fix_version}. Forcing number of replicas of 1 in function '{function.metadata.name}'. "
+                f"To resolve this, please upgrade Nuclio."
+            )
+            function.spec.min_replicas = 1
+            function.spec.max_replicas = 1
+
         return function
 
 
@@ -1038,7 +1053,6 @@ class SQLSource(BaseSourceDriver):
         db_url: str = None,
         table_name: str = None,
         spark_options: dict = None,
-        time_fields: List[str] = None,
         parse_dates: List[str] = None,
         **kwargs,
     ):
@@ -1063,17 +1077,8 @@ class SQLSource(BaseSourceDriver):
         :param table_name: the name of the collection to access,
             from the current database
         :param spark_options: additional spark read options
-        :param time_fields : all the field to be parsed as timestamp.
         :param parse_dates : all the field to be parsed as timestamp.
         """
-        if time_fields:
-            warnings.warn(
-                "'time_fields' is deprecated, use 'parse_dates' instead. "
-                "This will be removed in 1.6.0",
-                # TODO: Remove this in 1.6.0
-                FutureWarning,
-            )
-            parse_dates = time_fields
         db_url = db_url or mlrun.mlconf.sql.url
         if db_url is None:
             raise mlrun.errors.MLRunInvalidArgumentError(
@@ -1081,7 +1086,7 @@ class SQLSource(BaseSourceDriver):
             )
         if time_field:
             if parse_dates:
-
+                parse_dates.append(time_field)
             else:
                 parse_dates = [time_field]
         attrs = {
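The KafkaSource change pins the Nuclio function to a single replica when the detected Nuclio version predates the ML-5499 fix. A standalone sketch of the same semver gate follows; needs_single_replica is a hypothetical helper, the real code mutates function.spec directly.

# Version gate sketch using the semver package (illustrative helper).
import warnings

import semver

BUG_FIX_VERSION = "1.12.10"


def needs_single_replica(nuclio_version: str) -> bool:
    """Return True when the detected Nuclio version predates the fix."""
    if not nuclio_version:
        return False
    return semver.VersionInfo.parse(nuclio_version) < semver.VersionInfo.parse(
        BUG_FIX_VERSION
    )


if needs_single_replica("1.12.9"):
    warnings.warn(
        f"Nuclio older than {BUG_FIX_VERSION} detected, forcing a single replica"
    )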
mlrun/datastore/targets.py
CHANGED
@@ -17,7 +17,6 @@ import os
 import random
 import sys
 import time
-import warnings
 from collections import Counter
 from copy import copy
 from typing import Any, Dict, List, Optional, Union
@@ -553,7 +552,7 @@ class BaseStoreTarget(DataTargetBase):
         else:
             store, target_path = self._get_store_and_path()
             target_path = generate_path_with_chunk(self, chunk_id, target_path)
-            file_system = store.
+            file_system = store.filesystem
             if file_system.protocol == "file":
                 dir = os.path.dirname(target_path)
                 if dir:
@@ -1407,7 +1406,7 @@ class StreamTarget(BaseStoreTarget):
         from storey import V3ioDriver
 
         key_columns = list(key_columns.keys())
-        endpoint, uri = parse_path(self.
+        endpoint, uri = parse_path(self.path)
         column_list = self._get_column_list(
             features=features, timestamp_key=timestamp_key, key_columns=key_columns
         )
@@ -1680,7 +1679,6 @@ class SQLTarget(BaseStoreTarget):
         if_exists: str = "append",
         create_table: bool = False,
         # create_according_to_data: bool = False,
-        time_fields: List[str] = None,
         varchar_len: int = 50,
         parse_dates: List[str] = None,
     ):
@@ -1718,20 +1716,11 @@ class SQLTarget(BaseStoreTarget):
         :param create_table: pass True if you want to create new table named by
             table_name with schema on current database.
         :param create_according_to_data: (not valid)
-        :param time_fields : all the field to be parsed as timestamp.
         :param varchar_len : the defalut len of the all the varchar column (using if needed to create the table).
         :param parse_dates : all the field to be parsed as timestamp.
         """
 
         create_according_to_data = False  # TODO: open for user
-        if time_fields:
-            warnings.warn(
-                "'time_fields' is deprecated, use 'parse_dates' instead. "
-                "This will be removed in 1.6.0",
-                # TODO: Remove this in 1.6.0
-                FutureWarning,
-            )
-            parse_dates = time_fields
         db_url = db_url or mlrun.mlconf.sql.url
         if db_url is None or table_name is None:
             attr = {}
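With the deprecated time_fields parameter removed from SQLSource and SQLTarget, timestamp columns are declared through parse_dates only. A hedged usage sketch, with the database URL, table name, and column name made up; check the full SQLTarget signature for the remaining arguments.

# Migration sketch: parse_dates replaces the removed time_fields argument.
from mlrun.datastore.targets import SQLTarget

target = SQLTarget(
    db_url="sqlite:///example.db",  # assumed value; any SQLAlchemy URL
    table_name="measurements",      # assumed table name
    create_table=True,
    parse_dates=["event_time"],     # previously passed as time_fields=[...]
)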
mlrun/datastore/v3io.py
CHANGED
@@ -19,6 +19,7 @@ from copy import deepcopy
 from datetime import datetime
 
 import fsspec
+import requests
 import v3io.dataplane
 
 import mlrun
@@ -73,26 +74,20 @@ class V3ioStore(DataStore):
         schema = "https" if self.secure else "http"
         return f"{schema}://{self.endpoint}"
 
-
+    @property
+    def filesystem(self):
         """return fsspec file system object, if supported"""
         if self._filesystem:
             return self._filesystem
-        try:
-            import v3iofs  # noqa
-        except ImportError as exc:
-            if not silent:
-                raise ImportError(
-                    "v3iofs or storey not installed, run pip install storey"
-                ) from exc
-            return None
         self._filesystem = fsspec.filesystem("v3io", **self.get_storage_options())
         return self._filesystem
 
     def get_storage_options(self):
-
+        res = dict(
             v3io_access_key=self._get_secret_or_env("V3IO_ACCESS_KEY"),
             v3io_api=mlrun.mlconf.v3io_api,
         )
+        return self._sanitize_storage_options(res)
 
     def _upload(self, key: str, src_path: str, max_chunk_size: int = ONE_GB):
         """helper function for upload method, allows for controlling max_chunk_size in testing"""
@@ -150,15 +145,18 @@ class V3ioStore(DataStore):
             data = memoryview(data)
         except TypeError:
             pass
-
-
-
-
-
-
-
-
-
+
+        with requests.Session() as requests_session:
+            while buffer_offset < buffer_size:
+                chunk_size = min(buffer_size - buffer_offset, max_chunk_size)
+                http_put(
+                    self.url + self._join(key),
+                    data[buffer_offset : buffer_offset + chunk_size],
+                    append_header if buffer_offset else self.headers,
+                    None,
+                    requests_session,
+                )
+                buffer_offset += chunk_size
 
     def put(self, key, data, append=False):
         return self._put(key, data)
@@ -206,7 +204,7 @@ class V3ioStore(DataStore):
         """Recursive rm file/folder
         Workaround for v3io-fs not supporting recursive directory removal"""
 
-        file_system = self.
+        file_system = self.filesystem
         if isinstance(path, str):
             path = [path]
         maxdepth = maxdepth if not maxdepth else maxdepth - 1
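The rewritten _put loop streams the buffer in chunks over a single requests.Session, sending the first chunk with the regular headers and every later chunk with an append header. A generic, self-contained sketch of that pattern; upload_in_chunks and its header arguments are illustrative and not the v3io http_put helper.

# Chunked HTTP PUT sketch (generic, not mlrun's http_put).
import requests


def upload_in_chunks(url, data, headers, append_header, max_chunk_size=1024 * 1024):
    """Send `data` to `url` in sequential PUTs; chunks after the first use append_header."""
    data = memoryview(data)
    buffer_size = len(data)
    buffer_offset = 0
    with requests.Session() as session:
        while buffer_offset < buffer_size:
            chunk_size = min(buffer_size - buffer_offset, max_chunk_size)
            response = session.put(
                url,
                data=bytes(data[buffer_offset : buffer_offset + chunk_size]),
                headers=append_header if buffer_offset else headers,
            )
            response.raise_for_status()
            buffer_offset += chunk_size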
mlrun/db/httpdb.py
CHANGED
@@ -707,7 +707,7 @@ class HTTPRunDB(RunDBInterface):
         :param state: List only runs whose state is specified.
         :param sort: Whether to sort the result according to their start time. Otherwise, results will be
             returned by their internal order in the DB (order will not be guaranteed).
-        :param last: Deprecated - currently not used.
+        :param last: Deprecated - currently not used (will be removed in 1.8.0).
         :param iter: If ``True`` return runs from all iterations. Otherwise, return only runs whose ``iter`` is 0.
         :param start_time_from: Filter by run start time in ``[start_time_from, start_time_to]``.
         :param start_time_to: Filter by run start time in ``[start_time_from, start_time_to]``.
@@ -733,6 +733,13 @@ class HTTPRunDB(RunDBInterface):
                 "using the `with_notifications` flag."
             )
 
+        if last:
+            # TODO: Remove this in 1.8.0
+            warnings.warn(
+                "'last' is deprecated and will be removed in 1.8.0.",
+                FutureWarning,
+            )
+
         if (
             not name
             and not uid
@@ -1310,12 +1317,12 @@ class HTTPRunDB(RunDBInterface):
 
     def remote_builder(
         self,
-        func,
-        with_mlrun,
-        mlrun_version_specifier=None,
-        skip_deployed=False,
-        builder_env=None,
-        force_build=False,
+        func: BaseRuntime,
+        with_mlrun: bool,
+        mlrun_version_specifier: Optional[str] = None,
+        skip_deployed: bool = False,
+        builder_env: Optional[dict] = None,
+        force_build: bool = False,
     ):
         """Build the pod image for a function, for execution on a remote cluster. This is executed by the MLRun
         API server, and creates a Docker image out of the function provided and any specific build
@@ -1330,6 +1337,20 @@ class HTTPRunDB(RunDBInterface):
         :param builder_env: Kaniko builder pod env vars dict (for config/credentials)
         :param force_build: Force building the image, even when no changes were made
         """
+        is_s3_source = func.spec.build.source and func.spec.build.source.startswith(
+            "s3://"
+        )
+        is_ecr_image = mlrun.utils.is_ecr_url(config.httpdb.builder.docker_registry)
+        if not func.spec.build.load_source_on_run and is_s3_source and is_ecr_image:
+            logger.warning(
+                "Building a function image to ECR and loading an S3 source to the image may require conflicting access "
+                "keys. Only the permissions granted to the platform's configured secret will take affect "
+                "(see mlrun.mlconf.httpdb.builder.docker_registry_secret). "
+                "In case the permissions are limited to ECR scope, you may use pull_at_runtime=True instead",
+                source=func.spec.build.source,
+                load_source_on_run=func.spec.build.load_source_on_run,
+                default_docker_registry=config.httpdb.builder.docker_registry,
+            )
 
         try:
             req = {
@@ -1466,6 +1487,54 @@ class HTTPRunDB(RunDBInterface):
         response = self.api_call("GET", path, error_message)
         return mlrun.common.schemas.BackgroundTask(**response.json())
 
+    def list_project_background_tasks(
+        self,
+        project: Optional[str] = None,
+        state: Optional[str] = None,
+        created_from: Optional[datetime] = None,
+        created_to: Optional[datetime] = None,
+        last_update_time_from: Optional[datetime] = None,
+        last_update_time_to: Optional[datetime] = None,
+    ) -> list[mlrun.common.schemas.BackgroundTask]:
+        """
+        Retrieve updated information on project background tasks being executed.
+        If no filter is provided, will return background tasks from the last week.
+
+        :param project: Project name (defaults to mlrun.mlconf.default_project).
+        :param state: List only background tasks whose state is specified.
+        :param created_from: Filter by background task created time in ``[created_from, created_to]``.
+        :param created_to: Filter by background task created time in ``[created_from, created_to]``.
+        :param last_update_time_from: Filter by background task last update time in
+            ``(last_update_time_from, last_update_time_to)``.
+        :param last_update_time_to: Filter by background task last update time in
+            ``(last_update_time_from, last_update_time_to)``.
+        """
+        project = project or config.default_project
+        if (
+            not state
+            and not created_from
+            and not created_to
+            and not last_update_time_from
+            and not last_update_time_to
+        ):
+            # default to last week on no filter
+            created_from = datetime.now() - timedelta(days=7)
+
+        params = {
+            "state": state,
+            "created_from": datetime_to_iso(created_from),
+            "created_to": datetime_to_iso(created_to),
+            "last_update_time_from": datetime_to_iso(last_update_time_from),
+            "last_update_time_to": datetime_to_iso(last_update_time_to),
+        }
+
+        path = f"projects/{project}/background-tasks"
+        error_message = f"Failed listing project background task. project={project}"
+        response = self.api_call("GET", path, error_message, params=params)
+        return mlrun.common.schemas.BackgroundTaskList(
+            **response.json()
+        ).background_tasks
+
     def get_background_task(self, name: str) -> mlrun.common.schemas.BackgroundTask:
         """Retrieve updated information on a background task being executed."""
 
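A hedged usage sketch for the new list_project_background_tasks client call, assuming an MLRun API server reachable through mlrun.get_run_db(); the project name and filter values are examples only.

# Usage sketch (assumed project name and filters).
from datetime import datetime, timedelta

import mlrun

db = mlrun.get_run_db()

# With no filters the client defaults to "created in the last week";
# here we explicitly ask for running tasks from the last day.
tasks = db.list_project_background_tasks(
    project="my-project",
    state="running",
    created_from=datetime.now() - timedelta(days=1),
)
for task in tasks:
    # BackgroundTask fields per mlrun.common.schemas
    print(task.metadata.name, task.status.state)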
mlrun/errors.py
CHANGED
@@ -207,6 +207,10 @@ class MLRunTaskCancelledError(Exception):
     pass
 
 
+class MLRunValueError(ValueError):
+    pass
+
+
 class MLRunFatalFailureError(Exception):
     """
     Internal exception meant to be used inside mlrun.utils.helpers.retry_until_successful to signal the loop not to
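Because the new MLRunValueError subclasses the builtin ValueError, callers that already catch ValueError keep working unchanged. A small illustrative sketch; parse_positive is a hypothetical function.

# MLRunValueError is caught by existing ValueError handlers.
import mlrun.errors


def parse_positive(value: str) -> int:
    number = int(value)
    if number <= 0:
        raise mlrun.errors.MLRunValueError("expected a positive integer")
    return number


try:
    parse_positive("-3")
except ValueError as exc:  # also catches MLRunValueError
    print(f"rejected: {exc}")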
mlrun/execution.py
CHANGED
@@ -411,7 +411,7 @@ class MLClientCtx(object):
             self._artifacts_manager.artifacts[key] = artifact_obj
         self._state = status.get("state", self._state)
 
-        #
+        # No need to store the run for every worker
         if store_run and self.is_logging_worker():
             self.store_run()
         return self
@@ -434,6 +434,12 @@ class MLClientCtx(object):
             context.set_label("framework", "sklearn")
 
         """
+        if not self.is_logging_worker():
+            logger.warning(
+                "Setting labels is only supported in the logging worker, ignoring"
+            )
+            return
+
         if replace or not self._labels.get(key):
             self._labels[key] = str(value)
 
@@ -974,10 +980,11 @@ class MLClientCtx(object):
         """
         # If it's a OpenMPI job, get the global rank and compare to the logging rank (worker) set in MLRun's
         # configuration:
-
+        labels = self.labels
+        if "host" in labels and labels.get("kind", "job") == "mpijob":
             # The host (pod name) of each worker is created by k8s, and by default it uses the rank number as the id in
             # the following template: ...-worker-<rank>
-            rank = int(
+            rank = int(labels["host"].rsplit("-", 1)[1])
             return rank == mlrun.mlconf.packagers.logging_worker
 
         # Single worker is always the logging worker:
@@ -1004,7 +1011,6 @@ class MLClientCtx(object):
             _struct[key] = val
 
         struct = {
-            "metadata.labels": self._labels,
             "metadata.annotations": self._annotations,
             "spec.parameters": self._parameters,
             "spec.outputs": self._outputs,
@@ -1019,6 +1025,9 @@ class MLClientCtx(object):
         if self._state != "completed":
             struct["status.state"] = self._state
 
+        if self.is_logging_worker():
+            struct["metadata.labels"] = self._labels
+
         set_if_not_none(struct, "status.error", self._error)
         set_if_not_none(struct, "status.commit", self._commit)
         set_if_not_none(struct, "status.iterations", self._iteration_results)
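The logging-worker check now reads the rank of an OpenMPI worker from its pod name suffix (...-worker-<rank>), and only that worker stores labels and the run object. A standalone sketch of the rank logic; is_logging_worker and the pod names below are illustrative.

# Rank extraction sketch for mpijob pods named "...-worker-<rank>".
def is_logging_worker(labels: dict, logging_worker_rank: int = 0) -> bool:
    if "host" in labels and labels.get("kind", "job") == "mpijob":
        rank = int(labels["host"].rsplit("-", 1)[1])
        return rank == logging_worker_rank
    # A single (non-MPI) worker is always the logging worker
    return True


print(is_logging_worker({"kind": "mpijob", "host": "train-abc-worker-0"}))  # True
print(is_logging_worker({"kind": "mpijob", "host": "train-abc-worker-3"}))  # False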
mlrun/feature_store/api.py
CHANGED
@@ -933,7 +933,7 @@ def _deploy_ingestion_service_v2(
         source = HTTPSource()
         func = mlrun.code_to_function("ingest", kind="serving").apply(mount_v3io())
         config = RunConfig(function=func)
-
+        my_set.deploy_ingestion_service(source, run_config=config)
 
     :param featureset: feature set object or uri
     :param source: data source object describing the online or offline source
@@ -1025,7 +1025,7 @@ def deploy_ingestion_service(
         source = HTTPSource()
         func = mlrun.code_to_function("ingest", kind="serving").apply(mount_v3io())
         config = RunConfig(function=func)
-
+        my_set.deploy_ingestion_service(source, run_config=config)
 
     :param featureset: feature set object or uri
     :param source: data source object describing the online or offline source
@@ -1036,8 +1036,7 @@ def deploy_ingestion_service(
 
     :return: URL to access the deployed ingestion service
     """
-    endpoint, _ =
-        featureset=featureset,
+    endpoint, _ = featureset.deploy_ingestion_service(
         source=source,
         targets=targets,
         name=name,
mlrun/launcher/base.py
CHANGED
@@ -396,10 +396,10 @@ class BaseLauncher(abc.ABC):
                 status=run.status.state,
                 name=run.metadata.name,
             )
-            if
-
-                mlrun.runtimes.constants.RunStates.
-
+            if (
+                run.status.state
+                in mlrun.runtimes.constants.RunStates.error_and_abortion_states()
+            ):
                 if runtime._is_remote and not runtime.is_child:
                     logger.error(
                         "Run did not finish successfully",
mlrun/lists.py
CHANGED
mlrun/model.py
CHANGED
@@ -1259,8 +1259,15 @@ class RunObject(RunTemplate):
         """error string if failed"""
         if self.status:
             unknown_error = ""
-            if
+            if (
+                self.status.state
+                in mlrun.runtimes.constants.RunStates.abortion_states()
+            ):
+                unknown_error = "Run was aborted"
+
+            elif self.status.state in mlrun.runtimes.constants.RunStates.error_states():
                 unknown_error = "Unknown error"
+
             return (
                 self.status.error
                 or self.status.reason
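The error string now distinguishes aborted runs from failed ones using the RunStates helpers referenced above. Below is a standalone rendition of that logic; the state sets are assumptions for illustration, the real membership comes from mlrun.runtimes.constants.RunStates.

# Standalone sketch of the error-text logic (state sets are assumed).
ABORTION_STATES = {"aborted", "aborting"}        # assumed membership
ERROR_STATES = {"error", "aborted", "aborting"}  # assumed membership


def default_error_text(state: str, error: str = "", reason: str = "") -> str:
    unknown_error = ""
    if state in ABORTION_STATES:
        unknown_error = "Run was aborted"
    elif state in ERROR_STATES:
        unknown_error = "Unknown error"
    return error or reason or unknown_error


print(default_error_text("aborted"))  # -> "Run was aborted"
print(default_error_text("error"))    # -> "Unknown error"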
mlrun/model_monitoring/api.py
CHANGED
@@ -132,7 +132,6 @@ def record_results(
     drift_threshold: typing.Optional[float] = None,
     possible_drift_threshold: typing.Optional[float] = None,
     trigger_monitoring_job: bool = False,
-    last_in_batch_set: typing.Optional[bool] = True,
     artifacts_tag: str = "",
     default_batch_image="mlrun/mlrun",
 ) -> ModelEndpoint:
@@ -165,14 +164,6 @@ def record_results(
     :param possible_drift_threshold: The threshold of which to mark possible drifts.
     :param trigger_monitoring_job: If true, run the batch drift job. If not exists, the monitoring batch function
         will be registered through MLRun API with the provided image.
-    :param last_in_batch_set: This flag can (and should only) be used when the model endpoint does not have
-        model-monitoring set.
-        If set to `True` (the default), this flag marks the current monitoring window
-        (on this monitoring endpoint) is completed - the data inferred so far is assumed
-        to be the total data for this monitoring window.
-        You may want to set this flag to `False` if you want to record multiple results in
-        close time proximity ("batch set"). In this case, set this flag to `False` on all
-        but the last batch in the set.
     :param artifacts_tag: Tag to use for all the artifacts resulted from the function. Will be relevant
         only if the monitoring batch job has been triggered.
 
@@ -206,25 +197,14 @@ def record_results(
     )
 
     if model_endpoint.spec.stream_path == "":
-
-
-
-
-
-
-
-
-        )
-    else:
-        if last_in_batch_set is not None:
-            logger.warning(
-                "`last_in_batch_set` is not `None`, but the model endpoint has a stream path. "
-                "Ignoring `last_in_batch_set`, as it is relevant only when the model "
-                "endpoint does not have a model monitoring infrastructure in place (i.e. stream path is "
-                " empty). Set `last_in_batch_set` to `None` to resolve this warning.",
-                project=project,
-                endpoint_id=model_endpoint.metadata.uid,
-            )
+        logger.info(
+            "Updating the last request time to mark the current monitoring window as completed",
+            project=project,
+            endpoint_id=model_endpoint.metadata.uid,
+        )
+        bump_model_endpoint_last_request(
+            project=project, model_endpoint=model_endpoint, db=db
+        )
 
     if trigger_monitoring_job:
         # Run the monitoring batch drift job
@@ -612,9 +592,7 @@ def read_dataset_as_dataframe(
         if label_columns is None:
             label_columns = dataset.status.label_column
         # Get the features and parse to DataFrame:
-        dataset =
-            dataset.uri, drop_columns=drop_columns
-        ).to_dataframe()
+        dataset = dataset.get_offline_features(drop_columns=drop_columns).to_dataframe()
 
     elif isinstance(dataset, (list, np.ndarray)):
         if not feature_columns:
mlrun/model_monitoring/batch.py
CHANGED
@@ -117,20 +117,21 @@ class KullbackLeiblerDivergence(HistogramDistanceMetric, metric_name="kld"):
     def _calc_kl_div(
         actual_dist: np.array, expected_dist: np.array, kld_scaling: float
     ) -> float:
-        """Return the
+        """Return the asymmetric KL divergence"""
+        # We take 0*log(0) == 0 for this calculation
+        mask = actual_dist != 0
+        actual_dist = actual_dist[mask]
+        expected_dist = expected_dist[mask]
         return np.sum(
-
-
-            (
-
-                actual_dist
-                / np.where(expected_dist != 0, expected_dist, kld_scaling)
-            ),
-            0,
-        )
+            actual_dist
+            * np.log(
+                actual_dist / np.where(expected_dist != 0, expected_dist, kld_scaling)
+            ),
         )
 
-    def compute(
+    def compute(
+        self, capping: Optional[float] = None, kld_scaling: float = 1e-4
+    ) -> float:
         """
         :param capping: A bounded value for the KL Divergence. For infinite distance, the result is replaced with
             the capping value which indicates a huge differences between the distributions.
@@ -141,8 +142,8 @@ class KullbackLeiblerDivergence(HistogramDistanceMetric, metric_name="kld"):
         t_u = self._calc_kl_div(self.distrib_t, self.distrib_u, kld_scaling)
         u_t = self._calc_kl_div(self.distrib_u, self.distrib_t, kld_scaling)
         result = t_u + u_t
-        if capping:
-            return capping
+        if capping and result == float("inf"):
+            return capping
         return result
 
 
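The KL-divergence fix masks zero bins in the actual distribution (treating 0*log(0) as 0), replaces zero bins in the expected distribution with a small scaling constant, and applies the capping value only to an infinite result. A numeric sketch of the same computation using numpy alone, not the HistogramDistanceMetric class itself.

# Numeric sketch of the corrected symmetric KL divergence.
import numpy as np


def kl_divergence(actual, expected, kld_scaling=1e-4):
    actual = np.asarray(actual, dtype=float)
    expected = np.asarray(expected, dtype=float)
    mask = actual != 0  # 0 * log(0) is taken as 0
    actual, expected = actual[mask], expected[mask]
    return np.sum(
        actual * np.log(actual / np.where(expected != 0, expected, kld_scaling))
    )


def symmetric_kl(dist_t, dist_u, capping=None, kld_scaling=1e-4):
    result = kl_divergence(dist_t, dist_u, kld_scaling) + kl_divergence(
        dist_u, dist_t, kld_scaling
    )
    if capping and result == float("inf"):
        return capping
    return result


print(symmetric_kl([0.5, 0.5, 0.0], [0.25, 0.25, 0.5]))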