mlrun 1.6.0rc11__py3-none-any.whl → 1.6.0rc13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mlrun might be problematic.
- mlrun/__main__.py +2 -2
- mlrun/config.py +2 -2
- mlrun/datastore/azure_blob.py +25 -17
- mlrun/datastore/datastore_profile.py +45 -0
- mlrun/datastore/dbfs_store.py +3 -1
- mlrun/datastore/google_cloud_storage.py +4 -1
- mlrun/datastore/s3.py +4 -4
- mlrun/datastore/sources.py +4 -4
- mlrun/datastore/targets.py +13 -3
- mlrun/feature_store/retrieval/base.py +24 -0
- mlrun/feature_store/retrieval/dask_merger.py +8 -0
- mlrun/feature_store/retrieval/local_merger.py +9 -2
- mlrun/model_monitoring/api.py +37 -6
- mlrun/model_monitoring/controller.py +6 -5
- mlrun/model_monitoring/controller_handler.py +1 -2
- mlrun/model_monitoring/helpers.py +59 -1
- mlrun/projects/operations.py +24 -10
- mlrun/projects/project.py +52 -32
- mlrun/runtimes/databricks_job/databricks_runtime.py +128 -62
- mlrun/runtimes/databricks_job/databricks_wrapper.py +0 -1
- mlrun/runtimes/function.py +8 -1
- mlrun/runtimes/kubejob.py +7 -1
- mlrun/serving/server.py +18 -1
- mlrun/serving/states.py +5 -1
- mlrun/utils/helpers.py +14 -10
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.6.0rc11.dist-info → mlrun-1.6.0rc13.dist-info}/METADATA +29 -29
- {mlrun-1.6.0rc11.dist-info → mlrun-1.6.0rc13.dist-info}/RECORD +32 -32
- {mlrun-1.6.0rc11.dist-info → mlrun-1.6.0rc13.dist-info}/LICENSE +0 -0
- {mlrun-1.6.0rc11.dist-info → mlrun-1.6.0rc13.dist-info}/WHEEL +0 -0
- {mlrun-1.6.0rc11.dist-info → mlrun-1.6.0rc13.dist-info}/entry_points.txt +0 -0
- {mlrun-1.6.0rc11.dist-info → mlrun-1.6.0rc13.dist-info}/top_level.txt +0 -0
mlrun/__main__.py
CHANGED
@@ -547,7 +547,7 @@ def build(
     archive = archive or mlconf.default_archive
     if archive:
         src = b.source or "./"
-        logger.info(f"
+        logger.info(f"Uploading data from {src} to {archive}")
         target = archive if archive.endswith("/") else archive + "/"
         target += f"src-{meta.project}-{meta.name}-{meta.tag or 'latest'}.tar.gz"
         mlrun.datastore.utils.upload_tarball(src, target)
@@ -582,7 +582,7 @@ def build(
             fp.write(image)
         with open("/tmp/fullimage", "w") as fp:
            fp.write(full_image)
-        print("
+        print("Full image path = ", full_image)

         print(f"Function built, state={state} image={image}")
     else:

mlrun/config.py
CHANGED
@@ -251,8 +251,8 @@ default_config = {
         },
         "port": 8080,
         "dirpath": expanduser("~/.mlrun/db"),
+        # in production envs we recommend to use a real db (e.g. mysql)
         "dsn": "sqlite:///db/mlrun.db?check_same_thread=false",
-        "old_dsn": "",
         "debug": False,
         "user": "",
         "password": "",
@@ -1130,7 +1130,7 @@ class Config:

     def is_explicit_ack(self) -> bool:
         return self.httpdb.nuclio.explicit_ack == "enabled" and (
-            not self.nuclio_version or self.nuclio_version >= "1.12.
+            not self.nuclio_version or self.nuclio_version >= "1.12.9"
         )

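Note (not part of the diff): the new comment above points at swapping the default SQLite DSN for a real database in production. A minimal sketch of overriding it from code, assuming the dsn key still sits under the httpdb config section and using placeholder MySQL connection details:

import mlrun

# Placeholder DSN; driver, host, credentials and database name are illustrative only.
mlrun.mlconf.httpdb.dsn = "mysql+pymysql://mlrun:mlrun@mysql.example:3306/mlrun"
print(mlrun.mlconf.httpdb.dsn)
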
mlrun/datastore/azure_blob.py
CHANGED
@@ -15,19 +15,21 @@
 import time
 from pathlib import Path

-import fsspec
 from azure.storage.blob import BlobServiceClient
+from fsspec.registry import get_filesystem_class

 import mlrun.errors
 from mlrun.errors import err_to_str

-from .base import DataStore, FileStats
+from .base import DataStore, FileStats, makeDatastoreSchemaSanitizer

 # Azure blobs will be represented with the following URL: az://<container name>. The storage account is already
 # pointed to by the connection string, so the user is not expected to specify it in any way.


 class AzureBlobStore(DataStore):
+    using_bucket = True
+
     def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
         super().__init__(parent, name, schema, endpoint, secrets=secrets)
         self.bsc = None
@@ -50,25 +52,31 @@ class AzureBlobStore(DataStore):
                 f"Azure adlfs not installed, run pip install adlfs, {err_to_str(exc)}"
             )
             return None
-
+        # in order to support az and wasbs kinds.
+        filesystem_class = get_filesystem_class(protocol=self.kind)
+        self._filesystem = makeDatastoreSchemaSanitizer(
+            filesystem_class,
+            using_bucket=self.using_bucket,
+            **self.get_storage_options(),
+        )
         return self._filesystem

     def get_storage_options(self):
         return dict(
-            account_name=self._get_secret_or_env("
-            or self._get_secret_or_env("
-            account_key=self._get_secret_or_env("
-            or self._get_secret_or_env("
-            connection_string=self._get_secret_or_env("
-            or self._get_secret_or_env("
-            tenant_id=self._get_secret_or_env("
-            or self._get_secret_or_env("
-            client_id=self._get_secret_or_env("
-            or self._get_secret_or_env("
-            client_secret=self._get_secret_or_env("
-            or self._get_secret_or_env("
-            sas_token=self._get_secret_or_env("
-            or self._get_secret_or_env("
+            account_name=self._get_secret_or_env("account_name")
+            or self._get_secret_or_env("AZURE_STORAGE_ACCOUNT_NAME"),
+            account_key=self._get_secret_or_env("account_key")
+            or self._get_secret_or_env("AZURE_STORAGE_KEY"),
+            connection_string=self._get_secret_or_env("connection_string")
+            or self._get_secret_or_env("AZURE_STORAGE_CONNECTION_STRING"),
+            tenant_id=self._get_secret_or_env("tenant_id")
+            or self._get_secret_or_env("AZURE_STORAGE_TENANT_ID"),
+            client_id=self._get_secret_or_env("client_id")
+            or self._get_secret_or_env("AZURE_STORAGE_CLIENT_ID"),
+            client_secret=self._get_secret_or_env("client_secret")
+            or self._get_secret_or_env("AZURE_STORAGE_CLIENT_SECRET"),
+            sas_token=self._get_secret_or_env("sas_token")
+            or self._get_secret_or_env("AZURE_STORAGE_SAS_TOKEN"),
             credential=self._get_secret_or_env("credential"),
         )

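Note (not part of the diff): each storage option above now tries a plain secret key first and falls back to the conventional AZURE_STORAGE_* environment variable. A rough standalone equivalent of that lookup, with a hypothetical helper standing in for DataStore._get_secret_or_env:

import os

def get_secret_or_env(key, secrets=None):
    # check explicitly provided secrets first, then the process environment
    return (secrets or {}).get(key) or os.environ.get(key)

secrets = {"account_name": "myaccount"}
account_name = get_secret_or_env("account_name", secrets) or get_secret_or_env(
    "AZURE_STORAGE_ACCOUNT_NAME", secrets
)
print(account_name)  # -> "myaccount"
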
mlrun/datastore/datastore_profile.py
CHANGED
@@ -241,6 +241,51 @@ class DatastoreProfileGCS(DatastoreProfile):
         return res if res else None


+class DatastoreProfileAzureBlob(DatastoreProfile):
+    type: str = pydantic.Field("az")
+    _private_attributes = (
+        "connection_string",
+        "account_key",
+        "client_secret",
+        "sas_token",
+        "credential",
+    )
+    connection_string: typing.Optional[str] = None
+    account_name: typing.Optional[str] = None
+    account_key: typing.Optional[str] = None
+    tenant_id: typing.Optional[str] = None
+    client_id: typing.Optional[str] = None
+    client_secret: typing.Optional[str] = None
+    sas_token: typing.Optional[str] = None
+    credential: typing.Optional[str] = None
+
+    def url(self, subpath) -> str:
+        if subpath.startswith("/"):
+            # in azure the path after schema is starts with bucket, wherefore it should not start with "/".
+            subpath = subpath[1:]
+        return f"az://{subpath}"
+
+    def secrets(self) -> dict:
+        res = {}
+        if self.connection_string:
+            res["connection_string"] = self.connection_string
+        if self.account_name:
+            res["account_name"] = self.account_name
+        if self.account_key:
+            res["account_key"] = self.account_key
+        if self.tenant_id:
+            res["tenant_id"] = self.tenant_id
+        if self.client_id:
+            res["client_id"] = self.client_id
+        if self.client_secret:
+            res["client_secret"] = self.client_secret
+        if self.sas_token:
+            res["sas_token"] = self.sas_token
+        if self.credential:
+            res["credential"] = self.credential
+        return res if res else None
+
+
 class DatastoreProfile2Json(pydantic.BaseModel):
     @staticmethod
     def _to_json(attributes):

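Usage sketch for the new profile class (not part of the diff; it assumes the DatastoreProfile base class takes a profile name, and the credentials are placeholders):

from mlrun.datastore.datastore_profile import DatastoreProfileAzureBlob

profile = DatastoreProfileAzureBlob(
    name="blobs",                 # assumed base-class field
    account_name="myaccount",     # placeholder
    account_key="<storage-key>",  # placeholder
)
print(profile.url("/container/data/file.parquet"))  # az://container/data/file.parquet
print(profile.secrets())  # only the fields that were set: account_name and account_key
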
mlrun/datastore/dbfs_store.py
CHANGED
@@ -15,6 +15,7 @@
 import pathlib

 from fsspec.implementations.dbfs import DatabricksFile, DatabricksFileSystem
+from fsspec.registry import get_filesystem_class

 import mlrun.errors

@@ -86,9 +87,10 @@ class DBFSStore(DataStore):

     def get_filesystem(self, silent=True):
         """return fsspec file system object, if supported"""
+        filesystem_class = get_filesystem_class(protocol=self.kind)
         if not self._filesystem:
             self._filesystem = makeDatastoreSchemaSanitizer(
-                cls=
+                cls=filesystem_class,
                 using_bucket=False,
                 **self.get_storage_options(),
             )

mlrun/datastore/google_cloud_storage.py
CHANGED
@@ -15,6 +15,8 @@ import os
 import tempfile
 from pathlib import Path

+from fsspec.registry import get_filesystem_class
+
 import mlrun.errors
 from mlrun.utils import logger

@@ -73,8 +75,9 @@ class GoogleCloudStorageStore(DataStore):
                     "Google gcsfs not installed, run pip install gcsfs"
                 ) from exc
             return None
+        filesystem_class = get_filesystem_class(protocol=self.kind)
         self._filesystem = makeDatastoreSchemaSanitizer(
-
+            filesystem_class,
             using_bucket=self.using_bucket,
             **self.get_storage_options(),
         )

mlrun/datastore/s3.py
CHANGED
@@ -15,6 +15,7 @@
 import time

 import boto3
+from fsspec.registry import get_filesystem_class

 import mlrun.errors

@@ -113,17 +114,16 @@ class S3Store(DataStore):
         if self._filesystem:
             return self._filesystem
         try:
-            # noqa
-            import s3fs
+            import s3fs  # noqa
         except ImportError as exc:
             if not silent:
                 raise ImportError(
                     "AWS s3fs not installed, run pip install s3fs"
                 ) from exc
             return None
-
+        filesystem_class = get_filesystem_class(protocol=self.kind)
         self._filesystem = makeDatastoreSchemaSanitizer(
-
+            filesystem_class,
             using_bucket=self.using_bucket,
             **self.get_storage_options(),
         )

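Note (not part of the diff): the same pattern now appears in the Azure, DBFS, GCS and S3 stores. Instead of importing a concrete fsspec implementation, each store asks fsspec's registry for the class that matches its own URL scheme, so e.g. both az:// and wasbs:// resolve to the right backend. A minimal standalone illustration (requires the corresponding backend package, e.g. s3fs, to be installed):

from fsspec.registry import get_filesystem_class

fs_cls = get_filesystem_class("s3")  # resolves to s3fs.S3FileSystem when s3fs is installed
print(fs_cls.__module__, fs_cls.__name__)

# storage options are forwarded as keyword arguments, mirroring get_storage_options()
fs = fs_cls(anon=True)
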
mlrun/datastore/sources.py
CHANGED
@@ -177,7 +177,7 @@ class CSVSource(BaseSourceDriver):
             parse_dates.append(time_field)

         data_item = mlrun.store_manager.object(self.path)
-        if self.path.startswith("ds://"):
+        if self.path and self.path.startswith("ds://"):
             store, path = mlrun.store_manager.get_or_create_store(self.path)
             path = store.url + path
         else:
@@ -193,7 +193,7 @@ class CSVSource(BaseSourceDriver):
         )

     def get_spark_options(self):
-        if self.path.startswith("ds://"):
+        if self.path and self.path.startswith("ds://"):
             store, path = mlrun.store_manager.get_or_create_store(self.path)
             path = store.url + path
         result = {
@@ -340,7 +340,7 @@ class ParquetSource(BaseSourceDriver):
         attributes["context"] = context

         data_item = mlrun.store_manager.object(self.path)
-        if self.path.startswith("ds://"):
+        if self.path and self.path.startswith("ds://"):
             store, path = mlrun.store_manager.get_or_create_store(self.path)
             path = store.url + path
         else:
@@ -357,7 +357,7 @@ class ParquetSource(BaseSourceDriver):
         )

     def get_spark_options(self):
-        if self.path.startswith("ds://"):
+        if self.path and self.path.startswith("ds://"):
             store, path = mlrun.store_manager.get_or_create_store(self.path)
             path = store.url + path
         result = {

mlrun/datastore/targets.py
CHANGED
@@ -877,7 +877,7 @@ class ParquetTarget(BaseStoreTarget):
         else:
             storage_options = storage_options or self.storage_options

-        graph.add_step(
+        step = graph.add_step(
             name=self.name or "ParquetTarget",
             after=after,
             graph_shape="cylinder",
@@ -894,6 +894,16 @@ class ParquetTarget(BaseStoreTarget):
             **self.attributes,
         )

+        original_to_dict = step.to_dict
+
+        def delete_update_last_written(*arg, **kargs):
+            result = original_to_dict(*arg, **kargs)
+            del result["class_args"]["update_last_written"]
+            return result
+
+        # update_last_written is not serializable (ML-5108)
+        step.to_dict = delete_update_last_written
+
     def get_spark_options(self, key_column=None, timestamp_key=None, overwrite=True):
         partition_cols = []
         if timestamp_key:
@@ -912,7 +922,7 @@ class ParquetTarget(BaseStoreTarget):
             if unit == time_partitioning_granularity:
                 break

-        if self.path.startswith("ds://"):
+        if self.path and self.path.startswith("ds://"):
             store, path = mlrun.store_manager.get_or_create_store(
                 self.get_target_path()
             )
@@ -1054,7 +1064,7 @@ class CSVTarget(BaseStoreTarget):
         )

     def get_spark_options(self, key_column=None, timestamp_key=None, overwrite=True):
-        if self.path.startswith("ds://"):
+        if self.path and self.path.startswith("ds://"):
             store, path = mlrun.store_manager.get_or_create_store(
                 self.get_target_path()
             )

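Note (not part of the diff): the to_dict wrapper above addresses ML-5108, where the ParquetTarget step's class_args carry an update_last_written argument that cannot be round-tripped when the graph is serialized. A small illustration of the underlying problem, using a stand-in callable:

import json

class_args = {
    "path": "memory://out.parquet",
    "update_last_written": lambda ts: ts,  # stand-in for the real callback
}

try:
    json.dumps(class_args)
except TypeError as err:
    print(err)  # Object of type function is not JSON serializable

class_args.pop("update_last_written")
print(json.dumps(class_args))  # serializes cleanly once the callable is dropped
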
mlrun/feature_store/retrieval/base.py
CHANGED
@@ -310,6 +310,7 @@ class BaseMerger(abc.ABC):
                 "start_time and end_time can only be provided in conjunction with "
                 "a timestamp column, or when the at least one feature_set has a timestamp key"
             )
+
         # join the feature data frames
         result_timestamp = self.merge(
             entity_timestamp_column=entity_timestamp_column,
@@ -383,6 +384,29 @@
     def _unpersist_df(self, df):
         pass

+    def _normalize_timestamp_column(
+        self,
+        entity_timestamp_column,
+        reference_df,
+        featureset_timestamp,
+        featureset_df,
+        featureset_name,
+    ):
+        reference_df_timestamp_type = reference_df[entity_timestamp_column].dtype.name
+        featureset_df_timestamp_type = featureset_df[featureset_timestamp].dtype.name
+
+        if reference_df_timestamp_type != featureset_df_timestamp_type:
+            logger.info(
+                f"Merger detected timestamp resolution incompatibility between feature set {featureset_name} and "
+                f"others: {reference_df_timestamp_type} and {featureset_df_timestamp_type}. Converting feature set "
+                f"timestamp column '{featureset_timestamp}' to type {reference_df_timestamp_type}."
+            )
+            featureset_df[featureset_timestamp] = featureset_df[
+                featureset_timestamp
+            ].astype(reference_df_timestamp_type)
+
+        return featureset_df
+
     def merge(
         self,
         entity_timestamp_column: str,

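Note (not part of the diff): what _normalize_timestamp_column guards against is two frames whose timestamp columns carry different datetime resolutions, which would not align cleanly in merge_asof. An isolated illustration of the cast (assumes pandas >= 2.0, where non-nanosecond datetime64 resolutions are preserved):

import pandas as pd

entity_df = pd.DataFrame(
    {"ts": pd.to_datetime(["2024-01-01 00:00:00", "2024-01-01 00:01:00"])}
)  # ts is datetime64[ns]
featureset_df = pd.DataFrame(
    {"ts": pd.to_datetime(["2024-01-01 00:00:30"]).astype("datetime64[us]")}
)  # ts is datetime64[us]

print(entity_df["ts"].dtype.name, featureset_df["ts"].dtype.name)

# the merger casts the feature-set column to the entity frame's dtype before merging
if entity_df["ts"].dtype.name != featureset_df["ts"].dtype.name:
    featureset_df["ts"] = featureset_df["ts"].astype(entity_df["ts"].dtype.name)

print(featureset_df["ts"].dtype.name)  # now matches the entity frame
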
mlrun/feature_store/retrieval/dask_merger.py
CHANGED
@@ -52,6 +52,14 @@ class DaskFeatureMerger(BaseMerger):
     ):
         from dask.dataframe.multi import merge_asof

+        featureset_df = self._normalize_timestamp_column(
+            entity_timestamp_column,
+            entity_df,
+            featureset_timestamp,
+            featureset_df,
+            featureset_name,
+        )
+
         def sort_partition(partition, timestamp):
             return partition.sort_values(timestamp)

mlrun/feature_store/retrieval/local_merger.py
CHANGED
@@ -32,11 +32,10 @@ class LocalFeatureMerger(BaseMerger):
         entity_timestamp_column: str,
         featureset_name,
         featureset_timstamp,
-        featureset_df
+        featureset_df,
         left_keys: list,
         right_keys: list,
     ):
-
         index_col_not_in_entity = "index" not in entity_df.columns
         index_col_not_in_featureset = "index" not in featureset_df.columns
         entity_df[entity_timestamp_column] = pd.to_datetime(
@@ -48,6 +47,14 @@ class LocalFeatureMerger(BaseMerger):
         entity_df.sort_values(by=entity_timestamp_column, inplace=True)
         featureset_df.sort_values(by=featureset_timstamp, inplace=True)

+        featureset_df = self._normalize_timestamp_column(
+            entity_timestamp_column,
+            entity_df,
+            featureset_timstamp,
+            featureset_df,
+            featureset_name,
+        )
+
         merged_df = pd.merge_asof(
             entity_df,
             featureset_df,

mlrun/model_monitoring/api.py
CHANGED
@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#

 import datetime
 import hashlib
@@ -30,6 +29,7 @@ from mlrun.utils import logger

 from .batch import VirtualDrift
 from .features_drift_table import FeaturesDriftTablePlot
+from .helpers import bump_model_endpoint_last_request
 from .model_endpoint import ModelEndpoint

 # A union of all supported dataset types:
@@ -125,13 +125,14 @@ def record_results(
     model_endpoint_name: str,
     endpoint_id: str = "",
     function_name: str = "",
-    context: mlrun.MLClientCtx = None,
-    infer_results_df: pd.DataFrame = None,
-    sample_set_statistics: typing.
+    context: typing.Optional[mlrun.MLClientCtx] = None,
+    infer_results_df: typing.Optional[pd.DataFrame] = None,
+    sample_set_statistics: typing.Optional[dict[str, typing.Any]] = None,
     monitoring_mode: ModelMonitoringMode = ModelMonitoringMode.enabled,
-    drift_threshold: float = None,
-    possible_drift_threshold: float = None,
+    drift_threshold: typing.Optional[float] = None,
+    possible_drift_threshold: typing.Optional[float] = None,
     trigger_monitoring_job: bool = False,
+    last_in_batch_set: typing.Optional[bool] = True,
     artifacts_tag: str = "",
     default_batch_image="mlrun/mlrun",
 ) -> ModelEndpoint:
@@ -164,6 +165,14 @@ def record_results(
     :param possible_drift_threshold: The threshold of which to mark possible drifts.
     :param trigger_monitoring_job:   If true, run the batch drift job. If not exists, the monitoring batch function
                                      will be registered through MLRun API with the provided image.
+    :param last_in_batch_set:        This flag can (and should only) be used when the model endpoint does not have
+                                     model-monitoring set.
+                                     If set to `True` (the default), this flag marks the current monitoring window
+                                     (on this monitoring endpoint) is completed - the data inferred so far is assumed
+                                     to be the total data for this monitoring window.
+                                     You may want to set this flag to `False` if you want to record multiple results in
+                                     close time proximity ("batch set"). In this case, set this flag to `False` on all
+                                     but the last batch in the set.
     :param artifacts_tag:            Tag to use for all the artifacts resulted from the function. Will be relevant
                                      only if the monitoring batch job has been triggered.

@@ -186,6 +195,7 @@ def record_results(
         monitoring_mode=monitoring_mode,
         db_session=db,
     )
+    logger.debug("Model endpoint", endpoint=model_endpoint.to_dict())

    if infer_results_df is not None:
        # Write the monitoring parquet to the relevant model endpoint context
@@ -195,6 +205,27 @@ def record_results(
            infer_results_df=infer_results_df,
        )

+    if model_endpoint.spec.stream_path == "":
+        if last_in_batch_set:
+            logger.info(
+                "Updating the last request time to mark the current monitoring window as completed",
+                project=project,
+                endpoint_id=model_endpoint.metadata.uid,
+            )
+            bump_model_endpoint_last_request(
+                project=project, model_endpoint=model_endpoint, db=db
+            )
+    else:
+        if last_in_batch_set is not None:
+            logger.warning(
+                "`last_in_batch_set` is not `None`, but the model endpoint has a stream path. "
+                "Ignoring `last_in_batch_set`, as it is relevant only when the model "
+                "endpoint does not have a model monitoring infrastructure in place (i.e. stream path is "
+                " empty). Set `last_in_batch_set` to `None` to resolve this warning.",
+                project=project,
+                endpoint_id=model_endpoint.metadata.uid,
+            )
+
     if trigger_monitoring_job:
         # Run the monitoring batch drift job
         trigger_drift_batch_job(

mlrun/model_monitoring/controller.py
CHANGED
@@ -371,7 +371,7 @@ class MonitoringApplicationController:
         parquet_directory: str,
         storage_options: dict,
         model_monitoring_access_key: str,
-    ):
+    ) -> Optional[Tuple[str, Exception]]:
         """
         Process a model endpoint and trigger the monitoring applications. This function running on different process
         for each endpoint. In addition, this function will generate a parquet file that includes the relevant data
@@ -433,7 +433,7 @@ class MonitoringApplicationController:
                     start_time=start_infer_time,
                     end_time=end_infer_time,
                 )
-
+                continue

             # Continue if not enough events provided since the deployment of the model endpoint
             except FileNotFoundError:
@@ -442,7 +442,7 @@ class MonitoringApplicationController:
                     endpoint=endpoint[mm_constants.EventFieldType.UID],
                     min_required_events=mlrun.mlconf.model_endpoint_monitoring.parquet_batching_max_events,
                 )
-
+                continue

             # Get the timestamp of the latest request:
             latest_request = df[mm_constants.EventFieldType.TIMESTAMP].iloc[-1]
@@ -470,9 +470,10 @@ class MonitoringApplicationController:
                 model_monitoring_access_key=model_monitoring_access_key,
                 parquet_target_path=parquet_target_path,
             )
-        except
+        except Exception as e:
             logger.error(
-
+                "Encountered an exception",
+                endpoint_id=endpoint[mm_constants.EventFieldType.UID],
             )
             return endpoint_id, e

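Note (not part of the diff): the controller change makes each per-endpoint worker return either None or an (endpoint_id, exception) pair instead of failing silently, and the handler below then logs the collected endpoints_exceptions. A schematic of that pattern, with hypothetical names:

from typing import Optional, Tuple

def process_endpoint(endpoint_id: str) -> Optional[Tuple[str, Exception]]:
    try:
        # ... per-endpoint work would go here ...
        return None
    except Exception as e:
        return endpoint_id, e

endpoints_exceptions = {}
for endpoint_id in ["ep-1", "ep-2"]:
    result = process_endpoint(endpoint_id)
    if result is not None:
        endpoints_exceptions[result[0]] = result[1]

if endpoints_exceptions:
    print(endpoints_exceptions)  # roughly what the handler logs
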
mlrun/model_monitoring/controller_handler.py
CHANGED
@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#

 import mlrun
 from mlrun.model_monitoring.controller import MonitoringApplicationController
@@ -29,4 +28,4 @@ def handler(context: mlrun.run.MLClientCtx):
     )
     monitor_app_controller.run()
     if monitor_app_controller.endpoints_exceptions:
-
+        context.logger.error(monitor_app_controller.endpoints_exceptions)

mlrun/model_monitoring/helpers.py
CHANGED
@@ -11,13 +11,21 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#


+import datetime
 import typing

+import mlrun
 import mlrun.common.model_monitoring.helpers
 import mlrun.common.schemas
+from mlrun.common.schemas.model_monitoring import EventFieldType
+from mlrun.errors import MLRunInvalidArgumentError
+from mlrun.model_monitoring.model_endpoint import ModelEndpoint
+from mlrun.utils import logger
+
+if typing.TYPE_CHECKING:
+    from mlrun.db.base import RunDBInterface


 def get_stream_path(project: str = None, application_name: str = None):
@@ -89,3 +97,53 @@ def get_connection_string(secret_provider: typing.Callable = None) -> str:
         )
         or mlrun.mlconf.model_endpoint_monitoring.endpoint_store_connection
     )
+
+
+def bump_model_endpoint_last_request(
+    project: str,
+    model_endpoint: ModelEndpoint,
+    db: "RunDBInterface",
+    minutes_delta: int = 10,  # TODO: move to config - should be the same as `batch_interval`
+    seconds_delta: int = 1,
+) -> None:
+    """
+    Update the last request field of the model endpoint to be after the current last request time.
+
+    :param project:        Project name.
+    :param model_endpoint: Model endpoint object.
+    :param db:             DB interface.
+    :param minutes_delta:  Minutes delta to add to the last request time.
+    :param seconds_delta:  Seconds delta to add to the last request time. This is mainly to ensure that the last
+                           request time is strongly greater than the previous one (with respect to the window time)
+                           after adding the minutes delta.
+    """
+    if not model_endpoint.status.last_request:
+        logger.error(
+            "Model endpoint last request time is empty, cannot bump it.",
+            project=project,
+            endpoint_id=model_endpoint.metadata.uid,
+        )
+        raise MLRunInvalidArgumentError("Model endpoint last request time is empty")
+
+    bumped_last_request = (
+        datetime.datetime.fromisoformat(model_endpoint.status.last_request)
+        + datetime.timedelta(
+            minutes=minutes_delta,
+            seconds=seconds_delta,
+        )
+        + datetime.timedelta(
+            seconds=mlrun.mlconf.model_endpoint_monitoring.parquet_batching_timeout_secs
+        )
+    ).strftime(EventFieldType.TIME_FORMAT)
+    logger.info(
+        "Bumping model endpoint last request time",
+        project=project,
+        endpoint_id=model_endpoint.metadata.uid,
+        last_request=model_endpoint.status.last_request,
+        bumped_last_request=bumped_last_request,
+    )
+    db.patch_model_endpoint(
+        project=project,
+        endpoint_id=model_endpoint.metadata.uid,
+        attributes={EventFieldType.LAST_REQUEST: bumped_last_request},
+    )

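Note (not part of the diff): the date arithmetic in bump_model_endpoint_last_request, shown in isolation with stdlib only. The concrete values and the time format are placeholders; in the real function the deltas come from the arguments, the extra seconds from mlconf.model_endpoint_monitoring.parquet_batching_timeout_secs, and the format from EventFieldType.TIME_FORMAT:

import datetime

last_request = "2024-01-01 12:00:00.000000"  # placeholder ISO-format value
time_format = "%Y-%m-%d %H:%M:%S.%f"         # placeholder format string

bumped = (
    datetime.datetime.fromisoformat(last_request)
    + datetime.timedelta(minutes=10, seconds=1)  # minutes_delta, seconds_delta
    + datetime.timedelta(seconds=30)             # parquet batching timeout (placeholder)
).strftime(time_format)

print(bumped)  # 2024-01-01 12:10:31.000000
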
mlrun/projects/operations.py
CHANGED
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
+import warnings
 from typing import Dict, List, Optional, Union

 import kfp
@@ -270,10 +271,17 @@ def build_function(
                             e.g. extra_args="--skip-tls-verify --build-arg A=val"
     :param force_build:     Force building the image, even when no changes were made
     """
+    if not overwrite_build_params:
+        # TODO: change overwrite_build_params default to True in 1.8.0
+        warnings.warn(
+            "The `overwrite_build_params` parameter default will change from 'False' to 'True in 1.8.0.",
+            mlrun.utils.OverwriteBuildParamsWarning,
+        )
+
     engine, function = _get_engine_and_function(function, project_object)
     if function.kind in mlrun.runtimes.RuntimeKinds.nuclio_runtimes():
         raise mlrun.errors.MLRunInvalidArgumentError(
-            "
+            "Cannot build use deploy_function()"
         )
     if engine == "kfp":
         if overwrite_build_params:
@@ -291,15 +299,21 @@ def build_function(
             skip_deployed=skip_deployed,
         )
     else:
-
-
-
-
-
-
-
-
-
+        # TODO: remove filter once overwrite_build_params default is changed to True in 1.8.0
+        with warnings.catch_warnings():
+            warnings.simplefilter(
+                "ignore", category=mlrun.utils.OverwriteBuildParamsWarning
+            )
+
+            function.build_config(
+                image=image,
+                base_image=base_image,
+                commands=commands,
+                secret=secret_name,
+                requirements=requirements,
+                overwrite=overwrite_build_params,
+                extra_args=extra_args,
+            )
         ready = function.deploy(
             watch=True,
             with_mlrun=with_mlrun,