mlrun 1.10.0rc9__py3-none-any.whl → 1.10.0rc11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mlrun/artifacts/manager.py +1 -1
- mlrun/common/constants.py +12 -0
- mlrun/common/schemas/__init__.py +1 -0
- mlrun/common/schemas/model_monitoring/__init__.py +2 -0
- mlrun/common/schemas/model_monitoring/functions.py +2 -0
- mlrun/common/schemas/model_monitoring/model_endpoints.py +19 -1
- mlrun/common/schemas/serving.py +1 -0
- mlrun/common/schemas/workflow.py +8 -0
- mlrun/datastore/azure_blob.py +1 -1
- mlrun/datastore/base.py +4 -2
- mlrun/datastore/datastore.py +46 -14
- mlrun/datastore/google_cloud_storage.py +1 -1
- mlrun/datastore/s3.py +16 -5
- mlrun/datastore/sources.py +2 -2
- mlrun/datastore/targets.py +2 -2
- mlrun/db/__init__.py +0 -1
- mlrun/db/base.py +29 -0
- mlrun/db/httpdb.py +35 -0
- mlrun/db/nopdb.py +19 -0
- mlrun/execution.py +12 -0
- mlrun/frameworks/tf_keras/mlrun_interface.py +8 -19
- mlrun/frameworks/tf_keras/model_handler.py +21 -12
- mlrun/launcher/base.py +1 -0
- mlrun/launcher/client.py +1 -0
- mlrun/launcher/local.py +4 -0
- mlrun/model.py +15 -4
- mlrun/model_monitoring/applications/base.py +74 -56
- mlrun/model_monitoring/db/tsdb/base.py +52 -19
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +179 -11
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +26 -11
- mlrun/model_monitoring/helpers.py +48 -0
- mlrun/projects/__init__.py +1 -0
- mlrun/projects/pipelines.py +44 -1
- mlrun/projects/project.py +30 -0
- mlrun/runtimes/daskjob.py +2 -0
- mlrun/runtimes/kubejob.py +4 -0
- mlrun/runtimes/mpijob/abstract.py +2 -0
- mlrun/runtimes/mpijob/v1.py +2 -0
- mlrun/runtimes/nuclio/function.py +2 -0
- mlrun/runtimes/nuclio/serving.py +59 -0
- mlrun/runtimes/pod.py +3 -0
- mlrun/runtimes/remotesparkjob.py +2 -0
- mlrun/runtimes/sparkjob/spark3job.py +2 -0
- mlrun/serving/routers.py +17 -13
- mlrun/serving/server.py +97 -3
- mlrun/serving/states.py +146 -38
- mlrun/serving/system_steps.py +2 -1
- mlrun/serving/v2_serving.py +2 -2
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.10.0rc9.dist-info → mlrun-1.10.0rc11.dist-info}/METADATA +13 -7
- {mlrun-1.10.0rc9.dist-info → mlrun-1.10.0rc11.dist-info}/RECORD +55 -57
- {mlrun-1.10.0rc9.dist-info → mlrun-1.10.0rc11.dist-info}/licenses/LICENSE +1 -1
- mlrun/db/sql_types.py +0 -160
- mlrun/utils/db.py +0 -71
- {mlrun-1.10.0rc9.dist-info → mlrun-1.10.0rc11.dist-info}/WHEEL +0 -0
- {mlrun-1.10.0rc9.dist-info → mlrun-1.10.0rc11.dist-info}/entry_points.txt +0 -0
- {mlrun-1.10.0rc9.dist-info → mlrun-1.10.0rc11.dist-info}/top_level.txt +0 -0
mlrun/artifacts/manager.py
CHANGED
mlrun/common/constants.py
CHANGED
@@ -27,6 +27,10 @@ DASK_LABEL_PREFIX = "dask.org/"
 NUCLIO_LABEL_PREFIX = "nuclio.io/"
 RESERVED_TAG_NAME_LATEST = "latest"
 
+JOB_TYPE_WORKFLOW_RUNNER = "workflow-runner"
+JOB_TYPE_PROJECT_LOADER = "project-loader"
+JOB_TYPE_RERUN_WORKFLOW_RUNNER = "rerun-workflow-runner"
+
 
 class MLRunInternalLabels:
     ### dask
@@ -76,6 +80,9 @@ class MLRunInternalLabels:
     kind = "kind"
     component = "component"
    mlrun_type = "mlrun__type"
+    rerun_of = "rerun-of"
+    original_workflow_id = "original-workflow-id"
+    workflow_id = "workflow-id"
 
     owner = "owner"
     v3io_user = "v3io_user"
@@ -101,3 +108,8 @@ class MLRunInternalLabels:
 class DeployStatusTextKind(mlrun.common.types.StrEnum):
     logs = "logs"
     events = "events"
+
+
+class WorkflowSubmitMode(mlrun.common.types.StrEnum):
+    direct = "direct"  # call KFP retry API directly
+    rerun = "rerun"  # launch a RerunRunner function
mlrun/common/schemas/__init__.py
CHANGED
mlrun/common/schemas/model_monitoring/functions.py
CHANGED

@@ -34,6 +34,7 @@ class FunctionSummary(BaseModel)
     type: FunctionsType
     name: str
     application_class: str
+    project_name: str
     updated_time: datetime
     status: Optional[str] = None
     base_period: Optional[int] = None
@@ -59,6 +60,7 @@ class FunctionSummary(BaseModel)
             else func_dict["spec"]["graph"]["steps"]["PushToMonitoringWriter"]["after"][
                 0
             ],
+            project_name=func_dict["metadata"]["project"],
             updated_time=func_dict["metadata"].get("updated"),
             status=func_dict["status"].get("state"),
             base_period=base_period,

mlrun/common/schemas/model_monitoring/model_endpoints.py
CHANGED

@@ -14,7 +14,7 @@
 import abc
 import json
 from datetime import datetime
-from typing import Any, NamedTuple, Optional, TypeVar
+from typing import Any, Literal, NamedTuple, Optional, TypeVar
 from uuid import UUID
 
 from pydantic import validator
@@ -334,6 +334,24 @@ class ModelEndpointMonitoringMetricNoData(_ModelEndpointMonitoringMetricValuesBa
     data: bool = False
 
 
+class ApplicationBaseRecord(BaseModel):
+    type: Literal["metric", "result"]
+    time: datetime
+    value: float
+
+
+class ApplicationResultRecord(ApplicationBaseRecord):
+    kind: ResultKindApp
+    status: ResultStatusApp
+    result_name: str
+    type: Literal["result"] = "result"
+
+
+class ApplicationMetricRecord(ApplicationBaseRecord):
+    metric_name: str
+    type: Literal["metric"] = "metric"
+
+
 def _mapping_attributes(
     model_class: type[Model],
     flattened_dictionary: dict,
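A brief sketch of how the new record models might be constructed, assuming they are importable from the model_endpoints module shown above (the exact re-export path is not part of this diff, and the metric name and value are illustrative):

from datetime import datetime, timezone

from mlrun.common.schemas.model_monitoring.model_endpoints import ApplicationMetricRecord

record = ApplicationMetricRecord(
    metric_name="latency_avg",  # hypothetical metric name
    time=datetime.now(tz=timezone.utc),
    value=12.5,
)
# `type` is fixed by the Literal default, which lets consumers discriminate
# metric records from result records when both are stored in one list.
assert record.type == "metric"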
mlrun/common/schemas/serving.py
CHANGED
mlrun/common/schemas/workflow.py
CHANGED
@@ -46,6 +46,14 @@ class WorkflowRequest(pydantic.v1.BaseModel):
     notifications: typing.Optional[list[Notification]] = None
 
 
+class RerunWorkflowRequest(pydantic.v1.BaseModel):
+    run_name: typing.Optional[str] = None
+    run_id: typing.Optional[str] = None
+    original_workflow_id: typing.Optional[str] = None
+    notifications: typing.Optional[list[Notification]] = None
+    workflow_runner_node_selector: typing.Optional[dict[str, str]] = None
+
+
 class WorkflowResponse(pydantic.v1.BaseModel):
     project: str = None
     name: str = None
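A short sketch of building the new request model; the field names come straight from the diff, while the values are illustrative:

from mlrun.common.schemas.workflow import RerunWorkflowRequest

# All fields are optional; only what the rerun actually needs has to be set.
request = RerunWorkflowRequest(
    run_id="8f14e45f-kfp-run",              # hypothetical KFP run ID
    original_workflow_id="workflow-1234",   # hypothetical workflow ID
    workflow_runner_node_selector={"kubernetes.io/arch": "amd64"},
)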
mlrun/datastore/azure_blob.py
CHANGED
@@ -224,7 +224,7 @@ class AzureBlobStore(DataStore):
         path = self._convert_key_to_remote_path(key=path)
         super().rm(path=path, recursive=recursive, maxdepth=maxdepth)
 
-    def get_spark_options(self):
+    def get_spark_options(self, path=None):
         res = {}
         st = self.storage_options
         service = "blob"
mlrun/datastore/base.py
CHANGED
@@ -48,7 +48,9 @@ class FileStats:
 class DataStore:
     using_bucket = False
 
-    def __init__(self, parent, name, kind, endpoint="", secrets: Optional[dict] = None):
+    def __init__(
+        self, parent, name, kind, endpoint="", secrets: Optional[dict] = None, **kwargs
+    ):
         self._parent = parent
         self.kind = kind
         self.name = name
@@ -176,7 +178,7 @@ class DataStore:
     def upload(self, key, src_path):
         pass
 
-    def get_spark_options(self):
+    def get_spark_options(self, path=None):
         return {}
 
     @staticmethod
mlrun/datastore/datastore.py
CHANGED
@@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import warnings
 from typing import Optional
 from urllib.parse import urlparse
 
@@ -105,8 +106,7 @@ def schema_to_store(schema) -> DataStore.__subclasses__():
         from .alibaba_oss import OSSStore
 
         return OSSStore
-
-    raise ValueError(f"unsupported store scheme ({schema})")
+    raise ValueError(f"unsupported store scheme ({schema})")
 
 
 def uri_to_ipython(link):
@@ -210,12 +210,20 @@ class StoreManager:
             artifact_url=artifact_url,
         )
 
-    def get_or_create_store(
-        self, url, secrets: Optional[dict] = None, project_name=""
+    def _get_or_create_remote_client(
+        self,
+        url,
+        secrets: Optional[dict] = None,
+        project_name="",
+        cache: Optional[dict] = None,
+        schema_to_class: callable = schema_to_store,
+        **kwargs,
     ) -> (DataStore, str, str):
+        # The cache can be an empty dictionary ({}), even if it is a _stores object
+        cache = cache if cache is not None else {}
         schema, endpoint, parsed_url = parse_url(url)
         subpath = parsed_url.path
-
+        cache_key = f"{schema}://{endpoint}" if endpoint else f"{schema}://"
 
         if schema == "ds":
             datastore_profile = datastore_profile_read(url, project_name, secrets)
@@ -237,24 +245,48 @@ class StoreManager:
             subpath = url.replace("file://", "", 1)
 
         if not schema and endpoint:
-            if endpoint in
-                return
+            if endpoint in cache.keys():
+                return cache[endpoint], subpath, url
             else:
                 raise ValueError(f"no such store ({endpoint})")
 
         if not secrets and not mlrun.config.is_running_as_api():
-            if
-                return
+            if cache_key in cache.keys():
+                return cache[cache_key], subpath, url
 
         # support u/p embedding in url (as done in redis) by setting netloc as the "endpoint" parameter
         # when running on server we don't cache the datastore, because there are multiple users and we don't want to
         # cache the credentials, so for each new request we create a new store
-
-
+        remote_client_class = schema_to_class(schema)
+        remote_client = None
+        if remote_client_class:
+            remote_client = remote_client_class(
+                self, schema, cache_key, parsed_url.netloc, secrets=secrets, **kwargs
+            )
+            if not secrets and not mlrun.config.is_running_as_api():
+                cache[cache_key] = remote_client
+        else:
+            warnings.warn("scheme not found. Returning None")
+        return remote_client, subpath, url
+
+    def get_or_create_store(
+        self,
+        url,
+        secrets: Optional[dict] = None,
+        project_name="",
+    ) -> (DataStore, str, str):
+        datastore, sub_path, url = self._get_or_create_remote_client(
+            url=url,
+            secrets=secrets,
+            project_name=project_name,
+            cache=self._stores,
+            schema_to_class=schema_to_store,
         )
-        if not
-
-
+        if not isinstance(datastore, DataStore):
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "remote client by url is not datastore"
+            )
+        return datastore, sub_path, url
 
     def reset_secrets(self):
         self._secrets = {}

mlrun/datastore/google_cloud_storage.py
CHANGED

@@ -194,7 +194,7 @@ class GoogleCloudStorageStore(DataStore):
         self.filesystem.exists(path)
         super().rm(path, recursive=recursive, maxdepth=maxdepth)
 
-    def get_spark_options(self):
+    def get_spark_options(self, path=None):
         res = {}
         st = self._get_credentials()
         if "token" in st:
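For reference, the public entry point keeps its original shape after the refactor; a minimal usage sketch with an illustrative URL:

import mlrun

# get_or_create_store still returns a (DataStore, subpath, url) tuple, which the
# CSVSource/ParquetSource code below relies on; a remote client that is not a
# DataStore now raises MLRunInvalidArgumentError instead of being returned.
store, subpath, url = mlrun.store_manager.get_or_create_store(
    "s3://my-bucket/data/set.parquet"  # hypothetical URL
)
spark_options = store.get_spark_options(store.spark_url + subpath)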
mlrun/datastore/s3.py
CHANGED
@@ -14,6 +14,7 @@
 
 import time
 from typing import Optional
+from urllib.parse import urlparse
 
 import boto3
 from boto3.s3.transfer import TransferConfig
@@ -115,17 +116,27 @@ class S3Store(DataStore):
         byterange += str(offset + size - 1)
         return byterange
 
-    def get_spark_options(self):
+    def get_spark_options(self, path=None):
         res = {}
+        bucket_str = ""
+        if path:
+            parsed = urlparse(path)
+            if parsed.scheme:  # s3:// or s3a://
+                bucket = parsed.hostname
+            else:
+                # drop a leading slash, if any and take 1st segment
+                bucket = path.lstrip("/").split("/", 1)[0]
+            bucket_str = f".bucket.{bucket}"
+
         st = self.get_storage_options()
         if st.get("key"):
-            res["spark.hadoop.fs.s3a.access.key"] = st.get("key")
+            res[f"spark.hadoop.fs.s3a{bucket_str}.access.key"] = st.get("key")
         if st.get("secret"):
-            res["spark.hadoop.fs.s3a.secret.key"] = st.get("secret")
+            res[f"spark.hadoop.fs.s3a{bucket_str}.secret.key"] = st.get("secret")
         if st.get("endpoint_url"):
-            res["spark.hadoop.fs.s3a.endpoint"] = st.get("endpoint_url")
+            res[f"spark.hadoop.fs.s3a{bucket_str}.endpoint"] = st.get("endpoint_url")
         if st.get("profile"):
-            res["spark.hadoop.fs.s3a.aws.profile"] = st.get("profile")
+            res[f"spark.hadoop.fs.s3a{bucket_str}.aws.profile"] = st.get("profile")
         return res
 
     @property
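To illustrate the effect of the new path argument: when a URL such as s3://my-bucket/data/set.parquet is passed, the Hadoop S3A options become bucket-scoped instead of global. A hedged sketch of the expected shape of the result (the actual values depend on the configured storage options):

# Assuming key/secret are present in the store's storage options,
# get_spark_options("s3://my-bucket/data/set.parquet") would return
# per-bucket keys such as:
expected = {
    "spark.hadoop.fs.s3a.bucket.my-bucket.access.key": "<access-key>",
    "spark.hadoop.fs.s3a.bucket.my-bucket.secret.key": "<secret-key>",
}
# Calling it with no path keeps the previous global form,
# e.g. "spark.hadoop.fs.s3a.access.key".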
mlrun/datastore/sources.py
CHANGED
@@ -220,7 +220,7 @@ class CSVSource(BaseSourceDriver):
 
     def get_spark_options(self):
         store, path, _ = mlrun.store_manager.get_or_create_store(self.path)
-        spark_options = store.get_spark_options()
+        spark_options = store.get_spark_options(store.spark_url + path)
         spark_options.update(
             {
                 "path": store.spark_url + path,
@@ -407,7 +407,7 @@ class ParquetSource(BaseSourceDriver):
 
     def get_spark_options(self):
         store, path, _ = mlrun.store_manager.get_or_create_store(self.path)
-        spark_options = store.get_spark_options()
+        spark_options = store.get_spark_options(store.spark_url + path)
         spark_options.update(
             {
                 "path": store.spark_url + path,
mlrun/datastore/targets.py
CHANGED
@@ -970,7 +970,7 @@ class ParquetTarget(BaseStoreTarget):
                 break
 
         store, path, url = self._get_store_and_path()
-        spark_options = store.get_spark_options()
+        spark_options = store.get_spark_options(store.spark_url + path)
         spark_options.update(
             {
                 "path": store.spark_url + path,
@@ -1104,7 +1104,7 @@ class CSVTarget(BaseStoreTarget):
 
     def get_spark_options(self, key_column=None, timestamp_key=None, overwrite=True):
         store, path, url = self._get_store_and_path()
-        spark_options = store.get_spark_options()
+        spark_options = store.get_spark_options(store.spark_url + path)
         spark_options.update(
             {
                 "path": store.spark_url + path,
mlrun/db/__init__.py
CHANGED
mlrun/db/base.py
CHANGED
@@ -638,6 +638,17 @@ class RunDBInterface(ABC):
     ):
         pass
 
+    @abstractmethod
+    def retry_pipeline(
+        self,
+        run_id: str,
+        project: str,
+        namespace: Optional[str] = None,
+        timeout: int = 30,
+        submit_mode: str = "",
+    ):
+        pass
+
     @abstractmethod
     def list_project_secrets(
         self,
@@ -1034,6 +1045,13 @@
     ):
         pass
 
+    def get_project_background_task(
+        self,
+        project: str,
+        name: str,
+    ) -> mlrun.common.schemas.BackgroundTask:
+        pass
+
     @abstractmethod
     def submit_workflow(
         self,
@@ -1113,6 +1131,17 @@
     ) -> list[mlrun.common.schemas.model_monitoring.FunctionSummary]:
         pass
 
+    @abstractmethod
+    def get_monitoring_function_summary(
+        self,
+        project: str,
+        function_name: str,
+        start: Optional[datetime.datetime] = None,
+        end: Optional[datetime.datetime] = None,
+        include_latest_metrics: bool = False,
+    ) -> mlrun.common.schemas.model_monitoring.FunctionSummary:
+        pass
+
     @abstractmethod
     def get_project_summary(self, project: str) -> mlrun.common.schemas.ProjectSummary:
         pass
mlrun/db/httpdb.py
CHANGED
@@ -2350,6 +2350,7 @@ class HTTPRunDB(RunDBInterface):
         project: str,
         namespace: Optional[str] = None,
         timeout: int = 30,
+        submit_mode: str = "",
     ):
         """
         Retry a specific pipeline run using its run ID. This function sends an API request
@@ -2359,6 +2360,7 @@
         :param namespace: Kubernetes namespace where the pipeline is running. Optional.
         :param timeout: Timeout (in seconds) for the API call. Defaults to 30 seconds.
         :param project: Name of the MLRun project associated with the pipeline.
+        :param submit_mode: Whether to submit the pipeline directly to the API.
 
         :raises ValueError: Raised if the API response is not successful or contains an
             error.
@@ -2370,6 +2372,9 @@
         if namespace:
             params["namespace"] = namespace
 
+        if submit_mode:
+            params["submit-mode"] = submit_mode
+
         resp_text = ""
         resp_code = None
         try:
@@ -4188,6 +4193,36 @@
             results.append(FunctionSummary(**item))
         return results
 
+    def get_monitoring_function_summary(
+        self,
+        project: str,
+        function_name: str,
+        start: Optional[datetime] = None,
+        end: Optional[datetime] = None,
+        include_latest_metrics: bool = False,
+    ) -> FunctionSummary:
+        """
+        Get a monitoring function summary for the specified project and function.
+        :param project: The name of the project.
+        :param function_name: The name of the function.
+        :param start: Start time for filtering the results (optional).
+        :param end: End time for filtering the results (optional).
+        :param include_latest_metrics: Whether to include the latest metrics in the response (default is False).
+
+        :return: A FunctionSummary object containing information about the monitoring function.
+        """
+
+        response = self.api_call(
+            method=mlrun.common.types.HTTPMethod.GET,
+            path=f"projects/{project}/model-monitoring/function-summaries/{function_name}",
+            params={
+                "start": datetime_to_iso(start),
+                "end": datetime_to_iso(end),
+                "include-latest-metrics": include_latest_metrics,
+            },
+        )
+        return FunctionSummary(**response.json())
+
     def create_hub_source(
         self, source: Union[dict, mlrun.common.schemas.IndexedHubSource]
     ):
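Taken together with the new WorkflowSubmitMode constant, a brief sketch of how a client might call the extended client APIs; the project, run, and function names are illustrative:

import mlrun
from mlrun.common.constants import WorkflowSubmitMode

db = mlrun.get_run_db()

# Retry a failed KFP run, asking the backend to go through the rerun runner
# rather than the direct KFP retry API.
db.retry_pipeline(
    run_id="8a1b2c3d",                 # hypothetical pipeline run ID
    project="my-project",              # hypothetical project name
    submit_mode=WorkflowSubmitMode.rerun,
)

# Fetch the summary of a single monitoring function added in this release.
summary = db.get_monitoring_function_summary(
    project="my-project",
    function_name="histogram-data-drift",  # hypothetical application name
    include_latest_metrics=True,
)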
mlrun/db/nopdb.py
CHANGED
@@ -524,6 +524,15 @@ class NopDB(RunDBInterface):
     ):
         pass
 
+    def retry_pipeline(
+        self,
+        run_id: str,
+        project: str,
+        namespace: Optional[str] = None,
+        timeout: int = 30,
+    ):
+        pass
+
     def list_pipelines(
         self,
         project: str,
@@ -893,6 +902,16 @@
     ) -> [mlrun.common.schemas.model_monitoring.FunctionSummary]:
         pass
 
+    def get_monitoring_function_summary(
+        self,
+        project: str,
+        function_name: str,
+        start: Optional[datetime.datetime] = None,
+        end: Optional[datetime.datetime] = None,
+        include_latest_metrics: bool = False,
+    ) -> mlrun.common.schemas.model_monitoring.FunctionSummary:
+        pass
+
     def generate_event(
         self, name: str, event_data: Union[dict, mlrun.common.schemas.Event], project=""
     ):
mlrun/execution.py
CHANGED
@@ -1286,6 +1286,18 @@ class MLClientCtx:
             self.to_dict(), self._uid, self.project, iter=self._iteration
         )
 
+    def update_run(self):
+        """
+        Store the run object in the DB - removes missing fields.
+        Use _update_run for coherent updates.
+        Should be called by the logging worker only (see is_logging_worker()).
+        """
+        self._write_tmpfile()
+        if self._rundb:
+            self._rundb.update_run(
+                self.to_dict(), self._uid, self.project, iter=self._iteration
+            )
+
     def is_logging_worker(self):
         """
         Check if the current worker is the logging worker.
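A minimal sketch of the new MLClientCtx.update_run helper inside a job handler; per its docstring it should only be called from the logging worker, and the logged result here is illustrative:

def handler(context):
    context.log_result("accuracy", 0.93)  # hypothetical result
    if context.is_logging_worker():
        # Push the in-memory run object to the DB (removes missing fields).
        context.update_run()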
mlrun/frameworks/tf_keras/mlrun_interface.py
CHANGED

@@ -107,14 +107,10 @@ class TFKerasMLRunInterface(MLRunInterface, ABC):
         )
 
         # Call the pre compile method:
-
-            optimizer=kwargs["optimizer"]
-        )
+        optimizer = self._pre_compile(optimizer=kwargs["optimizer"])
 
         # Assign parameters:
         kwargs["optimizer"] = optimizer
-        if experimental_run_tf_function is not None:
-            kwargs["experimental_run_tf_function"] = experimental_run_tf_function
 
         # Call the original compile method:
         return self.original_compile(*args, **kwargs)
@@ -235,23 +231,20 @@ class TFKerasMLRunInterface(MLRunInterface, ABC):
         """
         self._RANK_0_ONLY_CALLBACKS.add(callback_name)
 
-    def _pre_compile(self, optimizer: Optimizer) ->
+    def _pre_compile(self, optimizer: Optimizer) -> Optimizer:
         """
         Method to call before calling 'compile' to setup the run and inputs for using horovod.
 
         :param optimizer: The optimzier to compile. It will be wrapped in horovod's distributed optimizer:
                           'hvd.DistributedOptimizer'.
 
-        :return: The updated
-                 [0] = Wrapped optimizer.
-                 [1] = The 'experimental_run_tf_function' parameter for 'compile' kwargs or 'None' if horovod should not
-                       be used.
+        :return: The updated Wrapped optimizer.
 
         :raise MLRunInvalidArgumentError: In case the optimizer was passed as a string.
         """
         # Check if needed to run with horovod:
         if self._hvd is None:
-            return optimizer
+            return optimizer
 
         # Validate the optimizer input:
         if isinstance(optimizer, str):
@@ -280,19 +273,15 @@ class TFKerasMLRunInterface(MLRunInterface, ABC):
             print(f"Horovod worker #{self._hvd.rank()} is using CPU")
 
         # Adjust learning rate based on the number of GPUs:
-        if hasattr(
-            optimizer.lr
+        if hasattr(optimizer, "lr"):
+            optimizer.lr = optimizer.lr * self._hvd.size()
         else:
-            optimizer.learning_rate
+            optimizer.learning_rate = optimizer.learning_rate * self._hvd.size()
 
         # Wrap the optimizer in horovod's distributed optimizer: 'hvd.DistributedOptimizer'.
         optimizer = self._hvd.DistributedOptimizer(optimizer)
 
-
-        # optimizer to compute the gradients:
-        experimental_run_tf_function = False
-
-        return optimizer, experimental_run_tf_function
+        return optimizer
 
     def _pre_fit(
         self,
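A standalone sketch of the learning-rate scaling branch above: Keras optimizers expose either an lr or a learning_rate attribute, so the attribute is probed before scaling by the Horovod world size. The function name and parameters here are illustrative, not part of the mlrun API:

def scale_learning_rate(optimizer, world_size: int):
    # Mirror the diff: prefer `lr` when present, otherwise fall back to `learning_rate`.
    if hasattr(optimizer, "lr"):
        optimizer.lr = optimizer.lr * world_size
    else:
        optimizer.learning_rate = optimizer.learning_rate * world_size
    return optimizer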
mlrun/frameworks/tf_keras/model_handler.py
CHANGED

@@ -518,7 +518,6 @@ class TFKerasModelHandler(DLModelHandler):
         )
 
         # Read additional files according to the model format used:
-        # # ModelFormats.SAVED_MODEL - Unzip the SavedModel archive:
         if self._model_format == TFKerasModelHandler.ModelFormats.SAVED_MODEL:
             # Unzip the SavedModel directory:
             with zipfile.ZipFile(self._model_file, "r") as zip_file:
@@ -528,21 +527,17 @@
                 os.path.dirname(self._model_file), self._model_name
             )
         elif self._model_format == TFKerasModelHandler.ModelFormats.KERAS:
-            #
-
-            self._model_file = self._model_file.rsplit(".pkl", 1)[0] + ".keras"
+            # Rename the model file suffix:
+            self._rename_model_file_suffix(suffix="keras")
         elif self._model_format == TFKerasModelHandler.ModelFormats.H5:
-            #
-
-
-        # # ModelFormats.JSON_ARCHITECTURE_H5_WEIGHTS - Get the weights file:
-        elif (
+            # Rename the model file suffix:
+            self._rename_model_file_suffix(suffix="h5")
+        elif (  # ModelFormats.JSON_ARCHITECTURE_H5_WEIGHTS
             self._model_format
             == TFKerasModelHandler.ModelFormats.JSON_ARCHITECTURE_H5_WEIGHTS
         ):
-            #
-
-            self._model_file = self._model_file.rsplit(".pkl", 1)[0] + ".json"
+            # Rename the model file suffix:
+            self._rename_model_file_suffix(suffix="json")
             # Get the weights file:
             self._weights_file = self._extra_data[
                 self._get_weights_file_artifact_name()
@@ -551,6 +546,20 @@
         # Continue collecting from abstract class:
         super()._collect_files_from_store_object()
 
+    def _rename_model_file_suffix(self, suffix: str):
+        """
+        Rename the model file suffix to the given one.
+
+        This is used for the case of loading a model from a store object that was saved with a different suffix than
+        the one keras expects: when keras tries to load it, it validates the suffix. The `artifacts.model.get_model`
+        function is downloading the file to a temp file with a `pkl` suffix, so it needs to be replaced.
+
+        :param suffix: The suffix to rename the model file to (without the trailing dot).
+        """
+        new_name = self._model_file.rsplit(".", 1)[0] + f".{suffix}"
+        os.rename(self._model_file, new_name)
+        self._model_file = new_name
+
     def _collect_files_from_local_path(self):
         """
         If the model path given is of a local path, search for the needed model files and collect them into this handler
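A standalone sketch of the suffix-rename helper added above: get_model() downloads the model to a temp file with a ".pkl" suffix, and Keras validates suffixes on load, so the file is renamed first. The function name and paths here are illustrative, not the handler's private API:

import os

def rename_model_file_suffix(model_file: str, suffix: str) -> str:
    # Replace the last suffix (e.g. ".pkl") with the one Keras expects, e.g. ".keras".
    new_name = model_file.rsplit(".", 1)[0] + f".{suffix}"
    os.rename(model_file, new_name)
    return new_name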
|