mlrun 1.7.0rc28__py3-none-any.whl → 1.7.0rc55__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mlrun might be problematic.
- mlrun/__main__.py +4 -2
- mlrun/alerts/alert.py +75 -8
- mlrun/artifacts/base.py +1 -0
- mlrun/artifacts/manager.py +9 -2
- mlrun/common/constants.py +4 -1
- mlrun/common/db/sql_session.py +3 -2
- mlrun/common/formatters/__init__.py +1 -0
- mlrun/common/formatters/artifact.py +1 -0
- mlrun/{model_monitoring/application.py → common/formatters/feature_set.py} +20 -6
- mlrun/common/formatters/run.py +3 -0
- mlrun/common/helpers.py +0 -1
- mlrun/common/schemas/__init__.py +3 -1
- mlrun/common/schemas/alert.py +15 -12
- mlrun/common/schemas/api_gateway.py +6 -6
- mlrun/common/schemas/auth.py +5 -0
- mlrun/common/schemas/client_spec.py +0 -1
- mlrun/common/schemas/common.py +7 -4
- mlrun/common/schemas/frontend_spec.py +7 -0
- mlrun/common/schemas/function.py +7 -0
- mlrun/common/schemas/model_monitoring/__init__.py +4 -3
- mlrun/common/schemas/model_monitoring/constants.py +41 -26
- mlrun/common/schemas/model_monitoring/model_endpoints.py +23 -47
- mlrun/common/schemas/notification.py +69 -12
- mlrun/common/schemas/project.py +45 -12
- mlrun/common/schemas/workflow.py +10 -2
- mlrun/common/types.py +1 -0
- mlrun/config.py +91 -35
- mlrun/data_types/data_types.py +6 -1
- mlrun/data_types/spark.py +2 -2
- mlrun/data_types/to_pandas.py +57 -25
- mlrun/datastore/__init__.py +1 -0
- mlrun/datastore/alibaba_oss.py +3 -2
- mlrun/datastore/azure_blob.py +125 -37
- mlrun/datastore/base.py +42 -21
- mlrun/datastore/datastore.py +4 -2
- mlrun/datastore/datastore_profile.py +1 -1
- mlrun/datastore/dbfs_store.py +3 -7
- mlrun/datastore/filestore.py +1 -3
- mlrun/datastore/google_cloud_storage.py +85 -29
- mlrun/datastore/inmem.py +4 -1
- mlrun/datastore/redis.py +1 -0
- mlrun/datastore/s3.py +25 -12
- mlrun/datastore/sources.py +76 -4
- mlrun/datastore/spark_utils.py +30 -0
- mlrun/datastore/storeytargets.py +151 -0
- mlrun/datastore/targets.py +102 -131
- mlrun/datastore/v3io.py +1 -0
- mlrun/db/base.py +15 -6
- mlrun/db/httpdb.py +57 -28
- mlrun/db/nopdb.py +29 -5
- mlrun/errors.py +20 -3
- mlrun/execution.py +46 -5
- mlrun/feature_store/api.py +25 -1
- mlrun/feature_store/common.py +6 -11
- mlrun/feature_store/feature_vector.py +3 -1
- mlrun/feature_store/retrieval/job.py +4 -1
- mlrun/feature_store/retrieval/spark_merger.py +10 -39
- mlrun/feature_store/steps.py +8 -0
- mlrun/frameworks/_common/plan.py +3 -3
- mlrun/frameworks/_ml_common/plan.py +1 -1
- mlrun/frameworks/parallel_coordinates.py +2 -3
- mlrun/frameworks/sklearn/mlrun_interface.py +13 -3
- mlrun/k8s_utils.py +48 -2
- mlrun/launcher/client.py +6 -6
- mlrun/launcher/local.py +2 -2
- mlrun/model.py +215 -34
- mlrun/model_monitoring/api.py +38 -24
- mlrun/model_monitoring/applications/__init__.py +1 -2
- mlrun/model_monitoring/applications/_application_steps.py +60 -29
- mlrun/model_monitoring/applications/base.py +2 -174
- mlrun/model_monitoring/applications/context.py +197 -70
- mlrun/model_monitoring/applications/evidently_base.py +11 -85
- mlrun/model_monitoring/applications/histogram_data_drift.py +21 -16
- mlrun/model_monitoring/applications/results.py +4 -4
- mlrun/model_monitoring/controller.py +110 -282
- mlrun/model_monitoring/db/stores/__init__.py +8 -3
- mlrun/model_monitoring/db/stores/base/store.py +3 -0
- mlrun/model_monitoring/db/stores/sqldb/models/base.py +9 -7
- mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +18 -3
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +43 -23
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +48 -35
- mlrun/model_monitoring/db/tsdb/__init__.py +7 -2
- mlrun/model_monitoring/db/tsdb/base.py +147 -15
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +94 -55
- mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +0 -3
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +144 -38
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +44 -3
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +246 -57
- mlrun/model_monitoring/helpers.py +70 -50
- mlrun/model_monitoring/stream_processing.py +96 -195
- mlrun/model_monitoring/writer.py +13 -5
- mlrun/package/packagers/default_packager.py +2 -2
- mlrun/projects/operations.py +16 -8
- mlrun/projects/pipelines.py +126 -115
- mlrun/projects/project.py +286 -129
- mlrun/render.py +3 -3
- mlrun/run.py +38 -19
- mlrun/runtimes/__init__.py +19 -8
- mlrun/runtimes/base.py +4 -1
- mlrun/runtimes/daskjob.py +1 -1
- mlrun/runtimes/funcdoc.py +1 -1
- mlrun/runtimes/kubejob.py +6 -6
- mlrun/runtimes/local.py +12 -5
- mlrun/runtimes/nuclio/api_gateway.py +68 -8
- mlrun/runtimes/nuclio/application/application.py +307 -70
- mlrun/runtimes/nuclio/function.py +63 -14
- mlrun/runtimes/nuclio/serving.py +10 -10
- mlrun/runtimes/pod.py +25 -19
- mlrun/runtimes/remotesparkjob.py +2 -5
- mlrun/runtimes/sparkjob/spark3job.py +16 -17
- mlrun/runtimes/utils.py +34 -0
- mlrun/serving/routers.py +2 -5
- mlrun/serving/server.py +37 -19
- mlrun/serving/states.py +30 -3
- mlrun/serving/v2_serving.py +44 -35
- mlrun/track/trackers/mlflow_tracker.py +5 -0
- mlrun/utils/async_http.py +1 -1
- mlrun/utils/db.py +18 -0
- mlrun/utils/helpers.py +150 -36
- mlrun/utils/http.py +1 -1
- mlrun/utils/notifications/notification/__init__.py +0 -1
- mlrun/utils/notifications/notification/webhook.py +8 -1
- mlrun/utils/notifications/notification_pusher.py +1 -1
- mlrun/utils/v3io_clients.py +2 -2
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.0rc28.dist-info → mlrun-1.7.0rc55.dist-info}/METADATA +153 -66
- {mlrun-1.7.0rc28.dist-info → mlrun-1.7.0rc55.dist-info}/RECORD +131 -134
- {mlrun-1.7.0rc28.dist-info → mlrun-1.7.0rc55.dist-info}/WHEEL +1 -1
- mlrun/feature_store/retrieval/conversion.py +0 -271
- mlrun/model_monitoring/controller_handler.py +0 -37
- mlrun/model_monitoring/evidently_application.py +0 -20
- mlrun/model_monitoring/prometheus.py +0 -216
- {mlrun-1.7.0rc28.dist-info → mlrun-1.7.0rc55.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc28.dist-info → mlrun-1.7.0rc55.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc28.dist-info → mlrun-1.7.0rc55.dist-info}/top_level.txt +0 -0
mlrun/db/httpdb.py
CHANGED

@@ -525,10 +525,6 @@ class HTTPRunDB(RunDBInterface):
             server_cfg.get("external_platform_tracking")
             or config.external_platform_tracking
         )
-        config.model_endpoint_monitoring.store_type = (
-            server_cfg.get("model_endpoint_monitoring_store_type")
-            or config.model_endpoint_monitoring.store_type
-        )
         config.model_endpoint_monitoring.endpoint_store_connection = (
             server_cfg.get("model_endpoint_monitoring_endpoint_store_connection")
             or config.model_endpoint_monitoring.endpoint_store_connection
@@ -1015,7 +1011,7 @@ class HTTPRunDB(RunDBInterface):
             "format": format_,
             "tag": tag,
             "tree": tree,
-            "uid": uid,
+            "object-uid": uid,
         }
         if iter is not None:
             params["iter"] = str(iter)
@@ -1033,6 +1029,7 @@ class HTTPRunDB(RunDBInterface):
             mlrun.common.schemas.artifact.ArtifactsDeletionStrategies.metadata_only
         ),
         secrets: dict = None,
+        iter=None,
     ):
         """Delete an artifact.

@@ -1050,7 +1047,8 @@ class HTTPRunDB(RunDBInterface):
             "key": key,
             "tag": tag,
             "tree": tree,
-            "uid": uid,
+            "object-uid": uid,
+            "iter": iter,
             "deletion_strategy": deletion_strategy,
         }
         error = f"del artifact {project}/{key}"
@@ -1069,8 +1067,8 @@ class HTTPRunDB(RunDBInterface):
         project=None,
         tag=None,
         labels: Optional[Union[dict[str, str], list[str]]] = None,
-        since=None,
-        until=None,
+        since: Optional[datetime] = None,
+        until: Optional[datetime] = None,
         iter: int = None,
         best_iteration: bool = False,
         kind: str = None,
@@ -1100,8 +1098,8 @@ class HTTPRunDB(RunDBInterface):
         :param tag: Return artifacts assigned this tag.
         :param labels: Return artifacts that have these labels. Labels can either be a dictionary {"label": "value"} or
             a list of "label=value" (match label key and value) or "label" (match just label key) strings.
-        :param since:
-        :param until:
+        :param since: Return artifacts updated after this date (as datetime object).
+        :param until: Return artifacts updated before this date (as datetime object).
         :param iter: Return artifacts from a specific iteration (where ``iter=0`` means the root iteration). If
             ``None`` (default) return artifacts from all iterations.
         :param best_iteration: Returns the artifact which belongs to the best iteration of a given run, in the case of
@@ -1135,6 +1133,8 @@ class HTTPRunDB(RunDBInterface):
             "format": format_,
             "producer_uri": producer_uri,
             "limit": limit,
+            "since": datetime_to_iso(since),
+            "until": datetime_to_iso(until),
         }
         error = "list artifacts"
         endpoint_path = f"projects/{project}/artifacts"
@@ -1251,13 +1251,17 @@ class HTTPRunDB(RunDBInterface):
             function_name=name,
         )

-    def list_functions(
+    def list_functions(
+        self, name=None, project=None, tag=None, labels=None, since=None, until=None
+    ):
         """Retrieve a list of functions, filtered by specific criteria.

         :param name: Return only functions with a specific name.
         :param project: Return functions belonging to this project. If not specified, the default project is used.
-        :param tag: Return function versions with specific tags.
+        :param tag: Return function versions with specific tags. To return only tagged functions, set tag to ``"*"``.
         :param labels: Return functions that have specific labels assigned to them.
+        :param since: Return functions updated after this date (as datetime object).
+        :param until: Return functions updated before this date (as datetime object).
         :returns: List of function objects (as dictionary).
         """
         project = project or config.default_project
@@ -1265,6 +1269,8 @@ class HTTPRunDB(RunDBInterface):
             "name": name,
             "tag": tag,
             "label": labels or [],
+            "since": datetime_to_iso(since),
+            "until": datetime_to_iso(until),
         }
         error = "list functions"
         path = f"projects/{project}/functions"
@@ -1364,20 +1370,14 @@ class HTTPRunDB(RunDBInterface):
         :returns: :py:class:`~mlrun.common.schemas.GroupedByProjectRuntimeResourcesOutput` listing the runtime resources
             that were removed.
         """
-        if grace_period is None:
-            grace_period = config.runtime_resources_deletion_grace_period
-            logger.info(
-                "Using default grace period for runtime resources deletion",
-                grace_period=grace_period,
-            )
-
         params = {
             "label-selector": label_selector,
             "kind": kind,
             "object-id": object_id,
             "force": force,
-            "grace-period": grace_period,
         }
+        if grace_period is not None:
+            params["grace-period"] = grace_period
         error = "Failed deleting runtime resources"
         project_path = project if project else "*"
         response = self.api_call(
@@ -1676,7 +1676,7 @@ class HTTPRunDB(RunDBInterface):
         last_log_timestamp = float(
             resp.headers.get("x-mlrun-last-timestamp", "0.0")
         )
-        if func.kind in mlrun.runtimes.RuntimeKinds.
+        if func.kind in mlrun.runtimes.RuntimeKinds.pure_nuclio_deployed_runtimes():
             mlrun.runtimes.nuclio.function.enrich_nuclio_function_from_headers(
                 func, resp.headers
             )
@@ -2235,6 +2235,9 @@ class HTTPRunDB(RunDBInterface):
         partition_order: Union[
             mlrun.common.schemas.OrderType, str
         ] = mlrun.common.schemas.OrderType.desc,
+        format_: Union[
+            str, mlrun.common.formatters.FeatureSetFormat
+        ] = mlrun.common.formatters.FeatureSetFormat.full,
     ) -> list[FeatureSet]:
         """Retrieve a list of feature-sets matching the criteria provided.

@@ -2252,6 +2255,9 @@ class HTTPRunDB(RunDBInterface):
         :param partition_sort_by: What field to sort the results by, within each partition defined by `partition_by`.
             Currently the only allowed value are `created` and `updated`.
         :param partition_order: Order of sorting within partitions - `asc` or `desc`. Default is `desc`.
+        :param format_: Format of the results. Possible values are:
+            - ``minimal`` - Return minimal feature set objects, not including stats and preview for each feature set.
+            - ``full`` - Return full feature set objects.
         :returns: List of matching :py:class:`~mlrun.feature_store.FeatureSet` objects.
         """

@@ -2264,6 +2270,7 @@ class HTTPRunDB(RunDBInterface):
             "entity": entities or [],
             "feature": features or [],
             "label": labels or [],
+            "format": format_,
         }
         if partition_by:
             params.update(
@@ -2747,7 +2754,7 @@ class HTTPRunDB(RunDBInterface):
         deletion_strategy: Union[
             str, mlrun.common.schemas.DeletionStrategy
         ] = mlrun.common.schemas.DeletionStrategy.default(),
-    ):
+    ) -> None:
         """Delete a project.

         :param name: Name of the project to delete.
@@ -2766,7 +2773,7 @@ class HTTPRunDB(RunDBInterface):
             "DELETE", f"projects/{name}", error_message, headers=headers, version="v2"
         )
         if response.status_code == http.HTTPStatus.ACCEPTED:
-            logger.info("
+            logger.info("Waiting for project to be deleted", project_name=name)
             background_task = mlrun.common.schemas.BackgroundTask(**response.json())
             background_task = self._wait_for_background_task_to_reach_terminal_state(
                 background_task.metadata.name
@@ -2776,10 +2783,17 @@ class HTTPRunDB(RunDBInterface):
             == mlrun.common.schemas.BackgroundTaskState.succeeded
         ):
             logger.info("Project deleted", project_name=name)
-
+        elif (
+            background_task.status.state
+            == mlrun.common.schemas.BackgroundTaskState.failed
+        ):
+            logger.error(
+                "Project deletion failed",
+                project_name=name,
+                error=background_task.status.error,
+            )
         elif response.status_code == http.HTTPStatus.NO_CONTENT:
             logger.info("Project deleted", project_name=name)
-        return

     def store_project(
         self,
@@ -3370,7 +3384,7 @@ class HTTPRunDB(RunDBInterface):
         By default, the image is mlrun/mlrun.
         """
         self.api_call(
-            method=mlrun.common.types.HTTPMethod.
+            method=mlrun.common.types.HTTPMethod.PATCH,
             path=f"projects/{project}/model-monitoring/model-monitoring-controller",
             params={
                 "base_period": base_period,
@@ -3465,7 +3479,7 @@ class HTTPRunDB(RunDBInterface):
         if response.status_code == http.HTTPStatus.ACCEPTED:
             if delete_resources:
                 logger.info(
-                    "Model Monitoring is being
+                    "Model Monitoring is being disabled",
                     project_name=project,
                 )
             if delete_user_applications:
@@ -3544,17 +3558,19 @@ class HTTPRunDB(RunDBInterface):
         self,
         project: str,
         credentials: dict[str, str],
+        replace_creds: bool,
     ) -> None:
         """
         Set the credentials for the model monitoring application.

        :param project: Project name.
        :param credentials: Credentials to set.
+        :param replace_creds: If True, will override the existing credentials.
         """
         self.api_call(
             method=mlrun.common.types.HTTPMethod.POST,
             path=f"projects/{project}/model-monitoring/set-model-monitoring-credentials",
-            params={**credentials},
+            params={**credentials, "replace_creds": replace_creds},
         )

     def create_hub_source(
@@ -4181,6 +4197,9 @@ class HTTPRunDB(RunDBInterface):
         :param event_data: The data of the event.
         :param project: The project that the event belongs to.
         """
+        if mlrun.mlconf.alerts.mode == mlrun.common.schemas.alert.AlertsModes.disabled:
+            logger.warning("Alerts are disabled, event will not be generated")
+
         project = project or config.default_project
         endpoint_path = f"projects/{project}/events/{name}"
         error_message = f"post event {project}/events/{name}"
@@ -4204,6 +4223,14 @@ class HTTPRunDB(RunDBInterface):
         :param project: The project that the alert belongs to.
         :returns: The created/modified alert.
         """
+        if not alert_data:
+            raise mlrun.errors.MLRunInvalidArgumentError("Alert data must be provided")
+
+        if mlrun.mlconf.alerts.mode == mlrun.common.schemas.alert.AlertsModes.disabled:
+            logger.warning(
+                "Alerts are disabled, alert will still be stored but will not be triggered"
+            )
+
         project = project or config.default_project
         endpoint_path = f"projects/{project}/alerts/{alert_name}"
         error_message = f"put alert {project}/alerts/{alert_name}"
@@ -4212,6 +4239,8 @@ class HTTPRunDB(RunDBInterface):
             if isinstance(alert_data, AlertConfig)
             else AlertConfig.from_dict(alert_data)
         )
+        # Validation is necessary here because users can directly invoke this function
+        # through `mlrun.get_run_db().store_alert_config()`.
         alert_instance.validate_required_fields()

         alert_data = alert_instance.to_dict()
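The `since`/`until` parameters added above accept datetime objects and are serialized to ISO strings on the wire via `datetime_to_iso`. A minimal client-side sketch, assuming a reachable MLRun API and a placeholder project name:

from datetime import datetime, timedelta, timezone

import mlrun

db = mlrun.get_run_db()
last_week = datetime.now(timezone.utc) - timedelta(days=7)

# Artifacts updated during the last week.
artifacts = db.list_artifacts(project="my-project", since=last_week)

# Functions updated during the last week; tag="*" returns only tagged versions.
functions = db.list_functions(project="my-project", tag="*", since=last_week)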
mlrun/db/nopdb.py
CHANGED

@@ -21,6 +21,7 @@ import mlrun.common.formatters
 import mlrun.common.runtimes.constants
 import mlrun.common.schemas
 import mlrun.errors
+import mlrun.lists

 from ..config import config
 from ..utils import logger
@@ -73,6 +74,22 @@ class NopDB(RunDBInterface):
     def abort_run(self, uid, project="", iter=0, timeout=45, status_text=""):
         pass

+    def list_runtime_resources(
+        self,
+        project: Optional[str] = None,
+        label_selector: Optional[str] = None,
+        kind: Optional[str] = None,
+        object_id: Optional[str] = None,
+        group_by: Optional[
+            mlrun.common.schemas.ListRuntimeResourcesGroupByField
+        ] = None,
+    ) -> Union[
+        mlrun.common.schemas.RuntimeResourcesOutput,
+        mlrun.common.schemas.GroupedByJobRuntimeResourcesOutput,
+        mlrun.common.schemas.GroupedByProjectRuntimeResourcesOutput,
+    ]:
+        return []
+
     def read_run(
         self,
         uid,
@@ -108,7 +125,7 @@ class NopDB(RunDBInterface):
         max_partitions: int = 0,
         with_notifications: bool = False,
     ):
-
+        return mlrun.lists.RunList()

     def del_run(self, uid, project="", iter=0):
         pass
@@ -149,7 +166,7 @@ class NopDB(RunDBInterface):
         format_: mlrun.common.formatters.ArtifactFormat = mlrun.common.formatters.ArtifactFormat.full,
         limit: int = None,
     ):
-
+        return mlrun.lists.ArtifactList()

     def del_artifact(
         self,
@@ -162,6 +179,7 @@ class NopDB(RunDBInterface):
             mlrun.common.schemas.artifact.ArtifactsDeletionStrategies.metadata_only
         ),
         secrets: dict = None,
+        iter=None,
     ):
         pass

@@ -177,8 +195,10 @@ class NopDB(RunDBInterface):
     def delete_function(self, name: str, project: str = ""):
         pass

-    def list_functions(
-
+    def list_functions(
+        self, name=None, project="", tag="", labels=None, since=None, until=None
+    ):
+        return []

     def tag_objects(
         self,
@@ -306,6 +326,9 @@ class NopDB(RunDBInterface):
         partition_order: Union[
             mlrun.common.schemas.OrderType, str
         ] = mlrun.common.schemas.OrderType.desc,
+        format_: Union[
+            str, mlrun.common.formatters.FeatureSetFormat
+        ] = mlrun.common.formatters.FeatureSetFormat.full,
     ) -> list[dict]:
         pass

@@ -418,7 +441,7 @@ class NopDB(RunDBInterface):
         ] = mlrun.common.formatters.PipelineFormat.metadata_only,
         page_size: int = None,
     ) -> mlrun.common.schemas.PipelinesOutput:
-
+        return mlrun.common.schemas.PipelinesOutput(runs=[], total_size=0)

     def create_project_secrets(
         self,
@@ -737,6 +760,7 @@ class NopDB(RunDBInterface):
         self,
         project: str,
         credentials: dict[str, str],
+        replace_creds: bool,
     ) -> None:
         pass
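The practical effect of these stubs is that code falling back to the no-op DB now gets empty, iterable collections instead of None. A rough sketch (instantiating NopDB directly is an assumption for illustration; it is normally selected internally when no API is configured):

from mlrun.db.nopdb import NopDB

db = NopDB()
for run in db.list_runs():             # mlrun.lists.RunList() - empty, iterable
    print(run)
for artifact in db.list_artifacts():   # mlrun.lists.ArtifactList() - empty
    print(artifact)
print(db.list_functions())             # []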
mlrun/errors.py
CHANGED

@@ -29,11 +29,14 @@ class MLRunBaseError(Exception):
     pass


-class
+class MLRunTaskNotReadyError(MLRunBaseError):
     """indicate we are trying to read a value which is not ready
     or need to come from a job which is in progress"""


+MLRunTaskNotReady = MLRunTaskNotReadyError  # kept for BC only
+
+
 class MLRunHTTPError(MLRunBaseError, requests.HTTPError):
     def __init__(
         self,
@@ -137,7 +140,13 @@ def err_to_str(err):
         error_strings.append(err_msg)
         err = err.__cause__

-
+    err_msg = ", caused by: ".join(error_strings)
+
+    # in case the error string is longer than 32k, we truncate it
+    # the truncation takes the first 16k, then the last 16k characters
+    if len(err_msg) > 32_000:
+        err_msg = err_msg[:16_000] + "...truncated..." + err_msg[-16_000:]
+    return err_msg


 # Specific Errors
@@ -205,7 +214,15 @@ class MLRunTimeoutError(MLRunHTTPStatusError, TimeoutError):
     error_status_code = HTTPStatus.GATEWAY_TIMEOUT.value


-class
+class MLRunInvalidMMStoreTypeError(MLRunHTTPStatusError, ValueError):
+    error_status_code = HTTPStatus.BAD_REQUEST.value
+
+
+class MLRunStreamConnectionFailureError(MLRunHTTPStatusError, ValueError):
+    error_status_code = HTTPStatus.BAD_REQUEST.value
+
+
+class MLRunTSDBConnectionFailureError(MLRunHTTPStatusError, ValueError):
     error_status_code = HTTPStatus.BAD_REQUEST.value
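A standalone sketch of the new err_to_str() tail, simplified so that str(err) stands in for the per-error formatting that precedes this hunk: chained causes are joined with ", caused by: ", and messages longer than 32k characters keep only their first and last 16k.

def err_to_str_sketch(err):
    # walk the __cause__ chain and collect each error's message
    error_strings = []
    while err is not None:
        error_strings.append(str(err))
        err = err.__cause__
    err_msg = ", caused by: ".join(error_strings)
    # cap very long messages: first 16k + marker + last 16k characters
    if len(err_msg) > 32_000:
        err_msg = err_msg[:16_000] + "...truncated..." + err_msg[-16_000:]
    return err_msg

try:
    try:
        raise ValueError("low-level failure")
    except ValueError as exc:
        raise RuntimeError("high-level failure") from exc
except RuntimeError as err:
    print(err_to_str_sketch(err))
    # high-level failure, caused by: low-level failure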
mlrun/execution.py
CHANGED

@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+import logging
 import os
 import uuid
 from copy import deepcopy
@@ -23,6 +24,7 @@ from dateutil import parser

 import mlrun
 import mlrun.common.constants as mlrun_constants
+import mlrun.common.formatters
 from mlrun.artifacts import ModelArtifact
 from mlrun.datastore.store_resources import get_store_resource
 from mlrun.errors import MLRunInvalidArgumentError
@@ -78,7 +80,6 @@ class MLClientCtx:
         self._tmpfile = tmp
         self._logger = log_stream or logger
         self._log_level = "info"
-        self._matrics_db = None
         self._autocommit = autocommit
         self._notifications = []
         self._state_thresholds = {}
@@ -103,8 +104,7 @@ class MLClientCtx:
         self._error = None
         self._commit = ""
         self._host = None
-        self._start_time = now_date()
-        self._last_update = now_date()
+        self._start_time = self._last_update = now_date()
         self._iteration_results = None
         self._children = []
         self._parent = None
@@ -170,6 +170,8 @@ class MLClientCtx:
     @log_level.setter
     def log_level(self, value: str):
         """Set the logging level, e.g. 'debug', 'info', 'error'"""
+        level = logging.getLevelName(value.upper())
+        self._logger.set_logger_level(level)
         self._log_level = value

     @property
@@ -337,7 +339,7 @@ class MLClientCtx:
             "name": self.name,
             "kind": "run",
             "uri": uri,
-            "owner": get_in(self._labels,
+            "owner": get_in(self._labels, mlrun_constants.MLRunInternalLabels.owner),
         }
         if mlrun_constants.MLRunInternalLabels.workflow in self._labels:
             resp[mlrun_constants.MLRunInternalLabels.workflow] = self._labels[
@@ -633,7 +635,9 @@ class MLClientCtx:
         :param viewer: Kubeflow viewer type
         :param target_path: Absolute target path (instead of using artifact_path + local_path)
         :param src_path: Deprecated, use local_path
-        :param upload:
+        :param upload: Whether to upload the artifact to the datastore. If not provided, and the `local_path`
+                       is not a directory, upload occurs by default. Directories are uploaded only when this
+                       flag is explicitly set to `True`.
         :param labels: A set of key/value labels to tag the artifact with
         :param format: Optional, format to use (e.g. csv, parquet, ..)
         :param db_key: The key to use in the artifact DB table, by default its run name + '_' + key
@@ -923,6 +927,43 @@ class MLClientCtx:
             updates, self._uid, self.project, iter=self._iteration
         )

+    def get_notifications(self, unmask_secret_params=False):
+        """
+        Get the list of notifications
+
+        :param unmask_secret_params: Used as a workaround for sending notification from workflow-runner.
+            When used, if the notification will be saved again a new secret will be created.
+        """
+
+        # Get the full notifications from the DB since the run context does not contain the params due to bloating
+        run = self._rundb.read_run(
+            self.uid, format_=mlrun.common.formatters.RunFormat.notifications
+        )
+
+        notifications = []
+        for notification in run["spec"]["notifications"]:
+            notification: mlrun.model.Notification = mlrun.model.Notification.from_dict(
+                notification
+            )
+            # Fill the secret params from the project secret. We cannot use the server side internal secret mechanism
+            # here as it is the client side.
+            # TODO: This is a workaround to allow the notification to get the secret params from project secret
+            # instead of getting them from the internal project secret that should be mounted.
+            # We should mount the internal project secret that was created to the workflow-runner
+            # and get the secret from there.
+            if unmask_secret_params:
+                try:
+                    notification.enrich_unmasked_secret_params_from_project_secret()
+                    notifications.append(notification)
+                except mlrun.errors.MLRunValueError:
+                    logger.warning(
+                        "Failed to fill secret params from project secret for notification."
+                        "Skip this notification.",
+                        notification=notification.name,
+                    )
+
+        return notifications
+
     def to_dict(self):
         """Convert the run context to a dictionary"""
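With the new setter, assigning log_level propagates to the context logger instead of only updating the stored string. A minimal handler sketch (the handler name and message are illustrative):

import mlrun

def handler(context: mlrun.MLClientCtx):
    # logging.getLevelName("DEBUG") resolves to 10; the setter applies it to the logger
    context.log_level = "debug"
    context.logger.debug("visible now that the logger level is DEBUG")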
mlrun/feature_store/api.py
CHANGED

@@ -230,6 +230,11 @@ def _get_offline_features(
             "entity_timestamp_column param "
             "can not be specified without entity_rows param"
         )
+    if isinstance(target, BaseStoreTarget) and not target.support_pandas:
+        raise mlrun.errors.MLRunInvalidArgumentError(
+            f"get_offline_features does not support targets that do not support pandas engine."
+            f" Target kind: {target.kind}"
+        )

     if isinstance(feature_vector, FeatureVector):
         update_stats = True
@@ -1032,6 +1037,8 @@ def _ingest_with_spark(
     try:
         import pyspark.sql

+        from mlrun.datastore.spark_utils import check_special_columns_exists
+
         if spark is None or spark is True:
             # create spark context

@@ -1044,13 +1051,13 @@ def _ingest_with_spark(

             spark = (
                 pyspark.sql.SparkSession.builder.appName(session_name)
+                .config("spark.driver.memory", "2g")
                 .config("spark.sql.session.timeZone", "UTC")
                 .getOrCreate()
             )
             created_spark_context = True

         timestamp_key = featureset.spec.timestamp_key
-
         if isinstance(source, pd.DataFrame):
             df = spark.createDataFrame(source)
         elif isinstance(source, pyspark.sql.DataFrame):
@@ -1080,6 +1087,12 @@ def _ingest_with_spark(
             target = get_target_driver(target, featureset)
             target.set_resource(featureset)
             if featureset.spec.passthrough and target.is_offline:
+                check_special_columns_exists(
+                    spark_df=df,
+                    entities=featureset.spec.entities,
+                    timestamp_key=timestamp_key,
+                    label_column=featureset.spec.label_column,
+                )
                 continue
             spark_options = target.get_spark_options(
                 key_columns, timestamp_key, overwrite
@@ -1090,6 +1103,17 @@ def _ingest_with_spark(
                 df_to_write, key_columns, timestamp_key, spark_options
             )
            write_format = spark_options.pop("format", None)
+            # We can get to this point if the column exists in different letter cases,
+            # so PySpark will be able to read it, but we still have to raise an exception for it.
+
+            # This check is here and not in to_spark_df because in spark_merger we can have a target
+            # that has different letter cases than the source, like in SnowflakeTarget.
+            check_special_columns_exists(
+                spark_df=df_to_write,
+                entities=featureset.spec.entities,
+                timestamp_key=timestamp_key,
+                label_column=featureset.spec.label_column,
+            )
             if overwrite:
                 write_spark_dataframe_with_options(
                     spark_options, df_to_write, "overwrite", write_format=write_format
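For reference, the Spark session that _ingest_with_spark now builds is equivalent to the following manual setup (the app name is a placeholder; the real code derives it from the ingestion session):

import pyspark.sql

spark = (
    pyspark.sql.SparkSession.builder.appName("mlrun-ingest")
    .config("spark.driver.memory", "2g")          # new in this version
    .config("spark.sql.session.timeZone", "UTC")
    .getOrCreate()
)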
mlrun/feature_store/common.py
CHANGED

@@ -37,17 +37,12 @@ def parse_feature_string(feature):
         raise mlrun.errors.MLRunInvalidArgumentError(
             f"feature {feature} must be {expected_message}"
         )
-
-
-
-
-
-    feature_set
-    feature_name = splitted[1]
-    splitted = feature_name.split(" as ")
-    if len(splitted) > 1:
-        return feature_set.strip(), splitted[0].strip(), splitted[1].strip()
-    return feature_set.strip(), feature_name.strip(), None
+    feature_set, feature_name = feature.rsplit(feature_separator, 1)
+    feature_set = feature_set.strip()
+    split_result = feature_name.split(" as ", 1)
+    feature_name = split_result[0].strip()
+    alias = split_result[1].strip() if len(split_result) > 1 else None
+    return feature_set, feature_name, alias


 def parse_project_name_from_feature_string(feature):
@@ -1086,7 +1086,9 @@ class OfflineVectorResponse:
     def to_dataframe(self, to_pandas=True):
         """return result as dataframe"""
         if self.status != "completed":
-            raise mlrun.errors.
+            raise mlrun.errors.MLRunTaskNotReadyError(
+                "feature vector dataset is not ready"
+            )
         return self._merger.get_df(to_pandas=to_pandas)

     def to_parquet(self, target_path, **kw):
@@ -156,7 +156,9 @@ class RemoteVectorResponse:

     def _is_ready(self):
         if self.status != "completed":
-            raise mlrun.errors.
+            raise mlrun.errors.MLRunTaskNotReadyError(
+                "feature vector dataset is not ready"
+            )
         self.vector.reload()

     def to_dataframe(self, columns=None, df_module=None, **kwargs):
@@ -181,6 +183,7 @@ class RemoteVectorResponse:
         file_format = kwargs.get("format")
         if not file_format:
             file_format = self.run.status.results["target"]["kind"]
+
         df = mlrun.get_dataitem(self.target_uri).as_df(
             columns=columns, df_module=df_module, format=file_format, **kwargs
         )
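A standalone sketch of the rewritten parse_feature_string() logic: split on the last separator, then peel off an optional " as " alias. The "." default separator is an assumption for illustration; the real code reads feature_separator from module scope.

def parse_feature_string_sketch(feature, feature_separator="."):
    # "<feature_set><sep><feature_name>[ as <alias>]"
    feature_set, feature_name = feature.rsplit(feature_separator, 1)
    split_result = feature_name.split(" as ", 1)
    alias = split_result[1].strip() if len(split_result) > 1 else None
    return feature_set.strip(), split_result[0].strip(), alias

print(parse_feature_string_sketch("stocks.price as close"))  # ('stocks', 'price', 'close')
print(parse_feature_string_sketch("stocks.price"))           # ('stocks', 'price', None)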