mlrun 1.7.0rc37__py3-none-any.whl → 1.7.0rc38__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mlrun/alerts/alert.py +4 -3
- mlrun/common/schemas/model_monitoring/constants.py +4 -0
- mlrun/common/schemas/notification.py +3 -3
- mlrun/datastore/azure_blob.py +120 -30
- mlrun/feature_store/common.py +6 -11
- mlrun/model.py +5 -0
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +9 -6
- mlrun/model_monitoring/db/tsdb/base.py +121 -1
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +65 -5
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +23 -1
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +211 -35
- mlrun/model_monitoring/stream_processing.py +67 -25
- mlrun/projects/operations.py +1 -1
- mlrun/projects/project.py +7 -1
- mlrun/runtimes/__init__.py +15 -8
- mlrun/runtimes/nuclio/application/application.py +45 -5
- mlrun/runtimes/pod.py +2 -2
- mlrun/runtimes/remotesparkjob.py +2 -5
- mlrun/runtimes/sparkjob/spark3job.py +7 -9
- mlrun/serving/v2_serving.py +1 -0
- mlrun/track/trackers/mlflow_tracker.py +5 -0
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.0rc37.dist-info → mlrun-1.7.0rc38.dist-info}/METADATA +7 -1
- {mlrun-1.7.0rc37.dist-info → mlrun-1.7.0rc38.dist-info}/RECORD +28 -28
- {mlrun-1.7.0rc37.dist-info → mlrun-1.7.0rc38.dist-info}/WHEEL +1 -1
- {mlrun-1.7.0rc37.dist-info → mlrun-1.7.0rc38.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc37.dist-info → mlrun-1.7.0rc38.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc37.dist-info → mlrun-1.7.0rc38.dist-info}/top_level.txt +0 -0
mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py
CHANGED
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from datetime import datetime
+from datetime import datetime, timezone
 from io import StringIO
 from typing import Literal, Optional, Union
 
@@ -33,7 +33,7 @@ _TSDB_RATE = "1/s"
 _CONTAINER = "users"
 
 
-def _is_no_schema_error(exc: v3io_frames.
+def _is_no_schema_error(exc: v3io_frames.Error) -> bool:
     """
     In case of a nonexistent TSDB table - a `v3io_frames.ReadError` error is raised.
     Check if the error message contains the relevant string to verify the cause.
@@ -89,6 +89,19 @@ class V3IOTSDBConnector(TSDBConnector):
         )
         self.tables[mm_schemas.V3IOTSDBTables.EVENTS] = events_path
 
+        errors_table_full_path = mlrun.mlconf.get_model_monitoring_file_target_path(
+            project=self.project,
+            kind=mm_schemas.FileTargetKind.ERRORS,
+        )
+        (
+            _,
+            _,
+            errors_path,
+        ) = mlrun.common.model_monitoring.helpers.parse_model_endpoint_store_prefix(
+            errors_table_full_path
+        )
+        self.tables[mm_schemas.V3IOTSDBTables.ERRORS] = errors_path
+
         monitoring_application_full_path = (
             mlrun.mlconf.get_model_monitoring_file_target_path(
                 project=self.project,
@@ -160,7 +173,6 @@ class V3IOTSDBConnector(TSDBConnector):
         - endpoint_features (Prediction and feature names and values)
         - custom_metrics (user-defined metrics)
         """
-
         # Write latency per prediction, labeled by endpoint ID only
         graph.add_step(
             "storey.TSDBTarget",
@@ -171,7 +183,10 @@
             time_col=mm_schemas.EventFieldType.TIMESTAMP,
             container=self.container,
             v3io_frames=self.v3io_framesd,
-            columns=[
+            columns=[
+                mm_schemas.EventFieldType.LATENCY,
+                mm_schemas.EventFieldType.LAST_REQUEST_TIMESTAMP,
+            ],
             index_cols=[
                 mm_schemas.EventFieldType.ENDPOINT_ID,
             ],
@@ -255,6 +270,40 @@
         apply_storey_filter()
         apply_tsdb_target(name="tsdb3", after="FilterNotNone")
 
+    def handle_model_error(
+        self,
+        graph,
+        tsdb_batching_max_events: int = 10,
+        tsdb_batching_timeout_secs: int = 60,
+        **kwargs,
+    ) -> None:
+        graph.add_step(
+            "mlrun.model_monitoring.db.tsdb.v3io.stream_graph_steps.ErrorExtractor",
+            name="error_extractor",
+            after="ForwardError",
+        )
+
+        graph.add_step(
+            "storey.TSDBTarget",
+            name="tsdb_error",
+            after="error_extractor",
+            path=f"{self.container}/{self.tables[mm_schemas.FileTargetKind.ERRORS]}",
+            rate="1/s",
+            time_col=mm_schemas.EventFieldType.TIMESTAMP,
+            container=self.container,
+            v3io_frames=self.v3io_framesd,
+            columns=[
+                mm_schemas.EventFieldType.MODEL_ERROR,
+                mm_schemas.EventFieldType.ERROR_COUNT,
+            ],
+            index_cols=[
+                mm_schemas.EventFieldType.ENDPOINT_ID,
+            ],
+            max_events=tsdb_batching_max_events,
+            flush_after_seconds=tsdb_batching_timeout_secs,
+            key=mm_schemas.EventFieldType.ENDPOINT_ID,
+        )
+
     def write_application_event(
         self,
         event: dict,
@@ -437,7 +486,7 @@
                 step=sliding_window_step,
                 **kwargs,
             )
-        except v3io_frames.
+        except v3io_frames.Error as err:
             if _is_no_schema_error(err):
                 return pd.DataFrame()
             else:
@@ -504,10 +553,16 @@
         if type == "metrics":
             table_path = self.tables[mm_schemas.V3IOTSDBTables.METRICS]
             name = mm_schemas.MetricData.METRIC_NAME
+            columns = [mm_schemas.MetricData.METRIC_VALUE]
             df_handler = self.df_to_metrics_values
         elif type == "results":
             table_path = self.tables[mm_schemas.V3IOTSDBTables.APP_RESULTS]
             name = mm_schemas.ResultData.RESULT_NAME
+            columns = [
+                mm_schemas.ResultData.RESULT_VALUE,
+                mm_schemas.ResultData.RESULT_STATUS,
+                mm_schemas.ResultData.RESULT_KIND,
+            ]
             df_handler = self.df_to_results_values
         else:
             raise ValueError(f"Invalid {type = }")
@@ -517,6 +572,7 @@
             metric_and_app_names=[(metric.app, metric.name) for metric in metrics],
             table_path=table_path,
             name=name,
+            columns=columns,
         )
 
         logger.debug("Querying V3IO TSDB", query=query)
@@ -627,33 +683,153 @@
             ), # pyright: ignore[reportArgumentType]
         )
 
- [contents of the 30 removed lines (630-659) are not rendered in the source diff]
+    def get_last_request(
+        self,
+        endpoint_ids: Union[str, list[str]],
+        start: Union[datetime, str] = "0",
+        end: Union[datetime, str] = "now",
+    ) -> pd.DataFrame:
+        endpoint_ids = (
+            endpoint_ids if isinstance(endpoint_ids, list) else [endpoint_ids]
+        )
+        df = self._get_records(
+            table=mm_schemas.FileTargetKind.PREDICTIONS,
+            start=start,
+            end=end,
+            filter_query=f"endpoint_id IN({str(endpoint_ids)[1:-1]})",
+            agg_funcs=["last"],
+        )
+        if not df.empty:
+            df.rename(
+                columns={
+                    f"last({mm_schemas.EventFieldType.LAST_REQUEST_TIMESTAMP})": mm_schemas.EventFieldType.LAST_REQUEST,
+                    f"last({mm_schemas.EventFieldType.LATENCY})": f"last_{mm_schemas.EventFieldType.LATENCY}",
+                },
+                inplace=True,
+            )
+            df[mm_schemas.EventFieldType.LAST_REQUEST] = df[
+                mm_schemas.EventFieldType.LAST_REQUEST
+            ].map(
+                lambda last_request: datetime.fromtimestamp(
+                    last_request, tz=timezone.utc
+                )
+            )
+
+        return df.reset_index(drop=True)
+
+    def get_drift_status(
+        self,
+        endpoint_ids: Union[str, list[str]],
+        start: Union[datetime, str] = "now-24h",
+        end: Union[datetime, str] = "now",
+    ) -> pd.DataFrame:
+        endpoint_ids = (
+            endpoint_ids if isinstance(endpoint_ids, list) else [endpoint_ids]
+        )
+        df = self._get_records(
+            table=mm_schemas.V3IOTSDBTables.APP_RESULTS,
+            start=start,
+            end=end,
+            columns=[mm_schemas.ResultData.RESULT_STATUS],
+            filter_query=f"endpoint_id IN({str(endpoint_ids)[1:-1]})",
+            agg_funcs=["max"],
+            group_by="endpoint_id",
+        )
+        if not df.empty:
+            df.columns = [
+                col[len("max(") : -1] if "max(" in col else col for col in df.columns
+            ]
+        return df.reset_index(drop=True)
+
+    def get_metrics_metadata(
+        self,
+        endpoint_id: str,
+        start: Union[datetime, str] = "0",
+        end: Union[datetime, str] = "now",
+    ) -> pd.DataFrame:
+        df = self._get_records(
+            table=mm_schemas.V3IOTSDBTables.METRICS,
+            start=start,
+            end=end,
+            columns=[mm_schemas.MetricData.METRIC_VALUE],
+            filter_query=f"endpoint_id=='{endpoint_id}'",
+            agg_funcs=["last"],
+        )
+        if not df.empty:
+            df.drop(
+                columns=[f"last({mm_schemas.MetricData.METRIC_VALUE})"], inplace=True
+            )
+        return df.reset_index(drop=True)
+
+    def get_results_metadata(
+        self,
+        endpoint_id: str,
+        start: Union[datetime, str] = "0",
+        end: Union[datetime, str] = "now",
+    ) -> pd.DataFrame:
+        df = self._get_records(
+            table=mm_schemas.V3IOTSDBTables.APP_RESULTS,
+            start=start,
+            end=end,
+            columns=[
+                mm_schemas.ResultData.RESULT_KIND,
+            ],
+            filter_query=f"endpoint_id=='{endpoint_id}'",
+            agg_funcs=["last"],
+        )
+        if not df.empty:
+            df.rename(
+                columns={
+                    f"last({mm_schemas.ResultData.RESULT_KIND})": mm_schemas.ResultData.RESULT_KIND
+                },
+                inplace=True,
+            )
+        return df.reset_index(drop=True)
+
+    def get_error_count(
+        self,
+        endpoint_ids: Union[str, list[str]],
+        start: Union[datetime, str] = "0",
+        end: Union[datetime, str] = "now",
+    ) -> pd.DataFrame:
+        endpoint_ids = (
+            endpoint_ids if isinstance(endpoint_ids, list) else [endpoint_ids]
+        )
+        df = self._get_records(
+            table=mm_schemas.FileTargetKind.ERRORS,
+            start=start,
+            end=end,
+            columns=[mm_schemas.EventFieldType.ERROR_COUNT],
+            filter_query=f"endpoint_id IN({str(endpoint_ids)[1:-1]})",
+            agg_funcs=["count"],
+        )
+        if not df.empty:
+            df.rename(
+                columns={
+                    f"count({mm_schemas.EventFieldType.ERROR_COUNT})": mm_schemas.EventFieldType.ERROR_COUNT
+                },
+                inplace=True,
+            )
+            df.dropna(inplace=True)
+        return df.reset_index(drop=True)
+
+    def get_avg_latency(
+        self,
+        endpoint_ids: Union[str, list[str]],
+        start: Union[datetime, str] = "0",
+        end: Union[datetime, str] = "now",
+    ) -> pd.DataFrame:
+        endpoint_ids = (
+            endpoint_ids if isinstance(endpoint_ids, list) else [endpoint_ids]
+        )
+        df = self._get_records(
+            table=mm_schemas.FileTargetKind.PREDICTIONS,
+            start=start,
+            end=end,
+            columns=[mm_schemas.EventFieldType.LATENCY],
+            filter_query=f"endpoint_id IN({str(endpoint_ids)[1:-1]})",
+            agg_funcs=["avg"],
+        )
+        if not df.empty:
+            df.dropna(inplace=True)
+        return df.reset_index(drop=True)
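The six getters added above share one pattern: normalize `endpoint_ids` to a list, query `_get_records` with an aggregation, then clean up the aggregate column names. A minimal usage sketch (the project name and endpoint IDs are hypothetical placeholders; assumes the connector can be constructed for an existing V3IO-backed project):

# Sketch only -- "my-project" and the endpoint IDs are hypothetical.
from mlrun.model_monitoring.db.tsdb.v3io.v3io_connector import V3IOTSDBConnector

connector = V3IOTSDBConnector(project="my-project")

# A single ID or a list of IDs is accepted; start/end take datetimes or
# TSDB range strings such as "0", "now", and "now-24h".
last_df = connector.get_last_request(endpoint_ids=["ep-1", "ep-2"])
errors_df = connector.get_error_count(endpoint_ids="ep-1")
latency_df = connector.get_avg_latency(endpoint_ids="ep-1", start="now-1h")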
mlrun/model_monitoring/stream_processing.py
CHANGED
@@ -169,11 +169,40 @@ class EventStreamProcessor:
             mlrun.serving.states.RootFlowStep,
             fn.set_topology(mlrun.serving.states.StepKinds.flow),
         )
+        graph.add_step(
+            "ExtractEndpointID",
+            "extract_endpoint",
+            full_event=True,
+        )
+
+        # split the graph between event with error vs valid event
+        graph.add_step(
+            "storey.Filter",
+            "FilterError",
+            after="extract_endpoint",
+            _fn="(event.get('error') is None)",
+        )
+
+        graph.add_step(
+            "storey.Filter",
+            "ForwardError",
+            after="extract_endpoint",
+            _fn="(event.get('error') is not None)",
+        )
+
+        tsdb_connector = mlrun.model_monitoring.get_tsdb_connector(
+            project=self.project, secret_provider=secret_provider
+        )
+
+        tsdb_connector.handle_model_error(
+            graph,
+        )
 
         # Process endpoint event: splitting into sub-events and validate event data
         def apply_process_endpoint_event():
            graph.add_step(
                "ProcessEndpointEvent",
+                after="FilterError",
                full_event=True,
                project=self.project,
            )
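The two `storey.Filter` steps split the stream into complementary branches: `FilterError` keeps events whose body carries no `error`, while `ForwardError` keeps the rest and feeds `handle_model_error`. The `_fn` strings are predicates evaluated per event; in plain Python the routing is equivalent to this sketch (the event dicts are hypothetical):

# Equivalent routing of the FilterError / ForwardError pair (sketch).
events = [
    {"model": "m1", "error": None},                 # valid event
    {"model": "m2", "error": "inference timeout"},  # hypothetical error payload
]

valid_events = [e for e in events if e.get("error") is None]      # FilterError branch
error_events = [e for e in events if e.get("error") is not None]  # ForwardError branch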
@@ -295,9 +324,6 @@
 
         apply_storey_sample_window()
 
-        tsdb_connector = mlrun.model_monitoring.get_tsdb_connector(
-            project=self.project, secret_provider=secret_provider
-        )
         tsdb_connector.apply_monitoring_stream_steps(graph=graph)
 
         # Parquet branch
@@ -386,6 +412,38 @@ class ProcessBeforeEndpointUpdate(mlrun.feature_store.steps.MapClass):
         return e
 
 
+class ExtractEndpointID(mlrun.feature_store.steps.MapClass):
+    def __init__(self, **kwargs) -> None:
+        """
+        Generate the model endpoint ID based on the event parameters and attach it to the event.
+        """
+        super().__init__(**kwargs)
+
+    def do(self, full_event) -> typing.Union[storey.Event, None]:
+        # Getting model version and function uri from event
+        # and use them for retrieving the endpoint_id
+        function_uri = full_event.body.get(EventFieldType.FUNCTION_URI)
+        if not is_not_none(function_uri, [EventFieldType.FUNCTION_URI]):
+            return None
+
+        model = full_event.body.get(EventFieldType.MODEL)
+        if not is_not_none(model, [EventFieldType.MODEL]):
+            return None
+
+        version = full_event.body.get(EventFieldType.VERSION)
+        versioned_model = f"{model}:{version}" if version else f"{model}:latest"
+
+        endpoint_id = mlrun.common.model_monitoring.create_model_endpoint_uid(
+            function_uri=function_uri,
+            versioned_model=versioned_model,
+        )
+
+        endpoint_id = str(endpoint_id)
+        full_event.body[EventFieldType.ENDPOINT_ID] = endpoint_id
+        full_event.body[EventFieldType.VERSIONED_MODEL] = versioned_model
+        return full_event
+
+
 class ProcessBeforeParquet(mlrun.feature_store.steps.MapClass):
     def __init__(self, **kwargs):
         """
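`ExtractEndpointID` now computes the endpoint ID once, before the error/valid split, so both branches carry it. The versioned-model naming it relies on is visible above and is easy to check in isolation:

# Sketch of the versioned-model naming used by ExtractEndpointID.
def versioned_model_name(model: str, version: str = "") -> str:
    # Fall back to the "latest" tag when no explicit version is given.
    return f"{model}:{version}" if version else f"{model}:latest"

assert versioned_model_name("churn-model", "3") == "churn-model:3"
assert versioned_model_name("churn-model") == "churn-model:latest"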
@@ -459,28 +517,9 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
     def do(self, full_event):
         event = full_event.body
 
-
-
-        function_uri = event
-        if not is_not_none(function_uri, [EventFieldType.FUNCTION_URI]):
-            return None
-
-        model = event.get(EventFieldType.MODEL)
-        if not is_not_none(model, [EventFieldType.MODEL]):
-            return None
-
-        version = event.get(EventFieldType.VERSION)
-        versioned_model = f"{model}:{version}" if version else f"{model}:latest"
-
-        endpoint_id = mlrun.common.model_monitoring.create_model_endpoint_uid(
-            function_uri=function_uri,
-            versioned_model=versioned_model,
-        )
-
-        endpoint_id = str(endpoint_id)
-
-        event[EventFieldType.VERSIONED_MODEL] = versioned_model
-        event[EventFieldType.ENDPOINT_ID] = endpoint_id
+        versioned_model = event[EventFieldType.VERSIONED_MODEL]
+        endpoint_id = event[EventFieldType.ENDPOINT_ID]
+        function_uri = event[EventFieldType.FUNCTION_URI]
 
         # In case this process fails, resume state from existing record
         self.resume_state(endpoint_id)
@@ -598,6 +637,9 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
                 EventFieldType.PREDICTION: prediction,
                 EventFieldType.FIRST_REQUEST: self.first_request[endpoint_id],
                 EventFieldType.LAST_REQUEST: self.last_request[endpoint_id],
+                EventFieldType.LAST_REQUEST_TIMESTAMP: mlrun.utils.enrich_datetime_with_tz_info(
+                    self.last_request[endpoint_id]
+                ).timestamp(),
                 EventFieldType.ERROR_COUNT: self.error_count[endpoint_id],
                 EventFieldType.LABELS: event.get(EventFieldType.LABELS, {}),
                 EventFieldType.METRICS: event.get(EventFieldType.METRICS, {}),
mlrun/projects/operations.py
CHANGED
@@ -189,7 +189,7 @@ def run_function(
     if engine == "kfp":
         if schedule:
             raise mlrun.errors.MLRunInvalidArgumentError(
-                "Scheduling
+                "Scheduling jobs is not supported when running a workflow with the kfp engine."
             )
     return function.as_step(
         name=name, runspec=task, workdir=workdir, outputs=outputs, labels=labels
mlrun/projects/project.py
CHANGED
@@ -600,6 +600,10 @@ def _run_project_setup(
     if hasattr(mod, "setup"):
         try:
             project = getattr(mod, "setup")(project)
+            if not project or not isinstance(project, mlrun.projects.MlrunProject):
+                raise ValueError(
+                    "MLRun project_setup:setup() must return a project object"
+                )
         except Exception as exc:
             logger.error(
                 "Failed to run project_setup script",
@@ -610,7 +614,9 @@
         if save:
             project.save()
     else:
-        logger.warn(
+        logger.warn(
+            f"skipping setup, setup() handler was not found in {path.basename(setup_file_path)}"
+        )
     return project
 
 
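With the added check, a `project_setup.py` whose `setup()` handler forgets to return the project now fails fast instead of silently discarding its changes. A minimal conforming handler might look like this sketch (the `set_function` call and file names are illustrative):

# project_setup.py -- minimal sketch; names are illustrative.
import mlrun


def setup(project: mlrun.projects.MlrunProject) -> mlrun.projects.MlrunProject:
    # Mutate the project as needed...
    project.set_function("trainer.py", name="trainer", kind="job", image="mlrun/mlrun")
    # ...and always return it; _run_project_setup now raises ValueError otherwise.
    return project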
mlrun/runtimes/__init__.py
CHANGED
@@ -30,6 +30,8 @@ __all__ = [
     "MpiRuntimeV1",
 ]
 
+import typing
+
 from mlrun.runtimes.utils import resolve_spark_operator_version
 
 from ..common.runtimes.constants import MPIJobCRDVersions
@@ -181,7 +183,7 @@ class RuntimeKinds:
     ]
 
     @staticmethod
-    def is_log_collectable_runtime(kind: str):
+    def is_log_collectable_runtime(kind: typing.Optional[str]):
         """
         whether log collector can collect logs for that runtime
         :param kind: kind name
@@ -192,13 +194,18 @@
         if RuntimeKinds.is_local_runtime(kind):
             return False
 
-        if
- [removed lines 196-201 are not rendered in the source diff]
+        if (
+            kind
+            not in [
+                # dask implementation is different from other runtimes, because few runs can be run against the same
+                # runtime resource, so collecting logs on that runtime resource won't be correct, the way we collect
+                # logs for dask is by using `log_std` on client side after we execute the code against the cluster,
+                # as submitting the run with the dask client will return the run stdout.
+                # For more information head to `DaskCluster._run`.
+                RuntimeKinds.dask
+            ]
+            + RuntimeKinds.nuclio_runtimes()
+        ):
             return True
 
         return False
mlrun/runtimes/nuclio/application/application.py
CHANGED
@@ -122,6 +122,11 @@ class ApplicationSpec(NuclioSpec):
             state_thresholds=state_thresholds,
             disable_default_http_trigger=disable_default_http_trigger,
         )
+
+        # Override default min/max replicas (don't assume application is stateless)
+        self.min_replicas = min_replicas or 1
+        self.max_replicas = max_replicas or 1
+
         self.internal_application_port = (
             internal_application_port
             or mlrun.mlconf.function.application.default_sidecar_internal_port
@@ -169,7 +174,7 @@ class ApplicationStatus(NuclioStatus):
         self.application_source = application_source or None
         self.sidecar_name = sidecar_name or None
         self.api_gateway_name = api_gateway_name or None
-        self.api_gateway = api_gateway or None
+        self.api_gateway: typing.Optional[APIGateway] = api_gateway or None
         self.url = url or None
 
 
@@ -254,6 +259,15 @@ class ApplicationRuntime(RemoteRuntime):
                 "Application sidecar spec must include a command if args are provided"
             )
 
+    def prepare_image_for_deploy(self):
+        if self.spec.build.source and self.spec.build.load_source_on_run:
+            logger.warning(
+                "Application runtime requires loading the source into the application image. "
+                f"Even though {self.spec.build.load_source_on_run=}, loading on build will be forced."
+            )
+            self.spec.build.load_source_on_run = False
+        super().prepare_image_for_deploy()
+
     def deploy(
         self,
         project="",
@@ -275,6 +289,7 @@
         """
         Deploy function, builds the application image if required (self.requires_build()) or force_build is True,
         Once the image is built, the function is deployed.
+
         :param project: Project name
         :param tag: Function tag
         :param verbose: Set True for verbose logging
@@ -349,9 +364,13 @@
         )
 
     def with_source_archive(
-        self,
+        self,
+        source,
+        workdir=None,
+        pull_at_runtime: bool = False,
+        target_dir: str = None,
     ):
-        """load the code from git/tar/zip archive at
+        """load the code from git/tar/zip archive at build
 
         :param source: valid absolute path or URL to git, zip, or tar file, e.g.
             git://github.com/mlrun/something.git
@@ -359,13 +378,20 @@
             note path source must exist on the image or exist locally when run is local
             (it is recommended to use 'workdir' when source is a filepath instead)
         :param workdir: working dir relative to the archive root (e.g. './subdir') or absolute to the image root
-        :param pull_at_runtime:
+        :param pull_at_runtime: currently not supported, source must be loaded into the image during the build process
         :param target_dir: target dir on runtime pod or repo clone / archive extraction
         """
+        if pull_at_runtime:
+            logger.warning(
+                f"{pull_at_runtime=} is currently not supported for application runtime "
+                "and will be overridden to False",
+                pull_at_runtime=pull_at_runtime,
+            )
+
         self._configure_mlrun_build_with_source(
             source=source,
             workdir=workdir,
-            pull_at_runtime=
+            pull_at_runtime=False,
             target_dir=target_dir,
         )
 
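After this change, `pull_at_runtime=True` on an application runtime is only warned about and then coerced to False, so call sites need not change. A usage sketch (the function name is hypothetical and assumes an application function can be created via new_function; the archive URL is the docstring's own example):

import mlrun

app = mlrun.new_function(name="my-app", kind="application")  # hypothetical function
# The source is always baked into the image at build time for application
# runtimes; pull_at_runtime=True merely logs a warning and is overridden.
app.with_source_archive(
    "git://github.com/mlrun/something.git",
    workdir="./subdir",
    pull_at_runtime=True,
)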
@@ -551,6 +577,13 @@
             args=self.spec.args,
         )
 
+        if self.spec.build.source in [".", "./"]:
+            logger.info(
+                "The application is configured to use the project's source. "
+                "Application runtime requires loading the source into the application image. "
+                "Loading on build will be forced regardless of whether 'pull_at_runtime=True' was configured."
+            )
+
         with_mlrun = self._resolve_build_with_mlrun(with_mlrun)
         return self._build_image(
             builder_env=builder_env,
@@ -580,6 +613,13 @@
         )
         function.spec.nuclio_runtime = mlrun.utils.get_in(spec, "spec.runtime")
 
+        # default the reverse proxy logger level to info
+        logger_sinks_key = "spec.loggerSinks"
+        if not function.spec.config.get(logger_sinks_key):
+            function.set_config(
+                logger_sinks_key, [{"level": "info", "sink": "myStdoutLoggerSink"}]
+            )
+
     def _configure_application_sidecar(self):
         # Save the application image in the status to allow overriding it with the reverse proxy entry point
         if self.spec.image and (
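The reverse-proxy function now gets an `info` logger sink only when `spec.loggerSinks` is not already set, so the default stays overridable. A sketch of raising the level before deploy (assumes `app` is the application function; the sink name mirrors the default above):

# Sketch: override the reverse proxy log level before deploying.
app.set_config("spec.loggerSinks", [{"level": "debug", "sink": "myStdoutLoggerSink"}])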
mlrun/runtimes/pod.py
CHANGED
@@ -1174,9 +1174,9 @@ class KubeResource(BaseRuntime, KfpAdapterMixin):
         """
         if node_name:
             self.spec.node_name = node_name
-        if node_selector:
+        if node_selector is not None:
             self.spec.node_selector = node_selector
-        if affinity:
+        if affinity is not None:
             self.spec.affinity = affinity
         if tolerations is not None:
             self.spec.tolerations = tolerations
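Switching from truthiness to `is not None` matters for empty containers: an explicitly passed empty dict previously looked the same as an omitted argument, so an existing node selector could not be cleared. A sketch of the difference:

# Sketch: why `is not None` differs from a truthiness check for dicts.
node_selector: dict = {}  # caller explicitly asks for "no node selector"

updated_old = bool(node_selector)        # False -> old code skipped the assignment
updated_new = node_selector is not None  # True  -> new code applies the empty dict

assert (updated_old, updated_new) == (False, True)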
mlrun/runtimes/remotesparkjob.py
CHANGED
@@ -102,16 +102,13 @@ class RemoteSparkRuntime(KubejobRuntime):
 
     @classmethod
     def deploy_default_image(cls):
-
-        from mlrun.run import new_function
-
-        sj = new_function(
+        sj = mlrun.new_function(
             kind="remote-spark", name="remote-spark-default-image-deploy-temp"
         )
         sj.spec.build.image = cls.default_image
         sj.with_spark_service(spark_service="dummy-spark")
         sj.deploy()
-        get_run_db().delete_function(name=sj.metadata.name)
+        mlrun.get_run_db().delete_function(name=sj.metadata.name)
 
     def is_deployed(self):
         if (