PyPI - mlrun - Versions diffs - 1.7.0rc28__py3-none-any.whl → 1.7.0rc55__py3-none-any.whl - Mend

mlrun 1.7.0rc28__py3-none-any.whl → 1.7.0rc55__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mlrun might be problematic. Click here for more details.

Files changed (135)

mlrun/__main__.py +4 -2
mlrun/alerts/alert.py +75 -8
mlrun/artifacts/base.py +1 -0
mlrun/artifacts/manager.py +9 -2
mlrun/common/constants.py +4 -1
mlrun/common/db/sql_session.py +3 -2
mlrun/common/formatters/__init__.py +1 -0
mlrun/common/formatters/artifact.py +1 -0
mlrun/{model_monitoring/application.py → common/formatters/feature_set.py} +20 -6
mlrun/common/formatters/run.py +3 -0
mlrun/common/helpers.py +0 -1
mlrun/common/schemas/__init__.py +3 -1
mlrun/common/schemas/alert.py +15 -12
mlrun/common/schemas/api_gateway.py +6 -6
mlrun/common/schemas/auth.py +5 -0
mlrun/common/schemas/client_spec.py +0 -1
mlrun/common/schemas/common.py +7 -4
mlrun/common/schemas/frontend_spec.py +7 -0
mlrun/common/schemas/function.py +7 -0
mlrun/common/schemas/model_monitoring/__init__.py +4 -3
mlrun/common/schemas/model_monitoring/constants.py +41 -26
mlrun/common/schemas/model_monitoring/model_endpoints.py +23 -47
mlrun/common/schemas/notification.py +69 -12
mlrun/common/schemas/project.py +45 -12
mlrun/common/schemas/workflow.py +10 -2
mlrun/common/types.py +1 -0
mlrun/config.py +91 -35
mlrun/data_types/data_types.py +6 -1
mlrun/data_types/spark.py +2 -2
mlrun/data_types/to_pandas.py +57 -25
mlrun/datastore/__init__.py +1 -0
mlrun/datastore/alibaba_oss.py +3 -2
mlrun/datastore/azure_blob.py +125 -37
mlrun/datastore/base.py +42 -21
mlrun/datastore/datastore.py +4 -2
mlrun/datastore/datastore_profile.py +1 -1
mlrun/datastore/dbfs_store.py +3 -7
mlrun/datastore/filestore.py +1 -3
mlrun/datastore/google_cloud_storage.py +85 -29
mlrun/datastore/inmem.py +4 -1
mlrun/datastore/redis.py +1 -0
mlrun/datastore/s3.py +25 -12
mlrun/datastore/sources.py +76 -4
mlrun/datastore/spark_utils.py +30 -0
mlrun/datastore/storeytargets.py +151 -0
mlrun/datastore/targets.py +102 -131
mlrun/datastore/v3io.py +1 -0
mlrun/db/base.py +15 -6
mlrun/db/httpdb.py +57 -28
mlrun/db/nopdb.py +29 -5
mlrun/errors.py +20 -3
mlrun/execution.py +46 -5
mlrun/feature_store/api.py +25 -1
mlrun/feature_store/common.py +6 -11
mlrun/feature_store/feature_vector.py +3 -1
mlrun/feature_store/retrieval/job.py +4 -1
mlrun/feature_store/retrieval/spark_merger.py +10 -39
mlrun/feature_store/steps.py +8 -0
mlrun/frameworks/_common/plan.py +3 -3
mlrun/frameworks/_ml_common/plan.py +1 -1
mlrun/frameworks/parallel_coordinates.py +2 -3
mlrun/frameworks/sklearn/mlrun_interface.py +13 -3
mlrun/k8s_utils.py +48 -2
mlrun/launcher/client.py +6 -6
mlrun/launcher/local.py +2 -2
mlrun/model.py +215 -34
mlrun/model_monitoring/api.py +38 -24
mlrun/model_monitoring/applications/__init__.py +1 -2
mlrun/model_monitoring/applications/_application_steps.py +60 -29
mlrun/model_monitoring/applications/base.py +2 -174
mlrun/model_monitoring/applications/context.py +197 -70
mlrun/model_monitoring/applications/evidently_base.py +11 -85
mlrun/model_monitoring/applications/histogram_data_drift.py +21 -16
mlrun/model_monitoring/applications/results.py +4 -4
mlrun/model_monitoring/controller.py +110 -282
mlrun/model_monitoring/db/stores/__init__.py +8 -3
mlrun/model_monitoring/db/stores/base/store.py +3 -0
mlrun/model_monitoring/db/stores/sqldb/models/base.py +9 -7
mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +18 -3
mlrun/model_monitoring/db/stores/sqldb/sql_store.py +43 -23
mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +48 -35
mlrun/model_monitoring/db/tsdb/__init__.py +7 -2
mlrun/model_monitoring/db/tsdb/base.py +147 -15
mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +94 -55
mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +0 -3
mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +144 -38
mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +44 -3
mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +246 -57
mlrun/model_monitoring/helpers.py +70 -50
mlrun/model_monitoring/stream_processing.py +96 -195
mlrun/model_monitoring/writer.py +13 -5
mlrun/package/packagers/default_packager.py +2 -2
mlrun/projects/operations.py +16 -8
mlrun/projects/pipelines.py +126 -115
mlrun/projects/project.py +286 -129
mlrun/render.py +3 -3
mlrun/run.py +38 -19
mlrun/runtimes/__init__.py +19 -8
mlrun/runtimes/base.py +4 -1
mlrun/runtimes/daskjob.py +1 -1
mlrun/runtimes/funcdoc.py +1 -1
mlrun/runtimes/kubejob.py +6 -6
mlrun/runtimes/local.py +12 -5
mlrun/runtimes/nuclio/api_gateway.py +68 -8
mlrun/runtimes/nuclio/application/application.py +307 -70
mlrun/runtimes/nuclio/function.py +63 -14
mlrun/runtimes/nuclio/serving.py +10 -10
mlrun/runtimes/pod.py +25 -19
mlrun/runtimes/remotesparkjob.py +2 -5
mlrun/runtimes/sparkjob/spark3job.py +16 -17
mlrun/runtimes/utils.py +34 -0
mlrun/serving/routers.py +2 -5
mlrun/serving/server.py +37 -19
mlrun/serving/states.py +30 -3
mlrun/serving/v2_serving.py +44 -35
mlrun/track/trackers/mlflow_tracker.py +5 -0
mlrun/utils/async_http.py +1 -1
mlrun/utils/db.py +18 -0
mlrun/utils/helpers.py +150 -36
mlrun/utils/http.py +1 -1
mlrun/utils/notifications/notification/__init__.py +0 -1
mlrun/utils/notifications/notification/webhook.py +8 -1
mlrun/utils/notifications/notification_pusher.py +1 -1
mlrun/utils/v3io_clients.py +2 -2
mlrun/utils/version/version.json +2 -2
{mlrun-1.7.0rc28.dist-info → mlrun-1.7.0rc55.dist-info}/METADATA +153 -66
{mlrun-1.7.0rc28.dist-info → mlrun-1.7.0rc55.dist-info}/RECORD +131 -134
{mlrun-1.7.0rc28.dist-info → mlrun-1.7.0rc55.dist-info}/WHEEL +1 -1
mlrun/feature_store/retrieval/conversion.py +0 -271
mlrun/model_monitoring/controller_handler.py +0 -37
mlrun/model_monitoring/evidently_application.py +0 -20
mlrun/model_monitoring/prometheus.py +0 -216
{mlrun-1.7.0rc28.dist-info → mlrun-1.7.0rc55.dist-info}/LICENSE +0 -0
{mlrun-1.7.0rc28.dist-info → mlrun-1.7.0rc55.dist-info}/entry_points.txt +0 -0
{mlrun-1.7.0rc28.dist-info → mlrun-1.7.0rc55.dist-info}/top_level.txt +0 -0

mlrun/runtimes/nuclio/function.py CHANGED Viewed

@@ -23,6 +23,7 @@ import inflection
 import nuclio
 import nuclio.utils
 import requests
+import semver
 from aiohttp.client import ClientSession
 from kubernetes import client
 from mlrun_pipelines.common.mounts import VolumeMount
@@ -296,10 +297,37 @@ class RemoteRuntime(KubeResource):
         """
         if hasattr(spec, "to_dict"):
             spec = spec.to_dict()
+        self._validate_triggers(spec)
         spec["name"] = name
         self.spec.config[f"spec.triggers.{name}"] = spec
         return self
+    def _validate_triggers(self, spec):
+        # ML-7763 / NUC-233
+        min_nuclio_version = "1.13.12"
+        if mlconf.nuclio_version and semver.VersionInfo.parse(
+            mlconf.nuclio_version
+        ) < semver.VersionInfo.parse(min_nuclio_version):
+            explicit_ack_enabled = False
+            num_triggers = 0
+            trigger_name = spec.get("name", "UNKNOWN")
+            for key, config in [(f"spec.triggers.{trigger_name}", spec)] + list(
+                self.spec.config.items()
+            ):
+                if key.startswith("spec.triggers."):
+                    num_triggers += 1
+                    explicit_ack_enabled = (
+                        config.get("explicitAckMode", "disable") != "disable"
+                    )
+            if num_triggers > 1 and explicit_ack_enabled:
+                raise mlrun.errors.MLRunInvalidArgumentError(
+                    "Multiple triggers cannot be used in conjunction with explicit ack. "
+                    f"Please upgrade to nuclio {min_nuclio_version} or newer."
+                )
     def with_source_archive(
         self,
         source,
@@ -418,14 +446,8 @@ class RemoteRuntime(KubeResource):
                 raise ValueError(
                     "gateway timeout must be greater than the worker timeout"
                 )
-            annotations["nginx.ingress.kubernetes.io/proxy-connect-timeout"] = (
-                f"{gateway_timeout}"
-            )
-            annotations["nginx.ingress.kubernetes.io/proxy-read-timeout"] = (
-                f"{gateway_timeout}"
-            )
-            annotations["nginx.ingress.kubernetes.io/proxy-send-timeout"] = (
-                f"{gateway_timeout}"
+            mlrun.runtimes.utils.enrich_gateway_timeout_annotations(
+                annotations, gateway_timeout
             )
         trigger = nuclio.HttpTrigger(
@@ -446,6 +468,11 @@ class RemoteRuntime(KubeResource):
         return self
     def from_image(self, image):
+        """
+        Deploy the function with an existing nuclio processor image.
+        :param image: image name
+        """
         config = nuclio.config.new_config()
         update_in(
             config,
@@ -496,6 +523,11 @@ class RemoteRuntime(KubeResource):
         extra_attributes = extra_attributes or {}
         if ack_window_size:
             extra_attributes["ackWindowSize"] = ack_window_size
+        access_key = kwargs.pop("access_key", None)
+        if not access_key:
+            access_key = self._resolve_v3io_access_key()
         self.add_trigger(
             name,
             V3IOStreamTrigger(
@@ -507,11 +539,14 @@ class RemoteRuntime(KubeResource):
                 webapi=endpoint or "http://v3io-webapi:8081",
                 extra_attributes=extra_attributes,
                 read_batch_size=256,
+                access_key=access_key,
                 **kwargs,
             ),
         )
-        self.spec.min_replicas = shards
-        self.spec.max_replicas = shards
+        if self.spec.min_replicas != shards or self.spec.max_replicas != shards:
+            logger.warning(f"Setting function replicas to {shards}")
+            self.spec.min_replicas = shards
+            self.spec.max_replicas = shards
     def deploy(
         self,
@@ -566,6 +601,9 @@ class RemoteRuntime(KubeResource):
         # this also means that the function object will be updated with the function status
         self._wait_for_function_deployment(db, verbose=verbose)
+        return self._enrich_command_from_status()
+    def _enrich_command_from_status(self):
         # NOTE: on older mlrun versions & nuclio versions, function are exposed via NodePort
         #       now, functions can be not exposed (using service type ClusterIP) and hence
         #       for BC we first try to populate the external invocation url, and then
@@ -679,7 +717,7 @@ class RemoteRuntime(KubeResource):
             "State thresholds do not apply for nuclio as it has its own function pods healthiness monitoring"
         )
-    @min_nuclio_versions("1.12.8")
+    @min_nuclio_versions("1.13.1")
     def disable_default_http_trigger(
         self,
     ):
@@ -688,7 +726,7 @@ class RemoteRuntime(KubeResource):
         """
         self.spec.disable_default_http_trigger = True
-    @min_nuclio_versions("1.12.8")
+    @min_nuclio_versions("1.13.1")
     def enable_default_http_trigger(
         self,
     ):
@@ -697,6 +735,10 @@ class RemoteRuntime(KubeResource):
         """
         self.spec.disable_default_http_trigger = False
+    def skip_image_enrichment(self):
+        # make sure the API does not enrich the base image if the function is not a python function
+        return self.spec.nuclio_runtime and "python" not in self.spec.nuclio_runtime
     def _get_state(
         self,
         dashboard="",
@@ -739,7 +781,7 @@ class RemoteRuntime(KubeResource):
             return state, text, last_log_timestamp
         try:
-            text, last_log_timestamp = self._get_db().get_builder_status(
+            text, last_log_timestamp = self._get_db().get_nuclio_deploy_status(
                 self, last_log_timestamp=last_log_timestamp, verbose=verbose
             )
         except mlrun.db.RunDBError:
@@ -990,7 +1032,7 @@ class RemoteRuntime(KubeResource):
         if command and not command.startswith("http"):
             sidecar["command"] = mlrun.utils.helpers.as_list(command)
-        if args and sidecar["command"]:
+        if args and sidecar.get("command"):
             sidecar["args"] = mlrun.utils.helpers.as_list(args)
         # populate the sidecar resources from the function spec
@@ -1233,6 +1275,13 @@ class RemoteRuntime(KubeResource):
         return self._resolve_invocation_url("", force_external_address)
+    @staticmethod
+    def _resolve_v3io_access_key():
+        # Nuclio supports generating access key for v3io stream trigger only from version 1.13.11
+        if validate_nuclio_version_compatibility("1.13.11"):
+            return mlrun.model.Credentials.generate_access_key
+        return None
 def parse_logs(logs):
     logs = json.loads(logs)

mlrun/runtimes/nuclio/serving.py CHANGED Viewed

@@ -314,8 +314,8 @@ class ServingRuntime(RemoteRuntime):
         tracking_policy: Optional[Union["TrackingPolicy", dict]] = None,
         enable_tracking: bool = True,
     ) -> None:
-        """apply on your serving function to monitor a deployed model, including real-time dashboards to detect drift
-           and analyze performance.
+        """Apply on your serving function to monitor a deployed model, including real-time dashboards to detect drift
+        and analyze performance.
         :param stream_path:         Path/url of the tracking stream e.g. v3io:///users/mike/mystream
                                     you can use the "dummy://" path for test/simulation.
@@ -325,12 +325,12 @@ class ServingRuntime(RemoteRuntime):
         :param enable_tracking:     Enabled/Disable model-monitoring tracking.
                                     Default True (tracking enabled).
-                                example::
+        Example::
-                                    # initialize a new serving function
-                                    serving_fn = mlrun.import_function("hub://v2-model-server", new_name="serving")
-                                    # apply model monitoring
-                                    serving_fn.set_tracking()
+            # initialize a new serving function
+            serving_fn = mlrun.import_function("hub://v2-model-server", new_name="serving")
+            # apply model monitoring
+            serving_fn.set_tracking()
         """
         # Applying model monitoring configurations
@@ -480,7 +480,7 @@ class ServingRuntime(RemoteRuntime):
                 trigger_args = stream.trigger_args or {}
                 engine = self.spec.graph.engine or "async"
-                if mlrun.mlconf.is_explicit_ack() and engine == "async":
+                if mlrun.mlconf.is_explicit_ack_enabled() and engine == "async":
                     trigger_args["explicit_ack_mode"] = trigger_args.get(
                         "explicit_ack_mode", "explicitOnly"
                     )
@@ -676,7 +676,6 @@ class ServingRuntime(RemoteRuntime):
         """create mock server object for local testing/emulation
         :param namespace: one or list of namespaces/modules to search the steps classes/functions in
-        :param log_level: log level (error | info | debug)
         :param current_function: specify if you want to simulate a child function, * for all functions
         :param track_models: allow model tracking (disabled by default in the mock server)
         :param workdir:   working directory to locate the source code (if not the current one)
@@ -704,7 +703,7 @@ class ServingRuntime(RemoteRuntime):
             verbose=self.verbose,
             current_function=current_function,
             graph_initializer=self.spec.graph_initializer,
-            track_models=track_models and self.spec.track_models,
+            track_models=self.spec.track_models,
             function_uri=self._function_uri(),
             secret_sources=self.spec.secret_sources,
             default_content_type=self.spec.default_content_type,
@@ -715,6 +714,7 @@ class ServingRuntime(RemoteRuntime):
             namespace=namespace,
             logger=logger,
             is_mock=True,
+            monitoring_mock=track_models,
         )
         if workdir:

mlrun/runtimes/pod.py CHANGED Viewed

@@ -38,6 +38,7 @@ from ..k8s_utils import (
     generate_preemptible_nodes_affinity_terms,
     generate_preemptible_nodes_anti_affinity_terms,
     generate_preemptible_tolerations,
+    validate_node_selectors,
 )
 from ..utils import logger, update_in
 from .base import BaseRuntime, FunctionSpec, spec_fields
@@ -215,9 +216,7 @@ class KubeResourceSpec(FunctionSpec):
             image_pull_secret or mlrun.mlconf.function.spec.image_pull_secret.default
         )
         self.node_name = node_name
-        self.node_selector = (
-            node_selector or mlrun.mlconf.get_default_function_node_selector()
-        )
+        self.node_selector = node_selector or {}
         self._affinity = affinity
         self.priority_class_name = (
             priority_class_name or mlrun.mlconf.default_function_priority_class_name
@@ -532,7 +531,7 @@ class KubeResourceSpec(FunctionSpec):
             return
         # merge node selectors - precedence to existing node selector
-        self.node_selector = mlrun.utils.helpers.merge_with_precedence(
+        self.node_selector = mlrun.utils.helpers.merge_dicts_with_precedence(
             node_selector, self.node_selector
         )
@@ -1108,12 +1107,12 @@ class KubeResource(BaseRuntime, KfpAdapterMixin):
         :param state_thresholds: A dictionary of state to threshold. The supported states are:
-            * pending_scheduled - The pod/crd is scheduled on a node but not yet running
-            * pending_not_scheduled - The pod/crd is not yet scheduled on a node
-            * executing - The pod/crd started and is running
-            * image_pull_backoff - The pod/crd is in image pull backoff
-            See mlrun.mlconf.function.spec.state_thresholds for the default thresholds.
+                                 * pending_scheduled - The pod/crd is scheduled on a node but not yet running
+                                 * pending_not_scheduled - The pod/crd is not yet scheduled on a node
+                                 * executing - The pod/crd started and is running
+                                 * image_pull_backoff - The pod/crd is in image pull backoff
+                                See :code:`mlrun.mlconf.function.spec.state_thresholds` for the default thresholds.
         :param patch: Whether to merge the given thresholds with the existing thresholds (True, default)
                       or override them (False)
         """
@@ -1176,9 +1175,10 @@ class KubeResource(BaseRuntime, KfpAdapterMixin):
         """
         if node_name:
             self.spec.node_name = node_name
-        if node_selector:
+        if node_selector is not None:
+            validate_node_selectors(node_selectors=node_selector, raise_on_error=False)
             self.spec.node_selector = node_selector
-        if affinity:
+        if affinity is not None:
             self.spec.affinity = affinity
         if tolerations is not None:
             self.spec.tolerations = tolerations
@@ -1347,20 +1347,26 @@ class KubeResource(BaseRuntime, KfpAdapterMixin):
     def _build_image(
         self,
-        builder_env,
-        force_build,
-        mlrun_version_specifier,
-        show_on_failure,
-        skip_deployed,
-        watch,
-        is_kfp,
-        with_mlrun,
+        builder_env: dict,
+        force_build: bool,
+        mlrun_version_specifier: typing.Optional[bool],
+        show_on_failure: bool,
+        skip_deployed: bool,
+        watch: bool,
+        is_kfp: bool,
+        with_mlrun: typing.Optional[bool],
     ):
         # When we're in pipelines context we must watch otherwise the pipelines pod will exit before the operation
         # is actually done. (when a pipelines pod exits, the pipeline step marked as done)
         if is_kfp:
             watch = True
+        if skip_deployed and self.requires_build() and not self.is_deployed():
+            logger.warning(
+                f"Even though {skip_deployed=}, the build might be triggered due to the function's configuration. "
+                "See requires_build() and is_deployed() for reasoning."
+            )
         db = self._get_db()
         data = db.remote_builder(
             self,

mlrun/runtimes/remotesparkjob.py CHANGED Viewed

@@ -102,16 +102,13 @@ class RemoteSparkRuntime(KubejobRuntime):
     @classmethod
     def deploy_default_image(cls):
-        from mlrun import get_run_db
-        from mlrun.run import new_function
-        sj = new_function(
+        sj = mlrun.new_function(
             kind="remote-spark", name="remote-spark-default-image-deploy-temp"
         )
         sj.spec.build.image = cls.default_image
         sj.with_spark_service(spark_service="dummy-spark")
         sj.deploy()
-        get_run_db().delete_function(name=sj.metadata.name)
+        mlrun.get_run_db().delete_function(name=sj.metadata.name)
     def is_deployed(self):
         if (

mlrun/runtimes/sparkjob/spark3job.py CHANGED Viewed

@@ -18,6 +18,7 @@ from mlrun_pipelines.mounts import mount_v3io, mount_v3iod
 import mlrun.common.schemas.function
 import mlrun.errors
+import mlrun.k8s_utils
 import mlrun.runtimes.pod
 from mlrun.config import config
@@ -451,7 +452,7 @@ class Spark3JobSpec(KubeResourceSpec):
 class Spark3Runtime(KubejobRuntime):
     group = "sparkoperator.k8s.io"
     version = "v1beta2"
-    apiVersion = group + "/" + version
+    apiVersion = group + "/" + version  # noqa: N815
     kind = "spark"
     plural = "sparkapplications"
@@ -505,13 +506,11 @@ class Spark3Runtime(KubejobRuntime):
             raise NotImplementedError(
                 "Setting node name is not supported for spark runtime"
             )
-        # TODO add affinity support
-        # https://github.com/GoogleCloudPlatform/spark-on-k8s-operator/blob/master/pkg/apis/sparkoperator.k8s.io/v1beta2/types.go#L491
-        if affinity:
-            raise NotImplementedError(
-                "Setting affinity is not supported for spark runtime"
-            )
-        super().with_node_selection(node_name, node_selector, affinity, tolerations)
+        mlrun.k8s_utils.validate_node_selectors(node_selector, raise_on_error=False)
+        self.with_driver_node_selection(node_name, node_selector, affinity, tolerations)
+        self.with_executor_node_selection(
+            node_name, node_selector, affinity, tolerations
+        )
     def with_driver_node_selection(
         self,
@@ -537,11 +536,12 @@ class Spark3Runtime(KubejobRuntime):
             raise NotImplementedError(
                 "Setting node name is not supported for spark runtime"
             )
-        if affinity:
+        if affinity is not None:
             self.spec.driver_affinity = affinity
-        if node_selector:
+        if node_selector is not None:
+            mlrun.k8s_utils.validate_node_selectors(node_selector, raise_on_error=False)
             self.spec.driver_node_selector = node_selector
-        if tolerations:
+        if tolerations is not None:
             self.spec.driver_tolerations = tolerations
     def with_executor_node_selection(
@@ -568,11 +568,12 @@ class Spark3Runtime(KubejobRuntime):
             raise NotImplementedError(
                 "Setting node name is not supported for spark runtime"
             )
-        if affinity:
+        if affinity is not None:
             self.spec.executor_affinity = affinity
-        if node_selector:
+        if node_selector is not None:
+            mlrun.k8s_utils.validate_node_selectors(node_selector, raise_on_error=False)
             self.spec.executor_node_selector = node_selector
-        if tolerations:
+        if tolerations is not None:
             self.spec.executor_tolerations = tolerations
     def with_preemption_mode(
@@ -811,9 +812,7 @@ class Spark3Runtime(KubejobRuntime):
     @classmethod
     def deploy_default_image(cls, with_gpu=False):
-        from mlrun.run import new_function
-        sj = new_function(kind=cls.kind, name="spark-default-image-deploy-temp")
+        sj = mlrun.new_function(kind=cls.kind, name="spark-default-image-deploy-temp")
         sj.spec.build.image = cls._get_default_deployed_mlrun_image_name(with_gpu)
         # setting required resources

mlrun/runtimes/utils.py CHANGED Viewed

@@ -445,3 +445,37 @@ def enrich_run_labels(
         if label.value not in labels and enrichment:
             labels[label.value] = enrichment
     return labels
+def resolve_node_selectors(
+    project_node_selector: dict, instance_node_selector: dict
+) -> dict:
+    config_node_selector = mlrun.mlconf.get_default_function_node_selector()
+    if project_node_selector or config_node_selector:
+        mlrun.utils.logger.debug(
+            "Enriching node selector from project and mlrun config",
+            project_node_selector=project_node_selector,
+            config_node_selector=config_node_selector,
+        )
+        return mlrun.utils.helpers.merge_dicts_with_precedence(
+            config_node_selector,
+            project_node_selector,
+            instance_node_selector,
+        )
+    return instance_node_selector
+def enrich_gateway_timeout_annotations(annotations: dict, gateway_timeout: int):
+    """
+    Set gateway proxy connect/read/send timeout annotations
+    :param annotations:     The annotations to enrich
+    :param gateway_timeout: The timeout to set
+    """
+    if not gateway_timeout:
+        return
+    gateway_timeout_str = str(gateway_timeout)
+    annotations["nginx.ingress.kubernetes.io/proxy-connect-timeout"] = (
+        gateway_timeout_str
+    )
+    annotations["nginx.ingress.kubernetes.io/proxy-read-timeout"] = gateway_timeout_str
+    annotations["nginx.ingress.kubernetes.io/proxy-send-timeout"] = gateway_timeout_str

mlrun/serving/routers.py CHANGED Viewed

@@ -32,7 +32,6 @@ from mlrun.errors import err_to_str
 from mlrun.utils import logger, now_date
 from ..common.helpers import parse_versioned_object_uri
-from ..config import config
 from .server import GraphServer
 from .utils import RouterToDict, _extract_input_data, _update_result_body
 from .v2_serving import _ModelLogPusher
@@ -616,7 +615,7 @@ class VotingEnsemble(ParallelRun):
             logger.warn("GraphServer not initialized for VotingEnsemble instance")
             return
-        if not self.context.is_mock or self.context.server.track_models:
+        if not self.context.is_mock or self.context.monitoring_mock:
             self.model_endpoint_uid = _init_endpoint_record(server, self)
         self._update_weights(self.weights)
@@ -1057,9 +1056,7 @@ def _init_endpoint_record(
                 function_uri=graph_server.function_uri,
                 model=versioned_model_name,
                 model_class=voting_ensemble.__class__.__name__,
-                stream_path=config.model_endpoint_monitoring.store_prefixes.default.format(
-                    project=project, kind="stream"
-                ),
+                stream_path=voting_ensemble.context.stream.stream_uri,
                 active=True,
                 monitoring_mode=mlrun.common.schemas.model_monitoring.ModelMonitoringMode.enabled,
             ),

mlrun/serving/server.py CHANGED Viewed

@@ -22,10 +22,14 @@ import traceback
 import uuid
 from typing import Optional, Union
+from nuclio import Context as NuclioContext
+from nuclio.request import Logger as NuclioLogger
 import mlrun
 import mlrun.common.constants
 import mlrun.common.helpers
 import mlrun.model_monitoring
+import mlrun.utils
 from mlrun.config import config
 from mlrun.errors import err_to_str
 from mlrun.secrets import SecretsStore
@@ -38,10 +42,7 @@ from ..errors import MLRunInvalidArgumentError
 from ..model import ModelObj
 from ..utils import get_caller_globals
 from .states import RootFlowStep, RouterStep, get_function, graph_root_setter
-from .utils import (
-    event_id_key,
-    event_path_key,
-)
+from .utils import event_id_key, event_path_key
 class _StreamContext:
@@ -71,15 +72,15 @@ class _StreamContext:
                 function_uri, config.default_project
             )
-            stream_uri = mlrun.model_monitoring.get_stream_path(project=project)
+            self.stream_uri = mlrun.model_monitoring.get_stream_path(project=project)
             if log_stream:
                 # Update the stream path to the log stream value
-                stream_uri = log_stream.format(project=project)
+                self.stream_uri = log_stream.format(project=project)
             stream_args = parameters.get("stream_args", {})
-            self.output_stream = get_stream_pusher(stream_uri, **stream_args)
+            self.output_stream = get_stream_pusher(self.stream_uri, **stream_args)
 class GraphServer(ModelObj):
@@ -153,6 +154,7 @@ class GraphServer(ModelObj):
         resource_cache: ResourceCache = None,
         logger=None,
         is_mock=False,
+        monitoring_mock=False,
     ):
         """for internal use, initialize all steps (recursively)"""
@@ -165,6 +167,7 @@ class GraphServer(ModelObj):
         context = GraphContext(server=self, nuclio_context=context, logger=logger)
         context.is_mock = is_mock
+        context.monitoring_mock = monitoring_mock
         context.root = self.graph
         context.stream = _StreamContext(
@@ -321,9 +324,9 @@ def v2_serving_init(context, namespace=None):
         server.http_trigger = getattr(context.trigger, "kind", "http") == "http"
     context.logger.info_with(
         "Setting current function",
-        current_functiton=os.environ.get("SERVING_CURRENT_FUNCTION", ""),
+        current_function=os.getenv("SERVING_CURRENT_FUNCTION", ""),
     )
-    server.set_current_function(os.environ.get("SERVING_CURRENT_FUNCTION", ""))
+    server.set_current_function(os.getenv("SERVING_CURRENT_FUNCTION", ""))
     context.logger.info_with(
         "Initializing states", namespace=namespace or get_caller_globals()
     )
@@ -344,9 +347,14 @@ def v2_serving_init(context, namespace=None):
     if server.verbose:
         context.logger.info(server.to_yaml())
-    if hasattr(context, "platform") and hasattr(
-        context.platform, "set_termination_callback"
-    ):
+    _set_callbacks(server, context)
+def _set_callbacks(server, context):
+    if not server.graph.supports_termination() or not hasattr(context, "platform"):
+        return
+    if hasattr(context.platform, "set_termination_callback"):
         context.logger.info(
             "Setting termination callback to terminate graph on worker shutdown"
         )
@@ -358,7 +366,7 @@ def v2_serving_init(context, namespace=None):
         context.platform.set_termination_callback(termination_callback)
-    if hasattr(context, "platform") and hasattr(context.platform, "set_drain_callback"):
+    if hasattr(context.platform, "set_drain_callback"):
         context.logger.info(
             "Setting drain callback to terminate and restart the graph on a drain event (such as rebalancing)"
         )
@@ -385,12 +393,16 @@ def v2_serving_handler(context, event, get_body=False):
     # original path is saved in stream_path so it can be used by explicit ack, but path is reset to / as a
     # workaround for NUC-178
-    event.stream_path = event.path
+    # nuclio 1.12.12 added the topic attribute, and we must use it as part of the fix for NUC-233
+    # TODO: Remove fallback on event.path once support for nuclio<1.12.12 is dropped
+    event.stream_path = getattr(event, "topic", event.path)
     if hasattr(event, "trigger") and event.trigger.kind in (
         "kafka",
         "kafka-cluster",
         "v3ioStream",
         "v3io-stream",
+        "rabbit-mq",
+        "rabbitMq",
     ):
         event.path = "/"
@@ -417,7 +429,7 @@ def create_graph_server(
     parameters = parameters or {}
     server = GraphServer(graph, parameters, load_mode, verbose=verbose, **kwargs)
     server.set_current_function(
-        current_function or os.environ.get("SERVING_CURRENT_FUNCTION", "")
+        current_function or os.getenv("SERVING_CURRENT_FUNCTION", "")
     )
     return server
@@ -481,7 +493,13 @@ class Response:
 class GraphContext:
     """Graph context object"""
-    def __init__(self, level="info", logger=None, server=None, nuclio_context=None):
+    def __init__(
+        self,
+        level="info",  # Unused argument
+        logger=None,
+        server=None,
+        nuclio_context: Optional[NuclioContext] = None,
+    ) -> None:
         self.state = None
         self.logger = logger
         self.worker_id = 0
@@ -491,7 +509,7 @@ class GraphContext:
         self.root = None
         if nuclio_context:
-            self.logger = nuclio_context.logger
+            self.logger: NuclioLogger = nuclio_context.logger
             self.Response = nuclio_context.Response
             if hasattr(nuclio_context, "trigger") and hasattr(
                 nuclio_context.trigger, "kind"
@@ -501,7 +519,7 @@ class GraphContext:
             if hasattr(nuclio_context, "platform"):
                 self.platform = nuclio_context.platform
         elif not logger:
-            self.logger = mlrun.utils.helpers.logger
+            self.logger: mlrun.utils.Logger = mlrun.utils.logger
         self._server = server
         self.current_function = None
@@ -514,7 +532,7 @@ class GraphContext:
         return self._server
     @property
-    def project(self):
+    def project(self) -> str:
         """current project name (for the current function)"""
         project, _, _, _ = mlrun.common.helpers.parse_versioned_object_uri(
             self._server.function_uri

mlrun 1.7.0rc28__py3-none-any.whl → 1.7.0rc55__py3-none-any.whl

Potentially problematic release.

mlrun 1.7.0rc28__py3-none-any.whl → 1.7.0rc55__py3-none-any.whl