PyPI - mlrun - Versions diffs - 1.7.0rc4__py3-none-any.whl → 1.7.2__py3-none-any.whl - Mend

mlrun 1.7.0rc4__py3-none-any.whl → 1.7.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mlrun might be problematic. Click here for more details.

Files changed (235) hide show

mlrun/__init__.py +11 -1
mlrun/__main__.py +39 -121
mlrun/{datastore/helpers.py → alerts/__init__.py} +2 -5
mlrun/alerts/alert.py +248 -0
mlrun/api/schemas/__init__.py +4 -3
mlrun/artifacts/__init__.py +8 -3
mlrun/artifacts/base.py +39 -254
mlrun/artifacts/dataset.py +9 -190
mlrun/artifacts/manager.py +73 -46
mlrun/artifacts/model.py +30 -158
mlrun/artifacts/plots.py +23 -380
mlrun/common/constants.py +73 -1
mlrun/common/db/sql_session.py +3 -2
mlrun/common/formatters/__init__.py +21 -0
mlrun/common/formatters/artifact.py +46 -0
mlrun/common/formatters/base.py +113 -0
mlrun/common/formatters/feature_set.py +44 -0
mlrun/common/formatters/function.py +46 -0
mlrun/common/formatters/pipeline.py +53 -0
mlrun/common/formatters/project.py +51 -0
mlrun/common/formatters/run.py +29 -0
mlrun/common/helpers.py +11 -1
mlrun/{runtimes → common/runtimes}/constants.py +32 -4
mlrun/common/schemas/__init__.py +31 -4
mlrun/common/schemas/alert.py +202 -0
mlrun/common/schemas/api_gateway.py +196 -0
mlrun/common/schemas/artifact.py +28 -1
mlrun/common/schemas/auth.py +13 -2
mlrun/common/schemas/client_spec.py +2 -1
mlrun/common/schemas/common.py +7 -4
mlrun/common/schemas/constants.py +3 -0
mlrun/common/schemas/feature_store.py +58 -28
mlrun/common/schemas/frontend_spec.py +8 -0
mlrun/common/schemas/function.py +11 -0
mlrun/common/schemas/hub.py +7 -9
mlrun/common/schemas/model_monitoring/__init__.py +21 -4
mlrun/common/schemas/model_monitoring/constants.py +136 -42
mlrun/common/schemas/model_monitoring/grafana.py +9 -5
mlrun/common/schemas/model_monitoring/model_endpoints.py +89 -41
mlrun/common/schemas/notification.py +69 -12
mlrun/{runtimes/mpijob/v1alpha1.py → common/schemas/pagination.py} +10 -13
mlrun/common/schemas/pipeline.py +7 -0
mlrun/common/schemas/project.py +67 -16
mlrun/common/schemas/runs.py +17 -0
mlrun/common/schemas/schedule.py +1 -1
mlrun/common/schemas/workflow.py +10 -2
mlrun/common/types.py +14 -1
mlrun/config.py +233 -58
mlrun/data_types/data_types.py +11 -1
mlrun/data_types/spark.py +5 -4
mlrun/data_types/to_pandas.py +75 -34
mlrun/datastore/__init__.py +8 -10
mlrun/datastore/alibaba_oss.py +131 -0
mlrun/datastore/azure_blob.py +131 -43
mlrun/datastore/base.py +107 -47
mlrun/datastore/datastore.py +17 -7
mlrun/datastore/datastore_profile.py +91 -7
mlrun/datastore/dbfs_store.py +3 -7
mlrun/datastore/filestore.py +1 -3
mlrun/datastore/google_cloud_storage.py +92 -32
mlrun/datastore/hdfs.py +5 -0
mlrun/datastore/inmem.py +6 -3
mlrun/datastore/redis.py +3 -2
mlrun/datastore/s3.py +30 -12
mlrun/datastore/snowflake_utils.py +45 -0
mlrun/datastore/sources.py +274 -59
mlrun/datastore/spark_utils.py +30 -0
mlrun/datastore/store_resources.py +9 -7
mlrun/datastore/storeytargets.py +151 -0
mlrun/datastore/targets.py +387 -119
mlrun/datastore/utils.py +68 -5
mlrun/datastore/v3io.py +28 -50
mlrun/db/auth_utils.py +152 -0
mlrun/db/base.py +245 -20
mlrun/db/factory.py +1 -4
mlrun/db/httpdb.py +909 -231
mlrun/db/nopdb.py +279 -14
mlrun/errors.py +35 -5
mlrun/execution.py +111 -38
mlrun/feature_store/__init__.py +0 -2
mlrun/feature_store/api.py +46 -53
mlrun/feature_store/common.py +6 -11
mlrun/feature_store/feature_set.py +48 -23
mlrun/feature_store/feature_vector.py +13 -2
mlrun/feature_store/ingestion.py +7 -6
mlrun/feature_store/retrieval/base.py +9 -4
mlrun/feature_store/retrieval/dask_merger.py +2 -0
mlrun/feature_store/retrieval/job.py +13 -4
mlrun/feature_store/retrieval/local_merger.py +2 -0
mlrun/feature_store/retrieval/spark_merger.py +24 -32
mlrun/feature_store/steps.py +38 -19
mlrun/features.py +6 -14
mlrun/frameworks/_common/plan.py +3 -3
mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +7 -12
mlrun/frameworks/_ml_common/plan.py +1 -1
mlrun/frameworks/auto_mlrun/auto_mlrun.py +2 -2
mlrun/frameworks/lgbm/__init__.py +1 -1
mlrun/frameworks/lgbm/callbacks/callback.py +2 -4
mlrun/frameworks/lgbm/model_handler.py +1 -1
mlrun/frameworks/parallel_coordinates.py +4 -4
mlrun/frameworks/pytorch/__init__.py +2 -2
mlrun/frameworks/sklearn/__init__.py +1 -1
mlrun/frameworks/sklearn/mlrun_interface.py +13 -3
mlrun/frameworks/tf_keras/__init__.py +5 -2
mlrun/frameworks/tf_keras/callbacks/logging_callback.py +1 -1
mlrun/frameworks/tf_keras/mlrun_interface.py +2 -2
mlrun/frameworks/xgboost/__init__.py +1 -1
mlrun/k8s_utils.py +57 -12
mlrun/launcher/__init__.py +1 -1
mlrun/launcher/base.py +6 -5
mlrun/launcher/client.py +13 -11
mlrun/launcher/factory.py +1 -1
mlrun/launcher/local.py +15 -5
mlrun/launcher/remote.py +10 -3
mlrun/lists.py +6 -2
mlrun/model.py +297 -48
mlrun/model_monitoring/__init__.py +1 -1
mlrun/model_monitoring/api.py +152 -357
mlrun/model_monitoring/applications/__init__.py +10 -0
mlrun/model_monitoring/applications/_application_steps.py +190 -0
mlrun/model_monitoring/applications/base.py +108 -0
mlrun/model_monitoring/applications/context.py +341 -0
mlrun/model_monitoring/{evidently_application.py → applications/evidently_base.py} +27 -22
mlrun/model_monitoring/applications/histogram_data_drift.py +227 -91
mlrun/model_monitoring/applications/results.py +99 -0
mlrun/model_monitoring/controller.py +130 -303
mlrun/model_monitoring/{stores/models/sqlite.py → db/__init__.py} +5 -10
mlrun/model_monitoring/db/stores/__init__.py +136 -0
mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
mlrun/model_monitoring/db/stores/base/store.py +213 -0
mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +71 -0
mlrun/model_monitoring/db/stores/sqldb/models/base.py +190 -0
mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +103 -0
mlrun/model_monitoring/{stores/models/mysql.py → db/stores/sqldb/models/sqlite.py} +19 -13
mlrun/model_monitoring/db/stores/sqldb/sql_store.py +659 -0
mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +726 -0
mlrun/model_monitoring/db/tsdb/__init__.py +105 -0
mlrun/model_monitoring/db/tsdb/base.py +448 -0
mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +298 -0
mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +42 -0
mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +522 -0
mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +158 -0
mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +849 -0
mlrun/model_monitoring/features_drift_table.py +34 -22
mlrun/model_monitoring/helpers.py +177 -39
mlrun/model_monitoring/model_endpoint.py +3 -2
mlrun/model_monitoring/stream_processing.py +165 -398
mlrun/model_monitoring/tracking_policy.py +7 -1
mlrun/model_monitoring/writer.py +161 -125
mlrun/package/packagers/default_packager.py +2 -2
mlrun/package/packagers_manager.py +1 -0
mlrun/package/utils/_formatter.py +2 -2
mlrun/platforms/__init__.py +11 -10
mlrun/platforms/iguazio.py +67 -228
mlrun/projects/__init__.py +6 -1
mlrun/projects/operations.py +47 -20
mlrun/projects/pipelines.py +396 -249
mlrun/projects/project.py +1176 -406
mlrun/render.py +28 -22
mlrun/run.py +208 -181
mlrun/runtimes/__init__.py +76 -11
mlrun/runtimes/base.py +54 -24
mlrun/runtimes/daskjob.py +9 -2
mlrun/runtimes/databricks_job/databricks_runtime.py +1 -0
mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
mlrun/runtimes/funcdoc.py +1 -29
mlrun/runtimes/kubejob.py +34 -128
mlrun/runtimes/local.py +39 -10
mlrun/runtimes/mpijob/__init__.py +0 -20
mlrun/runtimes/mpijob/abstract.py +8 -8
mlrun/runtimes/mpijob/v1.py +1 -1
mlrun/runtimes/nuclio/__init__.py +1 -0
mlrun/runtimes/nuclio/api_gateway.py +769 -0
mlrun/runtimes/nuclio/application/__init__.py +15 -0
mlrun/runtimes/nuclio/application/application.py +758 -0
mlrun/runtimes/nuclio/application/reverse_proxy.go +95 -0
mlrun/runtimes/nuclio/function.py +188 -68
mlrun/runtimes/nuclio/serving.py +57 -60
mlrun/runtimes/pod.py +191 -58
mlrun/runtimes/remotesparkjob.py +11 -8
mlrun/runtimes/sparkjob/spark3job.py +17 -18
mlrun/runtimes/utils.py +40 -73
mlrun/secrets.py +6 -2
mlrun/serving/__init__.py +8 -1
mlrun/serving/remote.py +2 -3
mlrun/serving/routers.py +89 -64
mlrun/serving/server.py +54 -26
mlrun/serving/states.py +187 -56
mlrun/serving/utils.py +19 -11
mlrun/serving/v2_serving.py +136 -63
mlrun/track/tracker.py +2 -1
mlrun/track/trackers/mlflow_tracker.py +5 -0
mlrun/utils/async_http.py +26 -6
mlrun/utils/db.py +18 -0
mlrun/utils/helpers.py +375 -105
mlrun/utils/http.py +2 -2
mlrun/utils/logger.py +75 -9
mlrun/utils/notifications/notification/__init__.py +14 -10
mlrun/utils/notifications/notification/base.py +48 -0
mlrun/utils/notifications/notification/console.py +2 -0
mlrun/utils/notifications/notification/git.py +24 -1
mlrun/utils/notifications/notification/ipython.py +2 -0
mlrun/utils/notifications/notification/slack.py +96 -21
mlrun/utils/notifications/notification/webhook.py +63 -2
mlrun/utils/notifications/notification_pusher.py +146 -16
mlrun/utils/regex.py +9 -0
mlrun/utils/retryer.py +3 -2
mlrun/utils/v3io_clients.py +2 -3
mlrun/utils/version/version.json +2 -2
mlrun-1.7.2.dist-info/METADATA +390 -0
mlrun-1.7.2.dist-info/RECORD +351 -0
{mlrun-1.7.0rc4.dist-info → mlrun-1.7.2.dist-info}/WHEEL +1 -1
mlrun/feature_store/retrieval/conversion.py +0 -271
mlrun/kfpops.py +0 -868
mlrun/model_monitoring/application.py +0 -310
mlrun/model_monitoring/batch.py +0 -974
mlrun/model_monitoring/controller_handler.py +0 -37
mlrun/model_monitoring/prometheus.py +0 -216
mlrun/model_monitoring/stores/__init__.py +0 -111
mlrun/model_monitoring/stores/kv_model_endpoint_store.py +0 -574
mlrun/model_monitoring/stores/model_endpoint_store.py +0 -145
mlrun/model_monitoring/stores/models/__init__.py +0 -27
mlrun/model_monitoring/stores/models/base.py +0 -84
mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -382
mlrun/platforms/other.py +0 -305
mlrun-1.7.0rc4.dist-info/METADATA +0 -269
mlrun-1.7.0rc4.dist-info/RECORD +0 -321
{mlrun-1.7.0rc4.dist-info → mlrun-1.7.2.dist-info}/LICENSE +0 -0
{mlrun-1.7.0rc4.dist-info → mlrun-1.7.2.dist-info}/entry_points.txt +0 -0
{mlrun-1.7.0rc4.dist-info → mlrun-1.7.2.dist-info}/top_level.txt +0 -0

mlrun/runtimes/sparkjob/spark3job.py CHANGED Viewed

@@ -14,15 +14,16 @@
 import typing
 import kubernetes.client
+from mlrun_pipelines.mounts import mount_v3io, mount_v3iod
 import mlrun.common.schemas.function
 import mlrun.errors
+import mlrun.k8s_utils
 import mlrun.runtimes.pod
 from mlrun.config import config
 from ...execution import MLClientCtx
 from ...model import RunObject
-from ...platforms.iguazio import mount_v3io, mount_v3iod
 from ...utils import update_in, verify_field_regex
 from ..kubejob import KubejobRuntime
 from ..pod import KubeResourceSpec
@@ -451,7 +452,7 @@ class Spark3JobSpec(KubeResourceSpec):
 class Spark3Runtime(KubejobRuntime):
     group = "sparkoperator.k8s.io"
     version = "v1beta2"
-    apiVersion = group + "/" + version
+    apiVersion = group + "/" + version  # noqa: N815
     kind = "spark"
     plural = "sparkapplications"
@@ -505,13 +506,11 @@ class Spark3Runtime(KubejobRuntime):
             raise NotImplementedError(
                 "Setting node name is not supported for spark runtime"
             )
-        # TODO add affinity support
-        # https://github.com/GoogleCloudPlatform/spark-on-k8s-operator/blob/master/pkg/apis/sparkoperator.k8s.io/v1beta2/types.go#L491
-        if affinity:
-            raise NotImplementedError(
-                "Setting affinity is not supported for spark runtime"
-            )
-        super().with_node_selection(node_name, node_selector, affinity, tolerations)
+        mlrun.k8s_utils.validate_node_selectors(node_selector, raise_on_error=False)
+        self.with_driver_node_selection(node_name, node_selector, affinity, tolerations)
+        self.with_executor_node_selection(
+            node_name, node_selector, affinity, tolerations
+        )
     def with_driver_node_selection(
         self,
@@ -537,11 +536,12 @@ class Spark3Runtime(KubejobRuntime):
             raise NotImplementedError(
                 "Setting node name is not supported for spark runtime"
             )
-        if affinity:
+        if affinity is not None:
             self.spec.driver_affinity = affinity
-        if node_selector:
+        if node_selector is not None:
+            mlrun.k8s_utils.validate_node_selectors(node_selector, raise_on_error=False)
             self.spec.driver_node_selector = node_selector
-        if tolerations:
+        if tolerations is not None:
             self.spec.driver_tolerations = tolerations
     def with_executor_node_selection(
@@ -568,11 +568,12 @@ class Spark3Runtime(KubejobRuntime):
             raise NotImplementedError(
                 "Setting node name is not supported for spark runtime"
             )
-        if affinity:
+        if affinity is not None:
             self.spec.executor_affinity = affinity
-        if node_selector:
+        if node_selector is not None:
+            mlrun.k8s_utils.validate_node_selectors(node_selector, raise_on_error=False)
             self.spec.executor_node_selector = node_selector
-        if tolerations:
+        if tolerations is not None:
             self.spec.executor_tolerations = tolerations
     def with_preemption_mode(
@@ -811,9 +812,7 @@ class Spark3Runtime(KubejobRuntime):
     @classmethod
     def deploy_default_image(cls, with_gpu=False):
-        from mlrun.run import new_function
-        sj = new_function(kind=cls.kind, name="spark-default-image-deploy-temp")
+        sj = mlrun.new_function(kind=cls.kind, name="spark-default-image-deploy-temp")
         sj.spec.build.image = cls._get_default_deployed_mlrun_image_name(with_gpu)
         # setting required resources

mlrun/runtimes/utils.py CHANGED Viewed

@@ -20,17 +20,17 @@ from io import StringIO
 from sys import stderr
 import pandas as pd
-from kubernetes import client
 import mlrun
 import mlrun.common.constants
+import mlrun.common.constants as mlrun_constants
 import mlrun.common.schemas
 import mlrun.utils.regex
 from mlrun.artifacts import TableArtifact
+from mlrun.common.runtimes.constants import RunLabels
 from mlrun.config import config
 from mlrun.errors import err_to_str
 from mlrun.frameworks.parallel_coordinates import gen_pcp_plot
-from mlrun.runtimes.constants import RunLabels
 from mlrun.runtimes.generators import selector
 from mlrun.utils import get_in, helpers, logger, verify_field_regex
@@ -39,9 +39,6 @@ class RunError(Exception):
     pass
-mlrun_key = "mlrun/"
 class _ContextStore:
     def __init__(self):
         self._context = None
@@ -280,43 +277,6 @@ def get_item_name(item, attr="name"):
         return getattr(item, attr, None)
-def apply_kfp(modify, cop, runtime):
-    modify(cop)
-    # Have to do it here to avoid circular dependencies
-    from .pod import AutoMountType
-    if AutoMountType.is_auto_modifier(modify):
-        runtime.spec.disable_auto_mount = True
-    api = client.ApiClient()
-    for k, v in cop.pod_labels.items():
-        runtime.metadata.labels[k] = v
-    for k, v in cop.pod_annotations.items():
-        runtime.metadata.annotations[k] = v
-    if cop.container.env:
-        env_names = [
-            e.name if hasattr(e, "name") else e["name"] for e in runtime.spec.env
-        ]
-        for e in api.sanitize_for_serialization(cop.container.env):
-            name = e["name"]
-            if name in env_names:
-                runtime.spec.env[env_names.index(name)] = e
-            else:
-                runtime.spec.env.append(e)
-                env_names.append(name)
-        cop.container.env.clear()
-    if cop.volumes and cop.container.volume_mounts:
-        vols = api.sanitize_for_serialization(cop.volumes)
-        mounts = api.sanitize_for_serialization(cop.container.volume_mounts)
-        runtime.spec.update_vols_and_mounts(vols, mounts)
-        cop.volumes.clear()
-        cop.container.volume_mounts.clear()
-    return runtime
 def verify_limits(
     resources_field_name,
     mem=None,
@@ -410,41 +370,13 @@ def generate_resources(mem=None, cpu=None, gpus=None, gpu_type="nvidia.com/gpu")
 def get_func_selector(project, name=None, tag=None):
-    s = [f"{mlrun_key}project={project}"]
+    s = [f"{mlrun_constants.MLRunInternalLabels.project}={project}"]
     if name:
-        s.append(f"{mlrun_key}function={name}")
-        s.append(f"{mlrun_key}tag={tag or 'latest'}")
+        s.append(f"{mlrun_constants.MLRunInternalLabels.function}={name}")
+        s.append(f"{mlrun_constants.MLRunInternalLabels.tag}={tag or 'latest'}")
     return s
-class k8s_resource:
-    kind = ""
-    per_run = False
-    per_function = False
-    k8client = None
-    def deploy_function(self, function):
-        pass
-    def release_function(self, function):
-        pass
-    def submit_run(self, function, runobj):
-        pass
-    def get_object(self, name, namespace=None):
-        return None
-    def get_status(self, name, namespace=None):
-        return None
-    def del_object(self, name, namespace=None):
-        pass
-    def get_pods(self, name, namespace=None, master=False):
-        return {}
 def enrich_function_from_dict(function, function_dict):
     override_function = mlrun.new_function(runtime=function_dict, kind=function.kind)
     for attribute in [
@@ -504,6 +436,7 @@ def enrich_run_labels(
 ):
     labels_enrichment = {
         RunLabels.owner: os.environ.get("V3IO_USERNAME") or getpass.getuser(),
+        # TODO: remove this in 1.9.0
         RunLabels.v3io_user: os.environ.get("V3IO_USERNAME"),
     }
     labels_to_enrich = labels_to_enrich or RunLabels.all()
@@ -512,3 +445,37 @@ def enrich_run_labels(
         if label.value not in labels and enrichment:
             labels[label.value] = enrichment
     return labels
+def resolve_node_selectors(
+    project_node_selector: dict, instance_node_selector: dict
+) -> dict:
+    config_node_selector = mlrun.mlconf.get_default_function_node_selector()
+    if project_node_selector or config_node_selector:
+        mlrun.utils.logger.debug(
+            "Enriching node selector from project and mlrun config",
+            project_node_selector=project_node_selector,
+            config_node_selector=config_node_selector,
+        )
+        return mlrun.utils.helpers.merge_dicts_with_precedence(
+            config_node_selector,
+            project_node_selector,
+            instance_node_selector,
+        )
+    return instance_node_selector
+def enrich_gateway_timeout_annotations(annotations: dict, gateway_timeout: int):
+    """
+    Set gateway proxy connect/read/send timeout annotations
+    :param annotations:     The annotations to enrich
+    :param gateway_timeout: The timeout to set
+    """
+    if not gateway_timeout:
+        return
+    gateway_timeout_str = str(gateway_timeout)
+    annotations["nginx.ingress.kubernetes.io/proxy-connect-timeout"] = (
+        gateway_timeout_str
+    )
+    annotations["nginx.ingress.kubernetes.io/proxy-read-timeout"] = gateway_timeout_str
+    annotations["nginx.ingress.kubernetes.io/proxy-send-timeout"] = gateway_timeout_str

mlrun/secrets.py CHANGED Viewed

@@ -163,15 +163,19 @@ def get_secret_or_env(
     Example::
-        secrets = { "KEY1": "VALUE1" }
+        secrets = {"KEY1": "VALUE1"}
         secret = get_secret_or_env("KEY1", secret_provider=secrets)
         # Using a function to retrieve a secret
         def my_secret_provider(key):
             # some internal logic to retrieve secret
             return value
-        secret = get_secret_or_env("KEY1", secret_provider=my_secret_provider, default="TOO-MANY-SECRETS")
+        secret = get_secret_or_env(
+            "KEY1", secret_provider=my_secret_provider, default="TOO-MANY-SECRETS"
+        )
     :param key: Secret key to look for
     :param secret_provider: Dictionary, callable or `SecretsStore` to extract the secret value from. If using a

mlrun/serving/__init__.py CHANGED Viewed

@@ -22,10 +22,17 @@ __all__ = [
     "RouterStep",
     "QueueStep",
     "ErrorStep",
+    "MonitoringApplicationStep",
 ]
 from .routers import ModelRouter, VotingEnsemble  # noqa
 from .server import GraphContext, GraphServer, create_graph_server  # noqa
-from .states import ErrorStep, QueueStep, RouterStep, TaskStep  # noqa
+from .states import (
+    ErrorStep,
+    QueueStep,
+    RouterStep,
+    TaskStep,
+    MonitoringApplicationStep,
+)  # noqa
 from .v1_serving import MLModelServer, new_v1_model_server  # noqa
 from .v2_serving import V2ModelServer  # noqa

mlrun/serving/remote.py CHANGED Viewed

@@ -172,8 +172,7 @@ class RemoteStep(storey.SendToHttp):
         if not self._session:
             self._session = mlrun.utils.HTTPSessionWithRetry(
                 self.retries,
-                self.backoff_factor
-                or mlrun.config.config.http_retry_defaults.backoff_factor,
+                self.backoff_factor or mlrun.mlconf.http_retry_defaults.backoff_factor,
                 retry_on_exception=False,
                 retry_on_status=self.retries > 0,
                 retry_on_post=True,
@@ -185,7 +184,7 @@ class RemoteStep(storey.SendToHttp):
             resp = self._session.request(
                 method,
                 url,
-                verify=mlrun.config.config.httpdb.http.verify,
+                verify=mlrun.mlconf.httpdb.http.verify,
                 headers=headers,
                 data=body,
                 timeout=self.timeout,

mlrun/serving/routers.py CHANGED Viewed

@@ -28,10 +28,10 @@ import numpy as np
 import mlrun
 import mlrun.common.model_monitoring
 import mlrun.common.schemas.model_monitoring
+from mlrun.errors import err_to_str
 from mlrun.utils import logger, now_date
 from ..common.helpers import parse_versioned_object_uri
-from ..config import config
 from .server import GraphServer
 from .utils import RouterToDict, _extract_input_data, _update_result_body
 from .v2_serving import _ModelLogPusher
@@ -271,7 +271,9 @@ class ParallelRun(BaseModelRouter):
             fn = mlrun.new_function("parallel", kind="serving")
             graph = fn.set_topology(
                 "router",
-                mlrun.serving.routers.ParallelRun(extend_event=True, executor_type=executor),
+                mlrun.serving.routers.ParallelRun(
+                    extend_event=True, executor_type=executor
+                ),
             )
             graph.add_route("child1", class_name="Cls1")
             graph.add_route("child2", class_name="Cls2", my_arg={"c": 7})
@@ -489,6 +491,7 @@ class VotingEnsemble(ParallelRun):
         executor_type: Union[ParallelRunnerModes, str] = ParallelRunnerModes.thread,
         format_response_with_col_name_flag: bool = False,
         prediction_col_name: str = "prediction",
+        shard_by_endpoint: typing.Optional[bool] = None,
         **kwargs,
     ):
         """Voting Ensemble
@@ -578,6 +581,8 @@ class VotingEnsemble(ParallelRun):
                               `{id: <id>, model_name: <name>, outputs: {..., prediction: [<predictions>], ...}}`
                               the prediction_col_name should be `prediction`.
                               by default, `prediction`
+        :param shard_by_endpoint: whether to use the endpoint as the partition/sharding key when writing to model
+                                  monitoring stream. Defaults to True.
         :param kwargs:        extra arguments
         """
         super().__init__(
@@ -604,6 +609,7 @@ class VotingEnsemble(ParallelRun):
         self.prediction_col_name = prediction_col_name or "prediction"
         self.format_response_with_col_name_flag = format_response_with_col_name_flag
         self.model_endpoint_uid = None
+        self.shard_by_endpoint = shard_by_endpoint
     def post_init(self, mode="sync"):
         server = getattr(self.context, "_server", None) or getattr(
@@ -613,7 +619,7 @@ class VotingEnsemble(ParallelRun):
             logger.warn("GraphServer not initialized for VotingEnsemble instance")
             return
-        if not self.context.is_mock or self.context.server.track_models:
+        if not self.context.is_mock or self.context.monitoring_mock:
             self.model_endpoint_uid = _init_endpoint_record(server, self)
         self._update_weights(self.weights)
@@ -905,7 +911,12 @@ class VotingEnsemble(ParallelRun):
         if self._model_logger and self.log_router:
             if "id" not in request:
                 request["id"] = response.body["id"]
-            self._model_logger.push(start, request, response.body)
+            partition_key = (
+                self.model_endpoint_uid if self.shard_by_endpoint is not False else None
+            )
+            self._model_logger.push(
+                start, request, response.body, partition_key=partition_key
+            )
         event.body = _update_result_body(
             self._result_path, original_body, response.body if response else None
         )
@@ -1013,7 +1024,7 @@ def _init_endpoint_record(
             graph_server.function_uri
         )
     except Exception as e:
-        logger.error("Failed to parse function URI", exc=e)
+        logger.error("Failed to parse function URI", exc=err_to_str(e))
         return None
     # Generating version model value based on the model name and model version
@@ -1027,74 +1038,88 @@ def _init_endpoint_record(
         function_uri=graph_server.function_uri, versioned_model=versioned_model_name
     ).uid
-    # If model endpoint object was found in DB, skip the creation process.
     try:
-        mlrun.get_run_db().get_model_endpoint(project=project, endpoint_id=endpoint_uid)
+        model_ep = mlrun.get_run_db().get_model_endpoint(
+            project=project, endpoint_id=endpoint_uid
+        )
     except mlrun.errors.MLRunNotFoundError:
+        model_ep = None
+    except mlrun.errors.MLRunBadRequestError as err:
+        logger.debug(
+            f"Cant reach to model endpoints store, due to  : {err}",
+        )
+        return
+    if voting_ensemble.context.server.track_models and not model_ep:
         logger.info("Creating a new model endpoint record", endpoint_id=endpoint_uid)
+        # Get the children model endpoints ids
+        children_uids = []
+        for _, c in voting_ensemble.routes.items():
+            if hasattr(c, "endpoint_uid"):
+                children_uids.append(c.endpoint_uid)
+        model_endpoint = mlrun.common.schemas.ModelEndpoint(
+            metadata=mlrun.common.schemas.ModelEndpointMetadata(
+                project=project, uid=endpoint_uid
+            ),
+            spec=mlrun.common.schemas.ModelEndpointSpec(
+                function_uri=graph_server.function_uri,
+                model=versioned_model_name,
+                model_class=voting_ensemble.__class__.__name__,
+                stream_path=voting_ensemble.context.stream.stream_uri,
+                active=True,
+                monitoring_mode=mlrun.common.schemas.model_monitoring.ModelMonitoringMode.enabled,
+            ),
+            status=mlrun.common.schemas.ModelEndpointStatus(
+                children=list(voting_ensemble.routes.keys()),
+                endpoint_type=mlrun.common.schemas.model_monitoring.EndpointType.ROUTER,
+                children_uids=children_uids,
+            ),
+        )
-        try:
-            # Get the children model endpoints ids
-            children_uids = []
-            for _, c in voting_ensemble.routes.items():
-                if hasattr(c, "endpoint_uid"):
-                    children_uids.append(c.endpoint_uid)
-            model_endpoint = mlrun.common.schemas.ModelEndpoint(
-                metadata=mlrun.common.schemas.ModelEndpointMetadata(
-                    project=project, uid=endpoint_uid
-                ),
-                spec=mlrun.common.schemas.ModelEndpointSpec(
-                    function_uri=graph_server.function_uri,
-                    model=versioned_model_name,
-                    model_class=voting_ensemble.__class__.__name__,
-                    stream_path=config.model_endpoint_monitoring.store_prefixes.default.format(
-                        project=project, kind="stream"
-                    ),
-                    active=True,
-                    monitoring_mode=mlrun.common.schemas.model_monitoring.ModelMonitoringMode.enabled
-                    if voting_ensemble.context.server.track_models
-                    else mlrun.common.schemas.model_monitoring.ModelMonitoringMode.disabled,
-                ),
-                status=mlrun.common.schemas.ModelEndpointStatus(
-                    children=list(voting_ensemble.routes.keys()),
-                    endpoint_type=mlrun.common.schemas.model_monitoring.EndpointType.ROUTER,
-                    children_uids=children_uids,
-                ),
-            )
+        db = mlrun.get_run_db()
-            db = mlrun.get_run_db()
+        db.create_model_endpoint(
+            project=project,
+            endpoint_id=model_endpoint.metadata.uid,
+            model_endpoint=model_endpoint.dict(),
+        )
+        # Update model endpoint children type
+        for model_endpoint in children_uids:
+            current_endpoint = db.get_model_endpoint(
+                project=project, endpoint_id=model_endpoint
+            )
+            current_endpoint.status.endpoint_type = (
+                mlrun.common.schemas.model_monitoring.EndpointType.LEAF_EP
+            )
             db.create_model_endpoint(
                 project=project,
-                endpoint_id=model_endpoint.metadata.uid,
-                model_endpoint=model_endpoint.dict(),
+                endpoint_id=model_endpoint,
+                model_endpoint=current_endpoint,
             )
-            # Update model endpoint children type
-            for model_endpoint in children_uids:
-                current_endpoint = db.get_model_endpoint(
-                    project=project, endpoint_id=model_endpoint
-                )
-                current_endpoint.status.endpoint_type = (
-                    mlrun.common.schemas.model_monitoring.EndpointType.LEAF_EP
-                )
-                db.create_model_endpoint(
-                    project=project,
-                    endpoint_id=model_endpoint,
-                    model_endpoint=current_endpoint,
-                )
-        except Exception as exc:
-            logger.warning(
-                "Failed creating model endpoint record",
-                exc=exc,
-                traceback=traceback.format_exc(),
-            )
-    except Exception as e:
-        logger.error("Failed to retrieve model endpoint object", exc=e)
+    elif (
+        model_ep
+        and (
+            model_ep.spec.monitoring_mode
+            == mlrun.common.schemas.model_monitoring.ModelMonitoringMode.enabled
+        )
+        != voting_ensemble.context.server.track_models
+    ):
+        monitoring_mode = (
+            mlrun.common.schemas.model_monitoring.ModelMonitoringMode.enabled
+            if voting_ensemble.context.server.track_models
+            else mlrun.common.schemas.model_monitoring.ModelMonitoringMode.disabled
+        )
+        db = mlrun.get_run_db()
+        db.patch_model_endpoint(
+            project=project,
+            endpoint_id=endpoint_uid,
+            attributes={"monitoring_mode": monitoring_mode},
+        )
+        logger.debug(
+            f"Updating model endpoint monitoring_mode to {monitoring_mode}",
+            endpoint_id=endpoint_uid,
+        )
     return endpoint_uid

mlrun 1.7.0rc4__py3-none-any.whl → 1.7.2__py3-none-any.whl

Potentially problematic release.

mlrun 1.7.0rc4__py3-none-any.whl → 1.7.2__py3-none-any.whl