mlrun 1.7.0rc4__py3-none-any.whl → 1.7.0rc20__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/__init__.py +11 -1
- mlrun/__main__.py +25 -111
- mlrun/{datastore/helpers.py → alerts/__init__.py} +2 -5
- mlrun/alerts/alert.py +144 -0
- mlrun/api/schemas/__init__.py +4 -3
- mlrun/artifacts/__init__.py +8 -3
- mlrun/artifacts/base.py +38 -254
- mlrun/artifacts/dataset.py +9 -190
- mlrun/artifacts/manager.py +41 -47
- mlrun/artifacts/model.py +30 -158
- mlrun/artifacts/plots.py +23 -380
- mlrun/common/constants.py +68 -0
- mlrun/common/formatters/__init__.py +19 -0
- mlrun/{model_monitoring/stores/models/sqlite.py → common/formatters/artifact.py} +6 -8
- mlrun/common/formatters/base.py +78 -0
- mlrun/common/formatters/function.py +41 -0
- mlrun/common/formatters/pipeline.py +53 -0
- mlrun/common/formatters/project.py +51 -0
- mlrun/{runtimes → common/runtimes}/constants.py +32 -4
- mlrun/common/schemas/__init__.py +25 -4
- mlrun/common/schemas/alert.py +203 -0
- mlrun/common/schemas/api_gateway.py +148 -0
- mlrun/common/schemas/artifact.py +15 -5
- mlrun/common/schemas/auth.py +8 -2
- mlrun/common/schemas/client_spec.py +2 -0
- mlrun/common/schemas/frontend_spec.py +1 -0
- mlrun/common/schemas/function.py +4 -0
- mlrun/common/schemas/hub.py +7 -9
- mlrun/common/schemas/model_monitoring/__init__.py +19 -3
- mlrun/common/schemas/model_monitoring/constants.py +96 -26
- mlrun/common/schemas/model_monitoring/grafana.py +9 -5
- mlrun/common/schemas/model_monitoring/model_endpoints.py +86 -2
- mlrun/{runtimes/mpijob/v1alpha1.py → common/schemas/pagination.py} +10 -13
- mlrun/common/schemas/pipeline.py +0 -9
- mlrun/common/schemas/project.py +22 -21
- mlrun/common/types.py +7 -1
- mlrun/config.py +87 -19
- mlrun/data_types/data_types.py +4 -0
- mlrun/data_types/to_pandas.py +9 -9
- mlrun/datastore/__init__.py +5 -8
- mlrun/datastore/alibaba_oss.py +130 -0
- mlrun/datastore/azure_blob.py +4 -5
- mlrun/datastore/base.py +69 -30
- mlrun/datastore/datastore.py +10 -2
- mlrun/datastore/datastore_profile.py +90 -6
- mlrun/datastore/google_cloud_storage.py +1 -1
- mlrun/datastore/hdfs.py +5 -0
- mlrun/datastore/inmem.py +2 -2
- mlrun/datastore/redis.py +2 -2
- mlrun/datastore/s3.py +5 -0
- mlrun/datastore/snowflake_utils.py +43 -0
- mlrun/datastore/sources.py +172 -44
- mlrun/datastore/store_resources.py +7 -7
- mlrun/datastore/targets.py +285 -41
- mlrun/datastore/utils.py +68 -5
- mlrun/datastore/v3io.py +27 -50
- mlrun/db/auth_utils.py +152 -0
- mlrun/db/base.py +149 -14
- mlrun/db/factory.py +1 -1
- mlrun/db/httpdb.py +608 -178
- mlrun/db/nopdb.py +191 -7
- mlrun/errors.py +11 -0
- mlrun/execution.py +37 -20
- mlrun/feature_store/__init__.py +0 -2
- mlrun/feature_store/api.py +21 -52
- mlrun/feature_store/feature_set.py +48 -23
- mlrun/feature_store/feature_vector.py +2 -1
- mlrun/feature_store/ingestion.py +7 -6
- mlrun/feature_store/retrieval/base.py +9 -4
- mlrun/feature_store/retrieval/conversion.py +9 -9
- mlrun/feature_store/retrieval/dask_merger.py +2 -0
- mlrun/feature_store/retrieval/job.py +9 -3
- mlrun/feature_store/retrieval/local_merger.py +2 -0
- mlrun/feature_store/retrieval/spark_merger.py +34 -24
- mlrun/feature_store/steps.py +30 -19
- mlrun/features.py +4 -13
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +7 -12
- mlrun/frameworks/auto_mlrun/auto_mlrun.py +2 -2
- mlrun/frameworks/lgbm/__init__.py +1 -1
- mlrun/frameworks/lgbm/callbacks/callback.py +2 -4
- mlrun/frameworks/lgbm/model_handler.py +1 -1
- mlrun/frameworks/parallel_coordinates.py +2 -1
- mlrun/frameworks/pytorch/__init__.py +2 -2
- mlrun/frameworks/sklearn/__init__.py +1 -1
- mlrun/frameworks/tf_keras/__init__.py +5 -2
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +1 -1
- mlrun/frameworks/tf_keras/mlrun_interface.py +2 -2
- mlrun/frameworks/xgboost/__init__.py +1 -1
- mlrun/k8s_utils.py +10 -11
- mlrun/launcher/__init__.py +1 -1
- mlrun/launcher/base.py +6 -5
- mlrun/launcher/client.py +8 -6
- mlrun/launcher/factory.py +1 -1
- mlrun/launcher/local.py +9 -3
- mlrun/launcher/remote.py +9 -3
- mlrun/lists.py +6 -2
- mlrun/model.py +58 -19
- mlrun/model_monitoring/__init__.py +1 -1
- mlrun/model_monitoring/api.py +127 -301
- mlrun/model_monitoring/application.py +5 -296
- mlrun/model_monitoring/applications/__init__.py +11 -0
- mlrun/model_monitoring/applications/_application_steps.py +157 -0
- mlrun/model_monitoring/applications/base.py +282 -0
- mlrun/model_monitoring/applications/context.py +214 -0
- mlrun/model_monitoring/applications/evidently_base.py +211 -0
- mlrun/model_monitoring/applications/histogram_data_drift.py +224 -93
- mlrun/model_monitoring/applications/results.py +99 -0
- mlrun/model_monitoring/controller.py +30 -36
- mlrun/model_monitoring/db/__init__.py +18 -0
- mlrun/model_monitoring/{stores → db/stores}/__init__.py +43 -36
- mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
- mlrun/model_monitoring/{stores/model_endpoint_store.py → db/stores/base/store.py} +58 -32
- mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
- mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +71 -0
- mlrun/model_monitoring/{stores → db/stores/sqldb}/models/base.py +109 -5
- mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +88 -0
- mlrun/model_monitoring/{stores/models/mysql.py → db/stores/sqldb/models/sqlite.py} +19 -13
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +684 -0
- mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
- mlrun/model_monitoring/{stores/kv_model_endpoint_store.py → db/stores/v3io_kv/kv_store.py} +302 -155
- mlrun/model_monitoring/db/tsdb/__init__.py +100 -0
- mlrun/model_monitoring/db/tsdb/base.py +329 -0
- mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
- mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +240 -0
- mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +45 -0
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +397 -0
- mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +117 -0
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +630 -0
- mlrun/model_monitoring/evidently_application.py +6 -118
- mlrun/model_monitoring/features_drift_table.py +34 -22
- mlrun/model_monitoring/helpers.py +100 -7
- mlrun/model_monitoring/model_endpoint.py +3 -2
- mlrun/model_monitoring/stream_processing.py +93 -228
- mlrun/model_monitoring/tracking_policy.py +7 -1
- mlrun/model_monitoring/writer.py +152 -124
- mlrun/package/packagers_manager.py +1 -0
- mlrun/package/utils/_formatter.py +2 -2
- mlrun/platforms/__init__.py +11 -10
- mlrun/platforms/iguazio.py +21 -202
- mlrun/projects/operations.py +30 -16
- mlrun/projects/pipelines.py +92 -99
- mlrun/projects/project.py +757 -268
- mlrun/render.py +15 -14
- mlrun/run.py +160 -162
- mlrun/runtimes/__init__.py +55 -3
- mlrun/runtimes/base.py +33 -19
- mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
- mlrun/runtimes/funcdoc.py +0 -28
- mlrun/runtimes/kubejob.py +28 -122
- mlrun/runtimes/local.py +5 -2
- mlrun/runtimes/mpijob/__init__.py +0 -20
- mlrun/runtimes/mpijob/abstract.py +8 -8
- mlrun/runtimes/mpijob/v1.py +1 -1
- mlrun/runtimes/nuclio/__init__.py +1 -0
- mlrun/runtimes/nuclio/api_gateway.py +709 -0
- mlrun/runtimes/nuclio/application/__init__.py +15 -0
- mlrun/runtimes/nuclio/application/application.py +523 -0
- mlrun/runtimes/nuclio/application/reverse_proxy.go +95 -0
- mlrun/runtimes/nuclio/function.py +98 -58
- mlrun/runtimes/nuclio/serving.py +36 -42
- mlrun/runtimes/pod.py +196 -45
- mlrun/runtimes/remotesparkjob.py +1 -1
- mlrun/runtimes/sparkjob/spark3job.py +1 -1
- mlrun/runtimes/utils.py +6 -73
- mlrun/secrets.py +6 -2
- mlrun/serving/remote.py +2 -3
- mlrun/serving/routers.py +7 -4
- mlrun/serving/server.py +7 -8
- mlrun/serving/states.py +73 -43
- mlrun/serving/v2_serving.py +8 -7
- mlrun/track/tracker.py +2 -1
- mlrun/utils/async_http.py +25 -5
- mlrun/utils/helpers.py +141 -75
- mlrun/utils/http.py +1 -1
- mlrun/utils/logger.py +39 -7
- mlrun/utils/notifications/notification/__init__.py +14 -9
- mlrun/utils/notifications/notification/base.py +12 -0
- mlrun/utils/notifications/notification/console.py +2 -0
- mlrun/utils/notifications/notification/git.py +3 -1
- mlrun/utils/notifications/notification/ipython.py +2 -0
- mlrun/utils/notifications/notification/slack.py +101 -21
- mlrun/utils/notifications/notification/webhook.py +11 -1
- mlrun/utils/notifications/notification_pusher.py +147 -16
- mlrun/utils/retryer.py +3 -2
- mlrun/utils/v3io_clients.py +0 -1
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.0rc4.dist-info → mlrun-1.7.0rc20.dist-info}/METADATA +33 -18
- mlrun-1.7.0rc20.dist-info/RECORD +353 -0
- {mlrun-1.7.0rc4.dist-info → mlrun-1.7.0rc20.dist-info}/WHEEL +1 -1
- mlrun/kfpops.py +0 -868
- mlrun/model_monitoring/batch.py +0 -974
- mlrun/model_monitoring/stores/models/__init__.py +0 -27
- mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -382
- mlrun/platforms/other.py +0 -305
- mlrun-1.7.0rc4.dist-info/RECORD +0 -321
- {mlrun-1.7.0rc4.dist-info → mlrun-1.7.0rc20.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc4.dist-info → mlrun-1.7.0rc20.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc4.dist-info → mlrun-1.7.0rc20.dist-info}/top_level.txt +0 -0
mlrun/runtimes/pod.py
CHANGED
|
@@ -15,12 +15,14 @@ import copy
|
|
|
15
15
|
import inspect
|
|
16
16
|
import os
|
|
17
17
|
import re
|
|
18
|
+
import time
|
|
18
19
|
import typing
|
|
19
20
|
from enum import Enum
|
|
20
21
|
|
|
21
22
|
import dotenv
|
|
22
|
-
import kfp.dsl
|
|
23
23
|
import kubernetes.client as k8s_client
|
|
24
|
+
import mlrun_pipelines.mounts
|
|
25
|
+
from mlrun_pipelines.mixins import KfpAdapterMixin
|
|
24
26
|
|
|
25
27
|
import mlrun.errors
|
|
26
28
|
import mlrun.utils.regex
|
|
@@ -40,7 +42,6 @@ from ..k8s_utils import (
|
|
|
40
42
|
from ..utils import logger, update_in
|
|
41
43
|
from .base import BaseRuntime, FunctionSpec, spec_fields
|
|
42
44
|
from .utils import (
|
|
43
|
-
apply_kfp,
|
|
44
45
|
get_gpu_from_resource_requirement,
|
|
45
46
|
get_item_name,
|
|
46
47
|
set_named_item,
|
|
@@ -934,12 +935,12 @@ class AutoMountType(str, Enum):
|
|
|
934
935
|
@classmethod
|
|
935
936
|
def all_mount_modifiers(cls):
|
|
936
937
|
return [
|
|
937
|
-
|
|
938
|
-
|
|
939
|
-
|
|
940
|
-
|
|
941
|
-
|
|
942
|
-
|
|
938
|
+
mlrun_pipelines.mounts.v3io_cred.__name__,
|
|
939
|
+
mlrun_pipelines.mounts.mount_v3io.__name__,
|
|
940
|
+
mlrun_pipelines.mounts.mount_pvc.__name__,
|
|
941
|
+
mlrun_pipelines.mounts.auto_mount.__name__,
|
|
942
|
+
mlrun_pipelines.mounts.mount_s3.__name__,
|
|
943
|
+
mlrun_pipelines.mounts.set_env_variables.__name__,
|
|
943
944
|
]
|
|
944
945
|
|
|
945
946
|
@classmethod
|
|
@@ -956,27 +957,27 @@ class AutoMountType(str, Enum):
|
|
|
956
957
|
def _get_auto_modifier():
|
|
957
958
|
# If we're running on Iguazio - use v3io_cred
|
|
958
959
|
if mlconf.igz_version != "":
|
|
959
|
-
return
|
|
960
|
+
return mlrun_pipelines.mounts.v3io_cred
|
|
960
961
|
# Else, either pvc mount if it's configured or do nothing otherwise
|
|
961
962
|
pvc_configured = (
|
|
962
963
|
"MLRUN_PVC_MOUNT" in os.environ
|
|
963
964
|
or "pvc_name" in mlconf.get_storage_auto_mount_params()
|
|
964
965
|
)
|
|
965
|
-
return
|
|
966
|
+
return mlrun_pipelines.mounts.mount_pvc if pvc_configured else None
|
|
966
967
|
|
|
967
968
|
def get_modifier(self):
|
|
968
969
|
return {
|
|
969
970
|
AutoMountType.none: None,
|
|
970
|
-
AutoMountType.v3io_credentials:
|
|
971
|
-
AutoMountType.v3io_fuse:
|
|
972
|
-
AutoMountType.pvc:
|
|
971
|
+
AutoMountType.v3io_credentials: mlrun_pipelines.mounts.v3io_cred,
|
|
972
|
+
AutoMountType.v3io_fuse: mlrun_pipelines.mounts.mount_v3io,
|
|
973
|
+
AutoMountType.pvc: mlrun_pipelines.mounts.mount_pvc,
|
|
973
974
|
AutoMountType.auto: self._get_auto_modifier(),
|
|
974
|
-
AutoMountType.s3:
|
|
975
|
-
AutoMountType.env:
|
|
975
|
+
AutoMountType.s3: mlrun_pipelines.mounts.mount_s3,
|
|
976
|
+
AutoMountType.env: mlrun_pipelines.mounts.set_env_variables,
|
|
976
977
|
}[self]
|
|
977
978
|
|
|
978
979
|
|
|
979
|
-
class KubeResource(BaseRuntime):
|
|
980
|
+
class KubeResource(BaseRuntime, KfpAdapterMixin):
|
|
980
981
|
"""
|
|
981
982
|
A parent class for runtimes that generate k8s resources when executing.
|
|
982
983
|
"""
|
|
@@ -985,7 +986,7 @@ class KubeResource(BaseRuntime):
|
|
|
985
986
|
_is_nested = True
|
|
986
987
|
|
|
987
988
|
def __init__(self, spec=None, metadata=None):
|
|
988
|
-
super().__init__(metadata, spec)
|
|
989
|
+
super().__init__(metadata=metadata, spec=spec)
|
|
989
990
|
self.verbose = False
|
|
990
991
|
|
|
991
992
|
@property
|
|
@@ -996,26 +997,6 @@ class KubeResource(BaseRuntime):
|
|
|
996
997
|
def spec(self, spec):
|
|
997
998
|
self._spec = self._verify_dict(spec, "spec", KubeResourceSpec)
|
|
998
999
|
|
|
999
|
-
def apply(self, modify):
|
|
1000
|
-
"""
|
|
1001
|
-
Apply a modifier to the runtime which is used to change the runtimes k8s object's spec.
|
|
1002
|
-
Modifiers can be either KFP modifiers or MLRun modifiers (which are compatible with KFP). All modifiers accept
|
|
1003
|
-
a `kfp.dsl.ContainerOp` object, apply some changes on its spec and return it so modifiers can be chained
|
|
1004
|
-
one after the other.
|
|
1005
|
-
|
|
1006
|
-
:param modify: a modifier runnable object
|
|
1007
|
-
:return: the runtime (self) after the modifications
|
|
1008
|
-
"""
|
|
1009
|
-
|
|
1010
|
-
# Kubeflow pipeline have a hook to add the component to the DAG on ContainerOp init
|
|
1011
|
-
# we remove the hook to suppress kubeflow op registration and return it after the apply()
|
|
1012
|
-
old_op_handler = kfp.dsl._container_op._register_op_handler
|
|
1013
|
-
kfp.dsl._container_op._register_op_handler = lambda x: self.metadata.name
|
|
1014
|
-
cop = kfp.dsl.ContainerOp("name", "image")
|
|
1015
|
-
kfp.dsl._container_op._register_op_handler = old_op_handler
|
|
1016
|
-
|
|
1017
|
-
return apply_kfp(modify, cop, self)
|
|
1018
|
-
|
|
1019
1000
|
def set_env_from_secret(self, name, secret=None, secret_key=None):
|
|
1020
1001
|
"""set pod environment var from secret"""
|
|
1021
1002
|
secret_key = secret_key or name
|
|
@@ -1057,14 +1038,40 @@ class KubeResource(BaseRuntime):
|
|
|
1057
1038
|
return True
|
|
1058
1039
|
return False
|
|
1059
1040
|
|
|
1041
|
+
def enrich_runtime_spec(
|
|
1042
|
+
self,
|
|
1043
|
+
project_node_selector: dict[str, str],
|
|
1044
|
+
):
|
|
1045
|
+
"""
|
|
1046
|
+
Enriches the runtime spec with the project-level node selector.
|
|
1047
|
+
|
|
1048
|
+
This method merges the project-level node selector with the existing function node_selector.
|
|
1049
|
+
The merge logic used here combines the two dictionaries, giving precedence to
|
|
1050
|
+
the keys in the runtime node_selector. If there are conflicting keys between the
|
|
1051
|
+
two dictionaries, the values from self.spec.node_selector will overwrite the
|
|
1052
|
+
values from project_node_selector.
|
|
1053
|
+
|
|
1054
|
+
Example:
|
|
1055
|
+
Suppose self.spec.node_selector = {"type": "gpu", "zone": "us-east-1"}
|
|
1056
|
+
and project_node_selector = {"type": "cpu", "environment": "production"}.
|
|
1057
|
+
After the merge, the resulting node_selector will be:
|
|
1058
|
+
{"type": "gpu", "zone": "us-east-1", "environment": "production"}
|
|
1059
|
+
|
|
1060
|
+
Note:
|
|
1061
|
+
- The merge uses the ** operator, also known as the "unpacking" operator in Python,
|
|
1062
|
+
combining key-value pairs from each dictionary. Later dictionaries take precedence
|
|
1063
|
+
when there are conflicting keys.
|
|
1064
|
+
"""
|
|
1065
|
+
self.spec.node_selector = {**project_node_selector, **self.spec.node_selector}
|
|
1066
|
+
|
|
1060
1067
|
def _set_env(self, name, value=None, value_from=None):
|
|
1061
1068
|
new_var = k8s_client.V1EnvVar(name=name, value=value, value_from=value_from)
|
|
1062
|
-
|
|
1063
|
-
|
|
1064
|
-
|
|
1065
|
-
|
|
1069
|
+
|
|
1070
|
+
# ensure we don't have duplicate env vars with the same name
|
|
1071
|
+
for env_index, value_item in enumerate(self.spec.env):
|
|
1072
|
+
if get_item_name(value_item) == name:
|
|
1073
|
+
self.spec.env[env_index] = new_var
|
|
1066
1074
|
return self
|
|
1067
|
-
i += 1
|
|
1068
1075
|
self.spec.env.append(new_var)
|
|
1069
1076
|
return self
|
|
1070
1077
|
|
|
@@ -1251,9 +1258,9 @@ class KubeResource(BaseRuntime):
|
|
|
1251
1258
|
from kubernetes import client as k8s_client
|
|
1252
1259
|
|
|
1253
1260
|
security_context = k8s_client.V1SecurityContext(
|
|
1254
|
-
|
|
1255
|
-
|
|
1256
|
-
|
|
1261
|
+
run_as_user=1000,
|
|
1262
|
+
run_as_group=3000,
|
|
1263
|
+
)
|
|
1257
1264
|
function.with_security_context(security_context)
|
|
1258
1265
|
|
|
1259
1266
|
More info:
|
|
@@ -1312,6 +1319,150 @@ class KubeResource(BaseRuntime):
|
|
|
1312
1319
|
|
|
1313
1320
|
self.spec.validate_service_account(allowed_service_accounts)
|
|
1314
1321
|
|
|
1322
|
+
def _configure_mlrun_build_with_source(
|
|
1323
|
+
self, source, workdir=None, handler=None, pull_at_runtime=True, target_dir=None
|
|
1324
|
+
):
|
|
1325
|
+
mlrun.utils.helpers.validate_builder_source(source, pull_at_runtime, workdir)
|
|
1326
|
+
|
|
1327
|
+
self.spec.build.source = source
|
|
1328
|
+
if handler:
|
|
1329
|
+
self.spec.default_handler = handler
|
|
1330
|
+
if workdir:
|
|
1331
|
+
self.spec.workdir = workdir
|
|
1332
|
+
if target_dir:
|
|
1333
|
+
self.spec.build.source_code_target_dir = target_dir
|
|
1334
|
+
|
|
1335
|
+
self.spec.build.load_source_on_run = pull_at_runtime
|
|
1336
|
+
if (
|
|
1337
|
+
self.spec.build.base_image
|
|
1338
|
+
and not self.spec.build.commands
|
|
1339
|
+
and pull_at_runtime
|
|
1340
|
+
and not self.spec.image
|
|
1341
|
+
):
|
|
1342
|
+
# if we load source from repo and don't need a full build use the base_image as the image
|
|
1343
|
+
self.spec.image = self.spec.build.base_image
|
|
1344
|
+
elif not pull_at_runtime:
|
|
1345
|
+
# clear the image so build will not be skipped
|
|
1346
|
+
self.spec.build.base_image = self.spec.build.base_image or self.spec.image
|
|
1347
|
+
self.spec.image = ""
|
|
1348
|
+
|
|
1349
|
+
def _resolve_build_with_mlrun(self, with_mlrun: typing.Optional[bool] = None):
|
|
1350
|
+
build = self.spec.build
|
|
1351
|
+
if with_mlrun is None:
|
|
1352
|
+
if build.with_mlrun is not None:
|
|
1353
|
+
with_mlrun = build.with_mlrun
|
|
1354
|
+
else:
|
|
1355
|
+
with_mlrun = build.base_image and not (
|
|
1356
|
+
build.base_image.startswith("mlrun/")
|
|
1357
|
+
or "/mlrun/" in build.base_image
|
|
1358
|
+
)
|
|
1359
|
+
if (
|
|
1360
|
+
not build.source
|
|
1361
|
+
and not build.commands
|
|
1362
|
+
and not build.requirements
|
|
1363
|
+
and not build.extra
|
|
1364
|
+
and with_mlrun
|
|
1365
|
+
):
|
|
1366
|
+
logger.info(
|
|
1367
|
+
"Running build to add mlrun package, set "
|
|
1368
|
+
"with_mlrun=False to skip if its already in the image"
|
|
1369
|
+
)
|
|
1370
|
+
return with_mlrun
|
|
1371
|
+
|
|
1372
|
+
def _build_image(
|
|
1373
|
+
self,
|
|
1374
|
+
builder_env,
|
|
1375
|
+
force_build,
|
|
1376
|
+
mlrun_version_specifier,
|
|
1377
|
+
show_on_failure,
|
|
1378
|
+
skip_deployed,
|
|
1379
|
+
watch,
|
|
1380
|
+
is_kfp,
|
|
1381
|
+
with_mlrun,
|
|
1382
|
+
):
|
|
1383
|
+
# When we're in pipelines context we must watch otherwise the pipelines pod will exit before the operation
|
|
1384
|
+
# is actually done. (when a pipelines pod exits, the pipeline step marked as done)
|
|
1385
|
+
if is_kfp:
|
|
1386
|
+
watch = True
|
|
1387
|
+
|
|
1388
|
+
db = self._get_db()
|
|
1389
|
+
data = db.remote_builder(
|
|
1390
|
+
self,
|
|
1391
|
+
with_mlrun,
|
|
1392
|
+
mlrun_version_specifier,
|
|
1393
|
+
skip_deployed,
|
|
1394
|
+
builder_env=builder_env,
|
|
1395
|
+
force_build=force_build,
|
|
1396
|
+
)
|
|
1397
|
+
self.status = data["data"].get("status", None)
|
|
1398
|
+
self.spec.image = mlrun.utils.get_in(
|
|
1399
|
+
data, "data.spec.image"
|
|
1400
|
+
) or mlrun.utils.get_in(data, "data.spec.build.image")
|
|
1401
|
+
self.spec.build.base_image = self.spec.build.base_image or mlrun.utils.get_in(
|
|
1402
|
+
data, "data.spec.build.base_image"
|
|
1403
|
+
)
|
|
1404
|
+
# Get the source target dir in case it was enriched due to loading source
|
|
1405
|
+
self.spec.build.source_code_target_dir = mlrun.utils.get_in(
|
|
1406
|
+
data, "data.spec.build.source_code_target_dir"
|
|
1407
|
+
) or mlrun.utils.get_in(data, "data.spec.clone_target_dir")
|
|
1408
|
+
ready = data.get("ready", False)
|
|
1409
|
+
if not ready:
|
|
1410
|
+
logger.info(
|
|
1411
|
+
f"Started building image: {data.get('data', {}).get('spec', {}).get('build', {}).get('image')}"
|
|
1412
|
+
)
|
|
1413
|
+
if watch and not ready:
|
|
1414
|
+
state = self._build_watch(
|
|
1415
|
+
watch=watch,
|
|
1416
|
+
show_on_failure=show_on_failure,
|
|
1417
|
+
)
|
|
1418
|
+
ready = state == "ready"
|
|
1419
|
+
self.status.state = state
|
|
1420
|
+
|
|
1421
|
+
if watch and not ready:
|
|
1422
|
+
raise mlrun.errors.MLRunRuntimeError("Deploy failed")
|
|
1423
|
+
return ready
|
|
1424
|
+
|
|
1425
|
+
def _build_watch(
|
|
1426
|
+
self,
|
|
1427
|
+
watch: bool = True,
|
|
1428
|
+
logs: bool = True,
|
|
1429
|
+
show_on_failure: bool = False,
|
|
1430
|
+
):
|
|
1431
|
+
db = self._get_db()
|
|
1432
|
+
offset = 0
|
|
1433
|
+
try:
|
|
1434
|
+
text, _ = db.get_builder_status(self, 0, logs=logs)
|
|
1435
|
+
except mlrun.db.RunDBError:
|
|
1436
|
+
raise ValueError("function or build process not found")
|
|
1437
|
+
|
|
1438
|
+
def print_log(text):
|
|
1439
|
+
if text and (
|
|
1440
|
+
not show_on_failure
|
|
1441
|
+
or self.status.state == mlrun.common.schemas.FunctionState.error
|
|
1442
|
+
):
|
|
1443
|
+
print(text, end="")
|
|
1444
|
+
|
|
1445
|
+
print_log(text)
|
|
1446
|
+
offset += len(text)
|
|
1447
|
+
if watch:
|
|
1448
|
+
while self.status.state in [
|
|
1449
|
+
mlrun.common.schemas.FunctionState.pending,
|
|
1450
|
+
mlrun.common.schemas.FunctionState.running,
|
|
1451
|
+
]:
|
|
1452
|
+
time.sleep(2)
|
|
1453
|
+
if show_on_failure:
|
|
1454
|
+
text = ""
|
|
1455
|
+
db.get_builder_status(self, 0, logs=False)
|
|
1456
|
+
if self.status.state == mlrun.common.schemas.FunctionState.error:
|
|
1457
|
+
# re-read the full log on failure
|
|
1458
|
+
text, _ = db.get_builder_status(self, offset, logs=logs)
|
|
1459
|
+
else:
|
|
1460
|
+
text, _ = db.get_builder_status(self, offset, logs=logs)
|
|
1461
|
+
print_log(text)
|
|
1462
|
+
offset += len(text)
|
|
1463
|
+
|
|
1464
|
+
return self.status.state
|
|
1465
|
+
|
|
1315
1466
|
|
|
1316
1467
|
def _resolve_if_type_sanitized(attribute_name, attribute):
|
|
1317
1468
|
attribute_config = sanitized_attributes[attribute_name]
|
mlrun/runtimes/remotesparkjob.py
CHANGED
|
@@ -15,11 +15,11 @@ import re
|
|
|
15
15
|
from subprocess import run
|
|
16
16
|
|
|
17
17
|
import kubernetes.client
|
|
18
|
+
from mlrun_pipelines.mounts import mount_v3io, mount_v3iod
|
|
18
19
|
|
|
19
20
|
import mlrun.errors
|
|
20
21
|
from mlrun.config import config
|
|
21
22
|
|
|
22
|
-
from ..platforms.iguazio import mount_v3io, mount_v3iod
|
|
23
23
|
from .kubejob import KubejobRuntime
|
|
24
24
|
from .pod import KubeResourceSpec
|
|
25
25
|
|
|
@@ -14,6 +14,7 @@
|
|
|
14
14
|
import typing
|
|
15
15
|
|
|
16
16
|
import kubernetes.client
|
|
17
|
+
from mlrun_pipelines.mounts import mount_v3io, mount_v3iod
|
|
17
18
|
|
|
18
19
|
import mlrun.common.schemas.function
|
|
19
20
|
import mlrun.errors
|
|
@@ -22,7 +23,6 @@ from mlrun.config import config
|
|
|
22
23
|
|
|
23
24
|
from ...execution import MLClientCtx
|
|
24
25
|
from ...model import RunObject
|
|
25
|
-
from ...platforms.iguazio import mount_v3io, mount_v3iod
|
|
26
26
|
from ...utils import update_in, verify_field_regex
|
|
27
27
|
from ..kubejob import KubejobRuntime
|
|
28
28
|
from ..pod import KubeResourceSpec
|
mlrun/runtimes/utils.py
CHANGED
|
@@ -20,17 +20,17 @@ from io import StringIO
|
|
|
20
20
|
from sys import stderr
|
|
21
21
|
|
|
22
22
|
import pandas as pd
|
|
23
|
-
from kubernetes import client
|
|
24
23
|
|
|
25
24
|
import mlrun
|
|
26
25
|
import mlrun.common.constants
|
|
26
|
+
import mlrun.common.constants as mlrun_constants
|
|
27
27
|
import mlrun.common.schemas
|
|
28
28
|
import mlrun.utils.regex
|
|
29
29
|
from mlrun.artifacts import TableArtifact
|
|
30
|
+
from mlrun.common.runtimes.constants import RunLabels
|
|
30
31
|
from mlrun.config import config
|
|
31
32
|
from mlrun.errors import err_to_str
|
|
32
33
|
from mlrun.frameworks.parallel_coordinates import gen_pcp_plot
|
|
33
|
-
from mlrun.runtimes.constants import RunLabels
|
|
34
34
|
from mlrun.runtimes.generators import selector
|
|
35
35
|
from mlrun.utils import get_in, helpers, logger, verify_field_regex
|
|
36
36
|
|
|
@@ -39,9 +39,6 @@ class RunError(Exception):
|
|
|
39
39
|
pass
|
|
40
40
|
|
|
41
41
|
|
|
42
|
-
mlrun_key = "mlrun/"
|
|
43
|
-
|
|
44
|
-
|
|
45
42
|
class _ContextStore:
|
|
46
43
|
def __init__(self):
|
|
47
44
|
self._context = None
|
|
@@ -280,43 +277,6 @@ def get_item_name(item, attr="name"):
|
|
|
280
277
|
return getattr(item, attr, None)
|
|
281
278
|
|
|
282
279
|
|
|
283
|
-
def apply_kfp(modify, cop, runtime):
|
|
284
|
-
modify(cop)
|
|
285
|
-
|
|
286
|
-
# Have to do it here to avoid circular dependencies
|
|
287
|
-
from .pod import AutoMountType
|
|
288
|
-
|
|
289
|
-
if AutoMountType.is_auto_modifier(modify):
|
|
290
|
-
runtime.spec.disable_auto_mount = True
|
|
291
|
-
|
|
292
|
-
api = client.ApiClient()
|
|
293
|
-
for k, v in cop.pod_labels.items():
|
|
294
|
-
runtime.metadata.labels[k] = v
|
|
295
|
-
for k, v in cop.pod_annotations.items():
|
|
296
|
-
runtime.metadata.annotations[k] = v
|
|
297
|
-
if cop.container.env:
|
|
298
|
-
env_names = [
|
|
299
|
-
e.name if hasattr(e, "name") else e["name"] for e in runtime.spec.env
|
|
300
|
-
]
|
|
301
|
-
for e in api.sanitize_for_serialization(cop.container.env):
|
|
302
|
-
name = e["name"]
|
|
303
|
-
if name in env_names:
|
|
304
|
-
runtime.spec.env[env_names.index(name)] = e
|
|
305
|
-
else:
|
|
306
|
-
runtime.spec.env.append(e)
|
|
307
|
-
env_names.append(name)
|
|
308
|
-
cop.container.env.clear()
|
|
309
|
-
|
|
310
|
-
if cop.volumes and cop.container.volume_mounts:
|
|
311
|
-
vols = api.sanitize_for_serialization(cop.volumes)
|
|
312
|
-
mounts = api.sanitize_for_serialization(cop.container.volume_mounts)
|
|
313
|
-
runtime.spec.update_vols_and_mounts(vols, mounts)
|
|
314
|
-
cop.volumes.clear()
|
|
315
|
-
cop.container.volume_mounts.clear()
|
|
316
|
-
|
|
317
|
-
return runtime
|
|
318
|
-
|
|
319
|
-
|
|
320
280
|
def verify_limits(
|
|
321
281
|
resources_field_name,
|
|
322
282
|
mem=None,
|
|
@@ -410,41 +370,13 @@ def generate_resources(mem=None, cpu=None, gpus=None, gpu_type="nvidia.com/gpu")
|
|
|
410
370
|
|
|
411
371
|
|
|
412
372
|
def get_func_selector(project, name=None, tag=None):
|
|
413
|
-
s = [f"{
|
|
373
|
+
s = [f"{mlrun_constants.MLRunInternalLabels.project}={project}"]
|
|
414
374
|
if name:
|
|
415
|
-
s.append(f"{
|
|
416
|
-
s.append(f"{
|
|
375
|
+
s.append(f"{mlrun_constants.MLRunInternalLabels.function}={name}")
|
|
376
|
+
s.append(f"{mlrun_constants.MLRunInternalLabels.tag}={tag or 'latest'}")
|
|
417
377
|
return s
|
|
418
378
|
|
|
419
379
|
|
|
420
|
-
class k8s_resource:
|
|
421
|
-
kind = ""
|
|
422
|
-
per_run = False
|
|
423
|
-
per_function = False
|
|
424
|
-
k8client = None
|
|
425
|
-
|
|
426
|
-
def deploy_function(self, function):
|
|
427
|
-
pass
|
|
428
|
-
|
|
429
|
-
def release_function(self, function):
|
|
430
|
-
pass
|
|
431
|
-
|
|
432
|
-
def submit_run(self, function, runobj):
|
|
433
|
-
pass
|
|
434
|
-
|
|
435
|
-
def get_object(self, name, namespace=None):
|
|
436
|
-
return None
|
|
437
|
-
|
|
438
|
-
def get_status(self, name, namespace=None):
|
|
439
|
-
return None
|
|
440
|
-
|
|
441
|
-
def del_object(self, name, namespace=None):
|
|
442
|
-
pass
|
|
443
|
-
|
|
444
|
-
def get_pods(self, name, namespace=None, master=False):
|
|
445
|
-
return {}
|
|
446
|
-
|
|
447
|
-
|
|
448
380
|
def enrich_function_from_dict(function, function_dict):
|
|
449
381
|
override_function = mlrun.new_function(runtime=function_dict, kind=function.kind)
|
|
450
382
|
for attribute in [
|
|
@@ -504,6 +436,7 @@ def enrich_run_labels(
|
|
|
504
436
|
):
|
|
505
437
|
labels_enrichment = {
|
|
506
438
|
RunLabels.owner: os.environ.get("V3IO_USERNAME") or getpass.getuser(),
|
|
439
|
+
# TODO: remove this in 1.9.0
|
|
507
440
|
RunLabels.v3io_user: os.environ.get("V3IO_USERNAME"),
|
|
508
441
|
}
|
|
509
442
|
labels_to_enrich = labels_to_enrich or RunLabels.all()
|
mlrun/secrets.py
CHANGED
|
@@ -163,15 +163,19 @@ def get_secret_or_env(
|
|
|
163
163
|
|
|
164
164
|
Example::
|
|
165
165
|
|
|
166
|
-
secrets = {
|
|
166
|
+
secrets = {"KEY1": "VALUE1"}
|
|
167
167
|
secret = get_secret_or_env("KEY1", secret_provider=secrets)
|
|
168
168
|
|
|
169
|
+
|
|
169
170
|
# Using a function to retrieve a secret
|
|
170
171
|
def my_secret_provider(key):
|
|
171
172
|
# some internal logic to retrieve secret
|
|
172
173
|
return value
|
|
173
174
|
|
|
174
|
-
|
|
175
|
+
|
|
176
|
+
secret = get_secret_or_env(
|
|
177
|
+
"KEY1", secret_provider=my_secret_provider, default="TOO-MANY-SECRETS"
|
|
178
|
+
)
|
|
175
179
|
|
|
176
180
|
:param key: Secret key to look for
|
|
177
181
|
:param secret_provider: Dictionary, callable or `SecretsStore` to extract the secret value from. If using a
|
mlrun/serving/remote.py
CHANGED
|
@@ -172,8 +172,7 @@ class RemoteStep(storey.SendToHttp):
|
|
|
172
172
|
if not self._session:
|
|
173
173
|
self._session = mlrun.utils.HTTPSessionWithRetry(
|
|
174
174
|
self.retries,
|
|
175
|
-
self.backoff_factor
|
|
176
|
-
or mlrun.config.config.http_retry_defaults.backoff_factor,
|
|
175
|
+
self.backoff_factor or mlrun.mlconf.http_retry_defaults.backoff_factor,
|
|
177
176
|
retry_on_exception=False,
|
|
178
177
|
retry_on_status=self.retries > 0,
|
|
179
178
|
retry_on_post=True,
|
|
@@ -185,7 +184,7 @@ class RemoteStep(storey.SendToHttp):
|
|
|
185
184
|
resp = self._session.request(
|
|
186
185
|
method,
|
|
187
186
|
url,
|
|
188
|
-
verify=mlrun.
|
|
187
|
+
verify=mlrun.mlconf.httpdb.http.verify,
|
|
189
188
|
headers=headers,
|
|
190
189
|
data=body,
|
|
191
190
|
timeout=self.timeout,
|
mlrun/serving/routers.py
CHANGED
|
@@ -28,6 +28,7 @@ import numpy as np
|
|
|
28
28
|
import mlrun
|
|
29
29
|
import mlrun.common.model_monitoring
|
|
30
30
|
import mlrun.common.schemas.model_monitoring
|
|
31
|
+
from mlrun.errors import err_to_str
|
|
31
32
|
from mlrun.utils import logger, now_date
|
|
32
33
|
|
|
33
34
|
from ..common.helpers import parse_versioned_object_uri
|
|
@@ -271,7 +272,9 @@ class ParallelRun(BaseModelRouter):
|
|
|
271
272
|
fn = mlrun.new_function("parallel", kind="serving")
|
|
272
273
|
graph = fn.set_topology(
|
|
273
274
|
"router",
|
|
274
|
-
mlrun.serving.routers.ParallelRun(
|
|
275
|
+
mlrun.serving.routers.ParallelRun(
|
|
276
|
+
extend_event=True, executor_type=executor
|
|
277
|
+
),
|
|
275
278
|
)
|
|
276
279
|
graph.add_route("child1", class_name="Cls1")
|
|
277
280
|
graph.add_route("child2", class_name="Cls2", my_arg={"c": 7})
|
|
@@ -1013,7 +1016,7 @@ def _init_endpoint_record(
|
|
|
1013
1016
|
graph_server.function_uri
|
|
1014
1017
|
)
|
|
1015
1018
|
except Exception as e:
|
|
1016
|
-
logger.error("Failed to parse function URI", exc=e)
|
|
1019
|
+
logger.error("Failed to parse function URI", exc=err_to_str(e))
|
|
1017
1020
|
return None
|
|
1018
1021
|
|
|
1019
1022
|
# Generating version model value based on the model name and model version
|
|
@@ -1089,12 +1092,12 @@ def _init_endpoint_record(
|
|
|
1089
1092
|
except Exception as exc:
|
|
1090
1093
|
logger.warning(
|
|
1091
1094
|
"Failed creating model endpoint record",
|
|
1092
|
-
exc=exc,
|
|
1095
|
+
exc=err_to_str(exc),
|
|
1093
1096
|
traceback=traceback.format_exc(),
|
|
1094
1097
|
)
|
|
1095
1098
|
|
|
1096
1099
|
except Exception as e:
|
|
1097
|
-
logger.error("Failed to retrieve model endpoint object", exc=e)
|
|
1100
|
+
logger.error("Failed to retrieve model endpoint object", exc=err_to_str(e))
|
|
1098
1101
|
|
|
1099
1102
|
return endpoint_uid
|
|
1100
1103
|
|
mlrun/serving/server.py
CHANGED
|
@@ -23,6 +23,7 @@ import uuid
|
|
|
23
23
|
from typing import Optional, Union
|
|
24
24
|
|
|
25
25
|
import mlrun
|
|
26
|
+
import mlrun.common.constants
|
|
26
27
|
import mlrun.common.helpers
|
|
27
28
|
import mlrun.model_monitoring
|
|
28
29
|
from mlrun.config import config
|
|
@@ -52,7 +53,7 @@ class _StreamContext:
|
|
|
52
53
|
Initialize _StreamContext object.
|
|
53
54
|
:param enabled: A boolean indication for applying the stream context
|
|
54
55
|
:param parameters: Dictionary of optional parameters, such as `log_stream` and `stream_args`. Note that these
|
|
55
|
-
parameters might be relevant to the output source such as `
|
|
56
|
+
parameters might be relevant to the output source such as `kafka_brokers` if
|
|
56
57
|
the output source is from type Kafka.
|
|
57
58
|
:param function_uri: Full value of the function uri, usually it's <project-name>/<function-name>
|
|
58
59
|
"""
|
|
@@ -311,11 +312,8 @@ class GraphServer(ModelObj):
|
|
|
311
312
|
def v2_serving_init(context, namespace=None):
|
|
312
313
|
"""hook for nuclio init_context()"""
|
|
313
314
|
|
|
314
|
-
data = os.environ.get("SERVING_SPEC_ENV", "")
|
|
315
|
-
if not data:
|
|
316
|
-
raise MLRunInvalidArgumentError("failed to find spec env var")
|
|
317
|
-
spec = json.loads(data)
|
|
318
315
|
context.logger.info("Initializing server from spec")
|
|
316
|
+
spec = mlrun.utils.get_serving_spec()
|
|
319
317
|
server = GraphServer.from_dict(spec)
|
|
320
318
|
if config.log_level.lower() == "debug":
|
|
321
319
|
server.verbose = True
|
|
@@ -355,7 +353,7 @@ def v2_serving_init(context, namespace=None):
|
|
|
355
353
|
|
|
356
354
|
async def termination_callback():
|
|
357
355
|
context.logger.info("Termination callback called")
|
|
358
|
-
|
|
356
|
+
server.wait_for_completion()
|
|
359
357
|
context.logger.info("Termination of async flow is completed")
|
|
360
358
|
|
|
361
359
|
context.platform.set_termination_callback(termination_callback)
|
|
@@ -367,7 +365,7 @@ def v2_serving_init(context, namespace=None):
|
|
|
367
365
|
|
|
368
366
|
async def drain_callback():
|
|
369
367
|
context.logger.info("Drain callback called")
|
|
370
|
-
|
|
368
|
+
server.wait_for_completion()
|
|
371
369
|
context.logger.info(
|
|
372
370
|
"Termination of async flow is completed. Rerunning async flow."
|
|
373
371
|
)
|
|
@@ -389,7 +387,7 @@ def v2_serving_handler(context, event, get_body=False):
|
|
|
389
387
|
|
|
390
388
|
|
|
391
389
|
def create_graph_server(
|
|
392
|
-
parameters=
|
|
390
|
+
parameters=None,
|
|
393
391
|
load_mode=None,
|
|
394
392
|
graph=None,
|
|
395
393
|
verbose=False,
|
|
@@ -405,6 +403,7 @@ def create_graph_server(
|
|
|
405
403
|
server.graph.add_route("my", class_name=MyModelClass, model_path="{path}", z=100)
|
|
406
404
|
print(server.test("/v2/models/my/infer", testdata))
|
|
407
405
|
"""
|
|
406
|
+
parameters = parameters or {}
|
|
408
407
|
server = GraphServer(graph, parameters, load_mode, verbose=verbose, **kwargs)
|
|
409
408
|
server.set_current_function(
|
|
410
409
|
current_function or os.environ.get("SERVING_CURRENT_FUNCTION", "")
|