mlrun 1.7.0rc4__py3-none-any.whl → 1.7.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/__init__.py +11 -1
- mlrun/__main__.py +39 -121
- mlrun/{datastore/helpers.py → alerts/__init__.py} +2 -5
- mlrun/alerts/alert.py +248 -0
- mlrun/api/schemas/__init__.py +4 -3
- mlrun/artifacts/__init__.py +8 -3
- mlrun/artifacts/base.py +39 -254
- mlrun/artifacts/dataset.py +9 -190
- mlrun/artifacts/manager.py +73 -46
- mlrun/artifacts/model.py +30 -158
- mlrun/artifacts/plots.py +23 -380
- mlrun/common/constants.py +73 -1
- mlrun/common/db/sql_session.py +3 -2
- mlrun/common/formatters/__init__.py +21 -0
- mlrun/common/formatters/artifact.py +46 -0
- mlrun/common/formatters/base.py +113 -0
- mlrun/common/formatters/feature_set.py +44 -0
- mlrun/common/formatters/function.py +46 -0
- mlrun/common/formatters/pipeline.py +53 -0
- mlrun/common/formatters/project.py +51 -0
- mlrun/common/formatters/run.py +29 -0
- mlrun/common/helpers.py +11 -1
- mlrun/{runtimes → common/runtimes}/constants.py +32 -4
- mlrun/common/schemas/__init__.py +31 -4
- mlrun/common/schemas/alert.py +202 -0
- mlrun/common/schemas/api_gateway.py +196 -0
- mlrun/common/schemas/artifact.py +28 -1
- mlrun/common/schemas/auth.py +13 -2
- mlrun/common/schemas/client_spec.py +2 -1
- mlrun/common/schemas/common.py +7 -4
- mlrun/common/schemas/constants.py +3 -0
- mlrun/common/schemas/feature_store.py +58 -28
- mlrun/common/schemas/frontend_spec.py +8 -0
- mlrun/common/schemas/function.py +11 -0
- mlrun/common/schemas/hub.py +7 -9
- mlrun/common/schemas/model_monitoring/__init__.py +21 -4
- mlrun/common/schemas/model_monitoring/constants.py +136 -42
- mlrun/common/schemas/model_monitoring/grafana.py +9 -5
- mlrun/common/schemas/model_monitoring/model_endpoints.py +89 -41
- mlrun/common/schemas/notification.py +69 -12
- mlrun/{runtimes/mpijob/v1alpha1.py → common/schemas/pagination.py} +10 -13
- mlrun/common/schemas/pipeline.py +7 -0
- mlrun/common/schemas/project.py +67 -16
- mlrun/common/schemas/runs.py +17 -0
- mlrun/common/schemas/schedule.py +1 -1
- mlrun/common/schemas/workflow.py +10 -2
- mlrun/common/types.py +14 -1
- mlrun/config.py +233 -58
- mlrun/data_types/data_types.py +11 -1
- mlrun/data_types/spark.py +5 -4
- mlrun/data_types/to_pandas.py +75 -34
- mlrun/datastore/__init__.py +8 -10
- mlrun/datastore/alibaba_oss.py +131 -0
- mlrun/datastore/azure_blob.py +131 -43
- mlrun/datastore/base.py +107 -47
- mlrun/datastore/datastore.py +17 -7
- mlrun/datastore/datastore_profile.py +91 -7
- mlrun/datastore/dbfs_store.py +3 -7
- mlrun/datastore/filestore.py +1 -3
- mlrun/datastore/google_cloud_storage.py +92 -32
- mlrun/datastore/hdfs.py +5 -0
- mlrun/datastore/inmem.py +6 -3
- mlrun/datastore/redis.py +3 -2
- mlrun/datastore/s3.py +30 -12
- mlrun/datastore/snowflake_utils.py +45 -0
- mlrun/datastore/sources.py +274 -59
- mlrun/datastore/spark_utils.py +30 -0
- mlrun/datastore/store_resources.py +9 -7
- mlrun/datastore/storeytargets.py +151 -0
- mlrun/datastore/targets.py +387 -119
- mlrun/datastore/utils.py +68 -5
- mlrun/datastore/v3io.py +28 -50
- mlrun/db/auth_utils.py +152 -0
- mlrun/db/base.py +245 -20
- mlrun/db/factory.py +1 -4
- mlrun/db/httpdb.py +909 -231
- mlrun/db/nopdb.py +279 -14
- mlrun/errors.py +35 -5
- mlrun/execution.py +111 -38
- mlrun/feature_store/__init__.py +0 -2
- mlrun/feature_store/api.py +46 -53
- mlrun/feature_store/common.py +6 -11
- mlrun/feature_store/feature_set.py +48 -23
- mlrun/feature_store/feature_vector.py +13 -2
- mlrun/feature_store/ingestion.py +7 -6
- mlrun/feature_store/retrieval/base.py +9 -4
- mlrun/feature_store/retrieval/dask_merger.py +2 -0
- mlrun/feature_store/retrieval/job.py +13 -4
- mlrun/feature_store/retrieval/local_merger.py +2 -0
- mlrun/feature_store/retrieval/spark_merger.py +24 -32
- mlrun/feature_store/steps.py +38 -19
- mlrun/features.py +6 -14
- mlrun/frameworks/_common/plan.py +3 -3
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +7 -12
- mlrun/frameworks/_ml_common/plan.py +1 -1
- mlrun/frameworks/auto_mlrun/auto_mlrun.py +2 -2
- mlrun/frameworks/lgbm/__init__.py +1 -1
- mlrun/frameworks/lgbm/callbacks/callback.py +2 -4
- mlrun/frameworks/lgbm/model_handler.py +1 -1
- mlrun/frameworks/parallel_coordinates.py +4 -4
- mlrun/frameworks/pytorch/__init__.py +2 -2
- mlrun/frameworks/sklearn/__init__.py +1 -1
- mlrun/frameworks/sklearn/mlrun_interface.py +13 -3
- mlrun/frameworks/tf_keras/__init__.py +5 -2
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +1 -1
- mlrun/frameworks/tf_keras/mlrun_interface.py +2 -2
- mlrun/frameworks/xgboost/__init__.py +1 -1
- mlrun/k8s_utils.py +57 -12
- mlrun/launcher/__init__.py +1 -1
- mlrun/launcher/base.py +6 -5
- mlrun/launcher/client.py +13 -11
- mlrun/launcher/factory.py +1 -1
- mlrun/launcher/local.py +15 -5
- mlrun/launcher/remote.py +10 -3
- mlrun/lists.py +6 -2
- mlrun/model.py +297 -48
- mlrun/model_monitoring/__init__.py +1 -1
- mlrun/model_monitoring/api.py +152 -357
- mlrun/model_monitoring/applications/__init__.py +10 -0
- mlrun/model_monitoring/applications/_application_steps.py +190 -0
- mlrun/model_monitoring/applications/base.py +108 -0
- mlrun/model_monitoring/applications/context.py +341 -0
- mlrun/model_monitoring/{evidently_application.py → applications/evidently_base.py} +27 -22
- mlrun/model_monitoring/applications/histogram_data_drift.py +227 -91
- mlrun/model_monitoring/applications/results.py +99 -0
- mlrun/model_monitoring/controller.py +130 -303
- mlrun/model_monitoring/{stores/models/sqlite.py → db/__init__.py} +5 -10
- mlrun/model_monitoring/db/stores/__init__.py +136 -0
- mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
- mlrun/model_monitoring/db/stores/base/store.py +213 -0
- mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
- mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +71 -0
- mlrun/model_monitoring/db/stores/sqldb/models/base.py +190 -0
- mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +103 -0
- mlrun/model_monitoring/{stores/models/mysql.py → db/stores/sqldb/models/sqlite.py} +19 -13
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +659 -0
- mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +726 -0
- mlrun/model_monitoring/db/tsdb/__init__.py +105 -0
- mlrun/model_monitoring/db/tsdb/base.py +448 -0
- mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
- mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +298 -0
- mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +42 -0
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +522 -0
- mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +158 -0
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +849 -0
- mlrun/model_monitoring/features_drift_table.py +34 -22
- mlrun/model_monitoring/helpers.py +177 -39
- mlrun/model_monitoring/model_endpoint.py +3 -2
- mlrun/model_monitoring/stream_processing.py +165 -398
- mlrun/model_monitoring/tracking_policy.py +7 -1
- mlrun/model_monitoring/writer.py +161 -125
- mlrun/package/packagers/default_packager.py +2 -2
- mlrun/package/packagers_manager.py +1 -0
- mlrun/package/utils/_formatter.py +2 -2
- mlrun/platforms/__init__.py +11 -10
- mlrun/platforms/iguazio.py +67 -228
- mlrun/projects/__init__.py +6 -1
- mlrun/projects/operations.py +47 -20
- mlrun/projects/pipelines.py +396 -249
- mlrun/projects/project.py +1176 -406
- mlrun/render.py +28 -22
- mlrun/run.py +208 -181
- mlrun/runtimes/__init__.py +76 -11
- mlrun/runtimes/base.py +54 -24
- mlrun/runtimes/daskjob.py +9 -2
- mlrun/runtimes/databricks_job/databricks_runtime.py +1 -0
- mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
- mlrun/runtimes/funcdoc.py +1 -29
- mlrun/runtimes/kubejob.py +34 -128
- mlrun/runtimes/local.py +39 -10
- mlrun/runtimes/mpijob/__init__.py +0 -20
- mlrun/runtimes/mpijob/abstract.py +8 -8
- mlrun/runtimes/mpijob/v1.py +1 -1
- mlrun/runtimes/nuclio/__init__.py +1 -0
- mlrun/runtimes/nuclio/api_gateway.py +769 -0
- mlrun/runtimes/nuclio/application/__init__.py +15 -0
- mlrun/runtimes/nuclio/application/application.py +758 -0
- mlrun/runtimes/nuclio/application/reverse_proxy.go +95 -0
- mlrun/runtimes/nuclio/function.py +188 -68
- mlrun/runtimes/nuclio/serving.py +57 -60
- mlrun/runtimes/pod.py +191 -58
- mlrun/runtimes/remotesparkjob.py +11 -8
- mlrun/runtimes/sparkjob/spark3job.py +17 -18
- mlrun/runtimes/utils.py +40 -73
- mlrun/secrets.py +6 -2
- mlrun/serving/__init__.py +8 -1
- mlrun/serving/remote.py +2 -3
- mlrun/serving/routers.py +89 -64
- mlrun/serving/server.py +54 -26
- mlrun/serving/states.py +187 -56
- mlrun/serving/utils.py +19 -11
- mlrun/serving/v2_serving.py +136 -63
- mlrun/track/tracker.py +2 -1
- mlrun/track/trackers/mlflow_tracker.py +5 -0
- mlrun/utils/async_http.py +26 -6
- mlrun/utils/db.py +18 -0
- mlrun/utils/helpers.py +375 -105
- mlrun/utils/http.py +2 -2
- mlrun/utils/logger.py +75 -9
- mlrun/utils/notifications/notification/__init__.py +14 -10
- mlrun/utils/notifications/notification/base.py +48 -0
- mlrun/utils/notifications/notification/console.py +2 -0
- mlrun/utils/notifications/notification/git.py +24 -1
- mlrun/utils/notifications/notification/ipython.py +2 -0
- mlrun/utils/notifications/notification/slack.py +96 -21
- mlrun/utils/notifications/notification/webhook.py +63 -2
- mlrun/utils/notifications/notification_pusher.py +146 -16
- mlrun/utils/regex.py +9 -0
- mlrun/utils/retryer.py +3 -2
- mlrun/utils/v3io_clients.py +2 -3
- mlrun/utils/version/version.json +2 -2
- mlrun-1.7.2.dist-info/METADATA +390 -0
- mlrun-1.7.2.dist-info/RECORD +351 -0
- {mlrun-1.7.0rc4.dist-info → mlrun-1.7.2.dist-info}/WHEEL +1 -1
- mlrun/feature_store/retrieval/conversion.py +0 -271
- mlrun/kfpops.py +0 -868
- mlrun/model_monitoring/application.py +0 -310
- mlrun/model_monitoring/batch.py +0 -974
- mlrun/model_monitoring/controller_handler.py +0 -37
- mlrun/model_monitoring/prometheus.py +0 -216
- mlrun/model_monitoring/stores/__init__.py +0 -111
- mlrun/model_monitoring/stores/kv_model_endpoint_store.py +0 -574
- mlrun/model_monitoring/stores/model_endpoint_store.py +0 -145
- mlrun/model_monitoring/stores/models/__init__.py +0 -27
- mlrun/model_monitoring/stores/models/base.py +0 -84
- mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -382
- mlrun/platforms/other.py +0 -305
- mlrun-1.7.0rc4.dist-info/METADATA +0 -269
- mlrun-1.7.0rc4.dist-info/RECORD +0 -321
- {mlrun-1.7.0rc4.dist-info → mlrun-1.7.2.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc4.dist-info → mlrun-1.7.2.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc4.dist-info → mlrun-1.7.2.dist-info}/top_level.txt +0 -0
|
@@ -14,15 +14,16 @@
|
|
|
14
14
|
import typing
|
|
15
15
|
|
|
16
16
|
import kubernetes.client
|
|
17
|
+
from mlrun_pipelines.mounts import mount_v3io, mount_v3iod
|
|
17
18
|
|
|
18
19
|
import mlrun.common.schemas.function
|
|
19
20
|
import mlrun.errors
|
|
21
|
+
import mlrun.k8s_utils
|
|
20
22
|
import mlrun.runtimes.pod
|
|
21
23
|
from mlrun.config import config
|
|
22
24
|
|
|
23
25
|
from ...execution import MLClientCtx
|
|
24
26
|
from ...model import RunObject
|
|
25
|
-
from ...platforms.iguazio import mount_v3io, mount_v3iod
|
|
26
27
|
from ...utils import update_in, verify_field_regex
|
|
27
28
|
from ..kubejob import KubejobRuntime
|
|
28
29
|
from ..pod import KubeResourceSpec
|
|
@@ -451,7 +452,7 @@ class Spark3JobSpec(KubeResourceSpec):
|
|
|
451
452
|
class Spark3Runtime(KubejobRuntime):
|
|
452
453
|
group = "sparkoperator.k8s.io"
|
|
453
454
|
version = "v1beta2"
|
|
454
|
-
apiVersion = group + "/" + version
|
|
455
|
+
apiVersion = group + "/" + version # noqa: N815
|
|
455
456
|
kind = "spark"
|
|
456
457
|
plural = "sparkapplications"
|
|
457
458
|
|
|
@@ -505,13 +506,11 @@ class Spark3Runtime(KubejobRuntime):
|
|
|
505
506
|
raise NotImplementedError(
|
|
506
507
|
"Setting node name is not supported for spark runtime"
|
|
507
508
|
)
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
)
|
|
514
|
-
super().with_node_selection(node_name, node_selector, affinity, tolerations)
|
|
509
|
+
mlrun.k8s_utils.validate_node_selectors(node_selector, raise_on_error=False)
|
|
510
|
+
self.with_driver_node_selection(node_name, node_selector, affinity, tolerations)
|
|
511
|
+
self.with_executor_node_selection(
|
|
512
|
+
node_name, node_selector, affinity, tolerations
|
|
513
|
+
)
|
|
515
514
|
|
|
516
515
|
def with_driver_node_selection(
|
|
517
516
|
self,
|
|
@@ -537,11 +536,12 @@ class Spark3Runtime(KubejobRuntime):
|
|
|
537
536
|
raise NotImplementedError(
|
|
538
537
|
"Setting node name is not supported for spark runtime"
|
|
539
538
|
)
|
|
540
|
-
if affinity:
|
|
539
|
+
if affinity is not None:
|
|
541
540
|
self.spec.driver_affinity = affinity
|
|
542
|
-
if node_selector:
|
|
541
|
+
if node_selector is not None:
|
|
542
|
+
mlrun.k8s_utils.validate_node_selectors(node_selector, raise_on_error=False)
|
|
543
543
|
self.spec.driver_node_selector = node_selector
|
|
544
|
-
if tolerations:
|
|
544
|
+
if tolerations is not None:
|
|
545
545
|
self.spec.driver_tolerations = tolerations
|
|
546
546
|
|
|
547
547
|
def with_executor_node_selection(
|
|
@@ -568,11 +568,12 @@ class Spark3Runtime(KubejobRuntime):
|
|
|
568
568
|
raise NotImplementedError(
|
|
569
569
|
"Setting node name is not supported for spark runtime"
|
|
570
570
|
)
|
|
571
|
-
if affinity:
|
|
571
|
+
if affinity is not None:
|
|
572
572
|
self.spec.executor_affinity = affinity
|
|
573
|
-
if node_selector:
|
|
573
|
+
if node_selector is not None:
|
|
574
|
+
mlrun.k8s_utils.validate_node_selectors(node_selector, raise_on_error=False)
|
|
574
575
|
self.spec.executor_node_selector = node_selector
|
|
575
|
-
if tolerations:
|
|
576
|
+
if tolerations is not None:
|
|
576
577
|
self.spec.executor_tolerations = tolerations
|
|
577
578
|
|
|
578
579
|
def with_preemption_mode(
|
|
@@ -811,9 +812,7 @@ class Spark3Runtime(KubejobRuntime):
|
|
|
811
812
|
|
|
812
813
|
@classmethod
|
|
813
814
|
def deploy_default_image(cls, with_gpu=False):
|
|
814
|
-
|
|
815
|
-
|
|
816
|
-
sj = new_function(kind=cls.kind, name="spark-default-image-deploy-temp")
|
|
815
|
+
sj = mlrun.new_function(kind=cls.kind, name="spark-default-image-deploy-temp")
|
|
817
816
|
sj.spec.build.image = cls._get_default_deployed_mlrun_image_name(with_gpu)
|
|
818
817
|
|
|
819
818
|
# setting required resources
|
mlrun/runtimes/utils.py
CHANGED
|
@@ -20,17 +20,17 @@ from io import StringIO
|
|
|
20
20
|
from sys import stderr
|
|
21
21
|
|
|
22
22
|
import pandas as pd
|
|
23
|
-
from kubernetes import client
|
|
24
23
|
|
|
25
24
|
import mlrun
|
|
26
25
|
import mlrun.common.constants
|
|
26
|
+
import mlrun.common.constants as mlrun_constants
|
|
27
27
|
import mlrun.common.schemas
|
|
28
28
|
import mlrun.utils.regex
|
|
29
29
|
from mlrun.artifacts import TableArtifact
|
|
30
|
+
from mlrun.common.runtimes.constants import RunLabels
|
|
30
31
|
from mlrun.config import config
|
|
31
32
|
from mlrun.errors import err_to_str
|
|
32
33
|
from mlrun.frameworks.parallel_coordinates import gen_pcp_plot
|
|
33
|
-
from mlrun.runtimes.constants import RunLabels
|
|
34
34
|
from mlrun.runtimes.generators import selector
|
|
35
35
|
from mlrun.utils import get_in, helpers, logger, verify_field_regex
|
|
36
36
|
|
|
@@ -39,9 +39,6 @@ class RunError(Exception):
|
|
|
39
39
|
pass
|
|
40
40
|
|
|
41
41
|
|
|
42
|
-
mlrun_key = "mlrun/"
|
|
43
|
-
|
|
44
|
-
|
|
45
42
|
class _ContextStore:
|
|
46
43
|
def __init__(self):
|
|
47
44
|
self._context = None
|
|
@@ -280,43 +277,6 @@ def get_item_name(item, attr="name"):
|
|
|
280
277
|
return getattr(item, attr, None)
|
|
281
278
|
|
|
282
279
|
|
|
283
|
-
def apply_kfp(modify, cop, runtime):
|
|
284
|
-
modify(cop)
|
|
285
|
-
|
|
286
|
-
# Have to do it here to avoid circular dependencies
|
|
287
|
-
from .pod import AutoMountType
|
|
288
|
-
|
|
289
|
-
if AutoMountType.is_auto_modifier(modify):
|
|
290
|
-
runtime.spec.disable_auto_mount = True
|
|
291
|
-
|
|
292
|
-
api = client.ApiClient()
|
|
293
|
-
for k, v in cop.pod_labels.items():
|
|
294
|
-
runtime.metadata.labels[k] = v
|
|
295
|
-
for k, v in cop.pod_annotations.items():
|
|
296
|
-
runtime.metadata.annotations[k] = v
|
|
297
|
-
if cop.container.env:
|
|
298
|
-
env_names = [
|
|
299
|
-
e.name if hasattr(e, "name") else e["name"] for e in runtime.spec.env
|
|
300
|
-
]
|
|
301
|
-
for e in api.sanitize_for_serialization(cop.container.env):
|
|
302
|
-
name = e["name"]
|
|
303
|
-
if name in env_names:
|
|
304
|
-
runtime.spec.env[env_names.index(name)] = e
|
|
305
|
-
else:
|
|
306
|
-
runtime.spec.env.append(e)
|
|
307
|
-
env_names.append(name)
|
|
308
|
-
cop.container.env.clear()
|
|
309
|
-
|
|
310
|
-
if cop.volumes and cop.container.volume_mounts:
|
|
311
|
-
vols = api.sanitize_for_serialization(cop.volumes)
|
|
312
|
-
mounts = api.sanitize_for_serialization(cop.container.volume_mounts)
|
|
313
|
-
runtime.spec.update_vols_and_mounts(vols, mounts)
|
|
314
|
-
cop.volumes.clear()
|
|
315
|
-
cop.container.volume_mounts.clear()
|
|
316
|
-
|
|
317
|
-
return runtime
|
|
318
|
-
|
|
319
|
-
|
|
320
280
|
def verify_limits(
|
|
321
281
|
resources_field_name,
|
|
322
282
|
mem=None,
|
|
@@ -410,41 +370,13 @@ def generate_resources(mem=None, cpu=None, gpus=None, gpu_type="nvidia.com/gpu")
|
|
|
410
370
|
|
|
411
371
|
|
|
412
372
|
def get_func_selector(project, name=None, tag=None):
|
|
413
|
-
s = [f"{mlrun_key}project={project}"]
|
|
373
|
+
s = [f"{mlrun_constants.MLRunInternalLabels.project}={project}"]
|
|
414
374
|
if name:
|
|
415
|
-
s.append(f"{mlrun_key}function={name}")
|
|
416
|
-
s.append(f"{mlrun_key}tag={tag or 'latest'}")
|
|
375
|
+
s.append(f"{mlrun_constants.MLRunInternalLabels.function}={name}")
|
|
376
|
+
s.append(f"{mlrun_constants.MLRunInternalLabels.tag}={tag or 'latest'}")
|
|
417
377
|
return s
|
|
418
378
|
|
|
419
379
|
|
|
420
|
-
class k8s_resource:
|
|
421
|
-
kind = ""
|
|
422
|
-
per_run = False
|
|
423
|
-
per_function = False
|
|
424
|
-
k8client = None
|
|
425
|
-
|
|
426
|
-
def deploy_function(self, function):
|
|
427
|
-
pass
|
|
428
|
-
|
|
429
|
-
def release_function(self, function):
|
|
430
|
-
pass
|
|
431
|
-
|
|
432
|
-
def submit_run(self, function, runobj):
|
|
433
|
-
pass
|
|
434
|
-
|
|
435
|
-
def get_object(self, name, namespace=None):
|
|
436
|
-
return None
|
|
437
|
-
|
|
438
|
-
def get_status(self, name, namespace=None):
|
|
439
|
-
return None
|
|
440
|
-
|
|
441
|
-
def del_object(self, name, namespace=None):
|
|
442
|
-
pass
|
|
443
|
-
|
|
444
|
-
def get_pods(self, name, namespace=None, master=False):
|
|
445
|
-
return {}
|
|
446
|
-
|
|
447
|
-
|
|
448
380
|
def enrich_function_from_dict(function, function_dict):
|
|
449
381
|
override_function = mlrun.new_function(runtime=function_dict, kind=function.kind)
|
|
450
382
|
for attribute in [
|
|
@@ -504,6 +436,7 @@ def enrich_run_labels(
|
|
|
504
436
|
):
|
|
505
437
|
labels_enrichment = {
|
|
506
438
|
RunLabels.owner: os.environ.get("V3IO_USERNAME") or getpass.getuser(),
|
|
439
|
+
# TODO: remove this in 1.9.0
|
|
507
440
|
RunLabels.v3io_user: os.environ.get("V3IO_USERNAME"),
|
|
508
441
|
}
|
|
509
442
|
labels_to_enrich = labels_to_enrich or RunLabels.all()
|
|
@@ -512,3 +445,37 @@ def enrich_run_labels(
|
|
|
512
445
|
if label.value not in labels and enrichment:
|
|
513
446
|
labels[label.value] = enrichment
|
|
514
447
|
return labels
|
|
448
|
+
|
|
449
|
+
|
|
450
|
+
def resolve_node_selectors(
|
|
451
|
+
project_node_selector: dict, instance_node_selector: dict
|
|
452
|
+
) -> dict:
|
|
453
|
+
config_node_selector = mlrun.mlconf.get_default_function_node_selector()
|
|
454
|
+
if project_node_selector or config_node_selector:
|
|
455
|
+
mlrun.utils.logger.debug(
|
|
456
|
+
"Enriching node selector from project and mlrun config",
|
|
457
|
+
project_node_selector=project_node_selector,
|
|
458
|
+
config_node_selector=config_node_selector,
|
|
459
|
+
)
|
|
460
|
+
return mlrun.utils.helpers.merge_dicts_with_precedence(
|
|
461
|
+
config_node_selector,
|
|
462
|
+
project_node_selector,
|
|
463
|
+
instance_node_selector,
|
|
464
|
+
)
|
|
465
|
+
return instance_node_selector
|
|
466
|
+
|
|
467
|
+
|
|
468
|
+
def enrich_gateway_timeout_annotations(annotations: dict, gateway_timeout: int):
|
|
469
|
+
"""
|
|
470
|
+
Set gateway proxy connect/read/send timeout annotations
|
|
471
|
+
:param annotations: The annotations to enrich
|
|
472
|
+
:param gateway_timeout: The timeout to set
|
|
473
|
+
"""
|
|
474
|
+
if not gateway_timeout:
|
|
475
|
+
return
|
|
476
|
+
gateway_timeout_str = str(gateway_timeout)
|
|
477
|
+
annotations["nginx.ingress.kubernetes.io/proxy-connect-timeout"] = (
|
|
478
|
+
gateway_timeout_str
|
|
479
|
+
)
|
|
480
|
+
annotations["nginx.ingress.kubernetes.io/proxy-read-timeout"] = gateway_timeout_str
|
|
481
|
+
annotations["nginx.ingress.kubernetes.io/proxy-send-timeout"] = gateway_timeout_str
|
mlrun/secrets.py
CHANGED
|
@@ -163,15 +163,19 @@ def get_secret_or_env(
|
|
|
163
163
|
|
|
164
164
|
Example::
|
|
165
165
|
|
|
166
|
-
secrets = {
|
|
166
|
+
secrets = {"KEY1": "VALUE1"}
|
|
167
167
|
secret = get_secret_or_env("KEY1", secret_provider=secrets)
|
|
168
168
|
|
|
169
|
+
|
|
169
170
|
# Using a function to retrieve a secret
|
|
170
171
|
def my_secret_provider(key):
|
|
171
172
|
# some internal logic to retrieve secret
|
|
172
173
|
return value
|
|
173
174
|
|
|
174
|
-
|
|
175
|
+
|
|
176
|
+
secret = get_secret_or_env(
|
|
177
|
+
"KEY1", secret_provider=my_secret_provider, default="TOO-MANY-SECRETS"
|
|
178
|
+
)
|
|
175
179
|
|
|
176
180
|
:param key: Secret key to look for
|
|
177
181
|
:param secret_provider: Dictionary, callable or `SecretsStore` to extract the secret value from. If using a
|
mlrun/serving/__init__.py
CHANGED
|
@@ -22,10 +22,17 @@ __all__ = [
|
|
|
22
22
|
"RouterStep",
|
|
23
23
|
"QueueStep",
|
|
24
24
|
"ErrorStep",
|
|
25
|
+
"MonitoringApplicationStep",
|
|
25
26
|
]
|
|
26
27
|
|
|
27
28
|
from .routers import ModelRouter, VotingEnsemble # noqa
|
|
28
29
|
from .server import GraphContext, GraphServer, create_graph_server # noqa
|
|
29
|
-
from .states import ErrorStep, QueueStep, RouterStep, TaskStep  # noqa
|
|
30
|
+
from .states import (
|
|
31
|
+
ErrorStep,
|
|
32
|
+
QueueStep,
|
|
33
|
+
RouterStep,
|
|
34
|
+
TaskStep,
|
|
35
|
+
MonitoringApplicationStep,
|
|
36
|
+
) # noqa
|
|
30
37
|
from .v1_serving import MLModelServer, new_v1_model_server # noqa
|
|
31
38
|
from .v2_serving import V2ModelServer # noqa
|
mlrun/serving/remote.py
CHANGED
|
@@ -172,8 +172,7 @@ class RemoteStep(storey.SendToHttp):
|
|
|
172
172
|
if not self._session:
|
|
173
173
|
self._session = mlrun.utils.HTTPSessionWithRetry(
|
|
174
174
|
self.retries,
|
|
175
|
-
self.backoff_factor
|
|
176
|
-
or mlrun.config.config.http_retry_defaults.backoff_factor,
|
|
175
|
+
self.backoff_factor or mlrun.mlconf.http_retry_defaults.backoff_factor,
|
|
177
176
|
retry_on_exception=False,
|
|
178
177
|
retry_on_status=self.retries > 0,
|
|
179
178
|
retry_on_post=True,
|
|
@@ -185,7 +184,7 @@ class RemoteStep(storey.SendToHttp):
|
|
|
185
184
|
resp = self._session.request(
|
|
186
185
|
method,
|
|
187
186
|
url,
|
|
188
|
-
verify=mlrun.config.config.httpdb.http.verify,
|
|
187
|
+
verify=mlrun.mlconf.httpdb.http.verify,
|
|
189
188
|
headers=headers,
|
|
190
189
|
data=body,
|
|
191
190
|
timeout=self.timeout,
|
mlrun/serving/routers.py
CHANGED
|
@@ -28,10 +28,10 @@ import numpy as np
|
|
|
28
28
|
import mlrun
|
|
29
29
|
import mlrun.common.model_monitoring
|
|
30
30
|
import mlrun.common.schemas.model_monitoring
|
|
31
|
+
from mlrun.errors import err_to_str
|
|
31
32
|
from mlrun.utils import logger, now_date
|
|
32
33
|
|
|
33
34
|
from ..common.helpers import parse_versioned_object_uri
|
|
34
|
-
from ..config import config
|
|
35
35
|
from .server import GraphServer
|
|
36
36
|
from .utils import RouterToDict, _extract_input_data, _update_result_body
|
|
37
37
|
from .v2_serving import _ModelLogPusher
|
|
@@ -271,7 +271,9 @@ class ParallelRun(BaseModelRouter):
|
|
|
271
271
|
fn = mlrun.new_function("parallel", kind="serving")
|
|
272
272
|
graph = fn.set_topology(
|
|
273
273
|
"router",
|
|
274
|
-
mlrun.serving.routers.ParallelRun(
|
|
274
|
+
mlrun.serving.routers.ParallelRun(
|
|
275
|
+
extend_event=True, executor_type=executor
|
|
276
|
+
),
|
|
275
277
|
)
|
|
276
278
|
graph.add_route("child1", class_name="Cls1")
|
|
277
279
|
graph.add_route("child2", class_name="Cls2", my_arg={"c": 7})
|
|
@@ -489,6 +491,7 @@ class VotingEnsemble(ParallelRun):
|
|
|
489
491
|
executor_type: Union[ParallelRunnerModes, str] = ParallelRunnerModes.thread,
|
|
490
492
|
format_response_with_col_name_flag: bool = False,
|
|
491
493
|
prediction_col_name: str = "prediction",
|
|
494
|
+
shard_by_endpoint: typing.Optional[bool] = None,
|
|
492
495
|
**kwargs,
|
|
493
496
|
):
|
|
494
497
|
"""Voting Ensemble
|
|
@@ -578,6 +581,8 @@ class VotingEnsemble(ParallelRun):
|
|
|
578
581
|
`{id: <id>, model_name: <name>, outputs: {..., prediction: [<predictions>], ...}}`
|
|
579
582
|
the prediction_col_name should be `prediction`.
|
|
580
583
|
by default, `prediction`
|
|
584
|
+
:param shard_by_endpoint: whether to use the endpoint as the partition/sharding key when writing to model
|
|
585
|
+
monitoring stream. Defaults to True.
|
|
581
586
|
:param kwargs: extra arguments
|
|
582
587
|
"""
|
|
583
588
|
super().__init__(
|
|
@@ -604,6 +609,7 @@ class VotingEnsemble(ParallelRun):
|
|
|
604
609
|
self.prediction_col_name = prediction_col_name or "prediction"
|
|
605
610
|
self.format_response_with_col_name_flag = format_response_with_col_name_flag
|
|
606
611
|
self.model_endpoint_uid = None
|
|
612
|
+
self.shard_by_endpoint = shard_by_endpoint
|
|
607
613
|
|
|
608
614
|
def post_init(self, mode="sync"):
|
|
609
615
|
server = getattr(self.context, "_server", None) or getattr(
|
|
@@ -613,7 +619,7 @@ class VotingEnsemble(ParallelRun):
|
|
|
613
619
|
logger.warn("GraphServer not initialized for VotingEnsemble instance")
|
|
614
620
|
return
|
|
615
621
|
|
|
616
|
-
if not self.context.is_mock or self.context.
|
|
622
|
+
if not self.context.is_mock or self.context.monitoring_mock:
|
|
617
623
|
self.model_endpoint_uid = _init_endpoint_record(server, self)
|
|
618
624
|
|
|
619
625
|
self._update_weights(self.weights)
|
|
@@ -905,7 +911,12 @@ class VotingEnsemble(ParallelRun):
|
|
|
905
911
|
if self._model_logger and self.log_router:
|
|
906
912
|
if "id" not in request:
|
|
907
913
|
request["id"] = response.body["id"]
|
|
908
|
-
|
|
914
|
+
partition_key = (
|
|
915
|
+
self.model_endpoint_uid if self.shard_by_endpoint is not False else None
|
|
916
|
+
)
|
|
917
|
+
self._model_logger.push(
|
|
918
|
+
start, request, response.body, partition_key=partition_key
|
|
919
|
+
)
|
|
909
920
|
event.body = _update_result_body(
|
|
910
921
|
self._result_path, original_body, response.body if response else None
|
|
911
922
|
)
|
|
@@ -1013,7 +1024,7 @@ def _init_endpoint_record(
|
|
|
1013
1024
|
graph_server.function_uri
|
|
1014
1025
|
)
|
|
1015
1026
|
except Exception as e:
|
|
1016
|
-
logger.error("Failed to parse function URI", exc=e)
|
|
1027
|
+
logger.error("Failed to parse function URI", exc=err_to_str(e))
|
|
1017
1028
|
return None
|
|
1018
1029
|
|
|
1019
1030
|
# Generating version model value based on the model name and model version
|
|
@@ -1027,74 +1038,88 @@ def _init_endpoint_record(
|
|
|
1027
1038
|
function_uri=graph_server.function_uri, versioned_model=versioned_model_name
|
|
1028
1039
|
).uid
|
|
1029
1040
|
|
|
1030
|
-
# If model endpoint object was found in DB, skip the creation process.
|
|
1031
1041
|
try:
|
|
1032
|
-
mlrun.get_run_db().get_model_endpoint(
|
|
1033
|
-
|
|
1042
|
+
model_ep = mlrun.get_run_db().get_model_endpoint(
|
|
1043
|
+
project=project, endpoint_id=endpoint_uid
|
|
1044
|
+
)
|
|
1034
1045
|
except mlrun.errors.MLRunNotFoundError:
|
|
1046
|
+
model_ep = None
|
|
1047
|
+
except mlrun.errors.MLRunBadRequestError as err:
|
|
1048
|
+
logger.debug(
|
|
1049
|
+
f"Cant reach to model endpoints store, due to : {err}",
|
|
1050
|
+
)
|
|
1051
|
+
return
|
|
1052
|
+
|
|
1053
|
+
if voting_ensemble.context.server.track_models and not model_ep:
|
|
1035
1054
|
logger.info("Creating a new model endpoint record", endpoint_id=endpoint_uid)
|
|
1055
|
+
# Get the children model endpoints ids
|
|
1056
|
+
children_uids = []
|
|
1057
|
+
for _, c in voting_ensemble.routes.items():
|
|
1058
|
+
if hasattr(c, "endpoint_uid"):
|
|
1059
|
+
children_uids.append(c.endpoint_uid)
|
|
1060
|
+
model_endpoint = mlrun.common.schemas.ModelEndpoint(
|
|
1061
|
+
metadata=mlrun.common.schemas.ModelEndpointMetadata(
|
|
1062
|
+
project=project, uid=endpoint_uid
|
|
1063
|
+
),
|
|
1064
|
+
spec=mlrun.common.schemas.ModelEndpointSpec(
|
|
1065
|
+
function_uri=graph_server.function_uri,
|
|
1066
|
+
model=versioned_model_name,
|
|
1067
|
+
model_class=voting_ensemble.__class__.__name__,
|
|
1068
|
+
stream_path=voting_ensemble.context.stream.stream_uri,
|
|
1069
|
+
active=True,
|
|
1070
|
+
monitoring_mode=mlrun.common.schemas.model_monitoring.ModelMonitoringMode.enabled,
|
|
1071
|
+
),
|
|
1072
|
+
status=mlrun.common.schemas.ModelEndpointStatus(
|
|
1073
|
+
children=list(voting_ensemble.routes.keys()),
|
|
1074
|
+
endpoint_type=mlrun.common.schemas.model_monitoring.EndpointType.ROUTER,
|
|
1075
|
+
children_uids=children_uids,
|
|
1076
|
+
),
|
|
1077
|
+
)
|
|
1036
1078
|
|
|
1037
|
-
|
|
1038
|
-
# Get the children model endpoints ids
|
|
1039
|
-
children_uids = []
|
|
1040
|
-
for _, c in voting_ensemble.routes.items():
|
|
1041
|
-
if hasattr(c, "endpoint_uid"):
|
|
1042
|
-
children_uids.append(c.endpoint_uid)
|
|
1043
|
-
|
|
1044
|
-
model_endpoint = mlrun.common.schemas.ModelEndpoint(
|
|
1045
|
-
metadata=mlrun.common.schemas.ModelEndpointMetadata(
|
|
1046
|
-
project=project, uid=endpoint_uid
|
|
1047
|
-
),
|
|
1048
|
-
spec=mlrun.common.schemas.ModelEndpointSpec(
|
|
1049
|
-
function_uri=graph_server.function_uri,
|
|
1050
|
-
model=versioned_model_name,
|
|
1051
|
-
model_class=voting_ensemble.__class__.__name__,
|
|
1052
|
-
stream_path=config.model_endpoint_monitoring.store_prefixes.default.format(
|
|
1053
|
-
project=project, kind="stream"
|
|
1054
|
-
),
|
|
1055
|
-
active=True,
|
|
1056
|
-
monitoring_mode=mlrun.common.schemas.model_monitoring.ModelMonitoringMode.enabled
|
|
1057
|
-
if voting_ensemble.context.server.track_models
|
|
1058
|
-
else mlrun.common.schemas.model_monitoring.ModelMonitoringMode.disabled,
|
|
1059
|
-
),
|
|
1060
|
-
status=mlrun.common.schemas.ModelEndpointStatus(
|
|
1061
|
-
children=list(voting_ensemble.routes.keys()),
|
|
1062
|
-
endpoint_type=mlrun.common.schemas.model_monitoring.EndpointType.ROUTER,
|
|
1063
|
-
children_uids=children_uids,
|
|
1064
|
-
),
|
|
1065
|
-
)
|
|
1079
|
+
db = mlrun.get_run_db()
|
|
1066
1080
|
|
|
1067
|
-
|
|
1081
|
+
db.create_model_endpoint(
|
|
1082
|
+
project=project,
|
|
1083
|
+
endpoint_id=model_endpoint.metadata.uid,
|
|
1084
|
+
model_endpoint=model_endpoint.dict(),
|
|
1085
|
+
)
|
|
1068
1086
|
|
|
1087
|
+
# Update model endpoint children type
|
|
1088
|
+
for model_endpoint in children_uids:
|
|
1089
|
+
current_endpoint = db.get_model_endpoint(
|
|
1090
|
+
project=project, endpoint_id=model_endpoint
|
|
1091
|
+
)
|
|
1092
|
+
current_endpoint.status.endpoint_type = (
|
|
1093
|
+
mlrun.common.schemas.model_monitoring.EndpointType.LEAF_EP
|
|
1094
|
+
)
|
|
1069
1095
|
db.create_model_endpoint(
|
|
1070
1096
|
project=project,
|
|
1071
|
-
endpoint_id=model_endpoint.metadata.uid,
|
|
1072
|
-
model_endpoint=model_endpoint.dict(),
|
|
1097
|
+
endpoint_id=model_endpoint,
|
|
1098
|
+
model_endpoint=current_endpoint,
|
|
1073
1099
|
)
|
|
1074
|
-
|
|
1075
|
-
|
|
1076
|
-
|
|
1077
|
-
|
|
1078
|
-
|
|
1079
|
-
|
|
1080
|
-
|
|
1081
|
-
|
|
1082
|
-
|
|
1083
|
-
|
|
1084
|
-
|
|
1085
|
-
|
|
1086
|
-
|
|
1087
|
-
|
|
1088
|
-
|
|
1089
|
-
|
|
1090
|
-
|
|
1091
|
-
|
|
1092
|
-
|
|
1093
|
-
|
|
1094
|
-
|
|
1095
|
-
|
|
1096
|
-
|
|
1097
|
-
logger.error("Failed to retrieve model endpoint object", exc=e)
|
|
1100
|
+
elif (
|
|
1101
|
+
model_ep
|
|
1102
|
+
and (
|
|
1103
|
+
model_ep.spec.monitoring_mode
|
|
1104
|
+
== mlrun.common.schemas.model_monitoring.ModelMonitoringMode.enabled
|
|
1105
|
+
)
|
|
1106
|
+
!= voting_ensemble.context.server.track_models
|
|
1107
|
+
):
|
|
1108
|
+
monitoring_mode = (
|
|
1109
|
+
mlrun.common.schemas.model_monitoring.ModelMonitoringMode.enabled
|
|
1110
|
+
if voting_ensemble.context.server.track_models
|
|
1111
|
+
else mlrun.common.schemas.model_monitoring.ModelMonitoringMode.disabled
|
|
1112
|
+
)
|
|
1113
|
+
db = mlrun.get_run_db()
|
|
1114
|
+
db.patch_model_endpoint(
|
|
1115
|
+
project=project,
|
|
1116
|
+
endpoint_id=endpoint_uid,
|
|
1117
|
+
attributes={"monitoring_mode": monitoring_mode},
|
|
1118
|
+
)
|
|
1119
|
+
logger.debug(
|
|
1120
|
+
f"Updating model endpoint monitoring_mode to {monitoring_mode}",
|
|
1121
|
+
endpoint_id=endpoint_uid,
|
|
1122
|
+
)
|
|
1098
1123
|
|
|
1099
1124
|
return endpoint_uid
|
|
1100
1125
|
|