mlrun 1.10.0rc40__py3-none-any.whl → 1.11.0rc16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/__init__.py +3 -2
- mlrun/__main__.py +0 -4
- mlrun/artifacts/dataset.py +2 -2
- mlrun/artifacts/plots.py +1 -1
- mlrun/{model_monitoring/db/tsdb/tdengine → auth}/__init__.py +2 -3
- mlrun/auth/nuclio.py +89 -0
- mlrun/auth/providers.py +429 -0
- mlrun/auth/utils.py +415 -0
- mlrun/common/constants.py +7 -0
- mlrun/common/model_monitoring/helpers.py +41 -4
- mlrun/common/runtimes/constants.py +28 -0
- mlrun/common/schemas/__init__.py +13 -3
- mlrun/common/schemas/alert.py +2 -2
- mlrun/common/schemas/api_gateway.py +3 -0
- mlrun/common/schemas/auth.py +10 -10
- mlrun/common/schemas/client_spec.py +4 -0
- mlrun/common/schemas/constants.py +25 -0
- mlrun/common/schemas/frontend_spec.py +1 -8
- mlrun/common/schemas/function.py +24 -0
- mlrun/common/schemas/hub.py +3 -2
- mlrun/common/schemas/model_monitoring/__init__.py +1 -1
- mlrun/common/schemas/model_monitoring/constants.py +2 -2
- mlrun/common/schemas/secret.py +17 -2
- mlrun/common/secrets.py +95 -1
- mlrun/common/types.py +10 -10
- mlrun/config.py +53 -15
- mlrun/data_types/infer.py +2 -2
- mlrun/datastore/__init__.py +2 -3
- mlrun/datastore/base.py +274 -10
- mlrun/datastore/datastore.py +1 -1
- mlrun/datastore/datastore_profile.py +49 -17
- mlrun/datastore/model_provider/huggingface_provider.py +6 -2
- mlrun/datastore/model_provider/model_provider.py +2 -2
- mlrun/datastore/model_provider/openai_provider.py +2 -2
- mlrun/datastore/s3.py +15 -16
- mlrun/datastore/sources.py +1 -1
- mlrun/datastore/store_resources.py +4 -4
- mlrun/datastore/storeytargets.py +16 -10
- mlrun/datastore/targets.py +1 -1
- mlrun/datastore/utils.py +16 -3
- mlrun/datastore/v3io.py +1 -1
- mlrun/db/base.py +36 -12
- mlrun/db/httpdb.py +316 -101
- mlrun/db/nopdb.py +29 -11
- mlrun/errors.py +4 -2
- mlrun/execution.py +11 -12
- mlrun/feature_store/api.py +1 -1
- mlrun/feature_store/common.py +1 -1
- mlrun/feature_store/feature_vector_utils.py +1 -1
- mlrun/feature_store/steps.py +8 -6
- mlrun/frameworks/_common/utils.py +3 -3
- mlrun/frameworks/_dl_common/loggers/logger.py +1 -1
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +2 -1
- mlrun/frameworks/_ml_common/loggers/mlrun_logger.py +1 -1
- mlrun/frameworks/_ml_common/utils.py +2 -1
- mlrun/frameworks/auto_mlrun/auto_mlrun.py +4 -3
- mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +2 -1
- mlrun/frameworks/onnx/dataset.py +2 -1
- mlrun/frameworks/onnx/mlrun_interface.py +2 -1
- mlrun/frameworks/pytorch/callbacks/logging_callback.py +5 -4
- mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +2 -1
- mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +2 -1
- mlrun/frameworks/pytorch/utils.py +2 -1
- mlrun/frameworks/sklearn/metric.py +2 -1
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +5 -4
- mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +2 -1
- mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +2 -1
- mlrun/hub/__init__.py +37 -0
- mlrun/hub/base.py +142 -0
- mlrun/hub/module.py +67 -76
- mlrun/hub/step.py +113 -0
- mlrun/launcher/base.py +2 -1
- mlrun/launcher/local.py +2 -1
- mlrun/model.py +12 -2
- mlrun/model_monitoring/__init__.py +0 -1
- mlrun/model_monitoring/api.py +2 -2
- mlrun/model_monitoring/applications/base.py +20 -6
- mlrun/model_monitoring/applications/context.py +1 -0
- mlrun/model_monitoring/controller.py +7 -17
- mlrun/model_monitoring/db/_schedules.py +2 -16
- mlrun/model_monitoring/db/_stats.py +2 -13
- mlrun/model_monitoring/db/tsdb/__init__.py +9 -7
- mlrun/model_monitoring/db/tsdb/base.py +2 -4
- mlrun/model_monitoring/db/tsdb/preaggregate.py +234 -0
- mlrun/model_monitoring/db/tsdb/stream_graph_steps.py +63 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/queries/timescaledb_metrics_queries.py +414 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/queries/timescaledb_predictions_queries.py +376 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/queries/timescaledb_results_queries.py +590 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_connection.py +434 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_connector.py +541 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_operations.py +808 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_schema.py +502 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_stream.py +163 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_stream_graph_steps.py +60 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/utils/timescaledb_dataframe_processor.py +141 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/utils/timescaledb_query_builder.py +585 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/writer_graph_steps.py +73 -0
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +4 -6
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +147 -79
- mlrun/model_monitoring/features_drift_table.py +2 -1
- mlrun/model_monitoring/helpers.py +2 -1
- mlrun/model_monitoring/stream_processing.py +18 -16
- mlrun/model_monitoring/writer.py +4 -3
- mlrun/package/__init__.py +2 -1
- mlrun/platforms/__init__.py +0 -44
- mlrun/platforms/iguazio.py +1 -1
- mlrun/projects/operations.py +11 -10
- mlrun/projects/project.py +81 -82
- mlrun/run.py +4 -7
- mlrun/runtimes/__init__.py +2 -204
- mlrun/runtimes/base.py +89 -21
- mlrun/runtimes/constants.py +225 -0
- mlrun/runtimes/daskjob.py +4 -2
- mlrun/runtimes/databricks_job/databricks_runtime.py +2 -1
- mlrun/runtimes/mounts.py +5 -0
- mlrun/runtimes/nuclio/__init__.py +12 -8
- mlrun/runtimes/nuclio/api_gateway.py +36 -6
- mlrun/runtimes/nuclio/application/application.py +200 -32
- mlrun/runtimes/nuclio/function.py +154 -49
- mlrun/runtimes/nuclio/serving.py +55 -42
- mlrun/runtimes/pod.py +59 -10
- mlrun/secrets.py +46 -2
- mlrun/serving/__init__.py +2 -0
- mlrun/serving/remote.py +5 -5
- mlrun/serving/routers.py +3 -3
- mlrun/serving/server.py +46 -43
- mlrun/serving/serving_wrapper.py +6 -2
- mlrun/serving/states.py +554 -207
- mlrun/serving/steps.py +1 -1
- mlrun/serving/system_steps.py +42 -33
- mlrun/track/trackers/mlflow_tracker.py +29 -31
- mlrun/utils/helpers.py +89 -16
- mlrun/utils/http.py +9 -2
- mlrun/utils/notifications/notification/git.py +1 -1
- mlrun/utils/notifications/notification/mail.py +39 -16
- mlrun/utils/notifications/notification_pusher.py +2 -2
- mlrun/utils/version/version.json +2 -2
- mlrun/utils/version/version.py +3 -4
- {mlrun-1.10.0rc40.dist-info → mlrun-1.11.0rc16.dist-info}/METADATA +39 -49
- {mlrun-1.10.0rc40.dist-info → mlrun-1.11.0rc16.dist-info}/RECORD +144 -130
- mlrun/db/auth_utils.py +0 -152
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +0 -343
- mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +0 -75
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connection.py +0 -281
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +0 -1368
- mlrun/model_monitoring/db/tsdb/tdengine/writer_graph_steps.py +0 -51
- {mlrun-1.10.0rc40.dist-info → mlrun-1.11.0rc16.dist-info}/WHEEL +0 -0
- {mlrun-1.10.0rc40.dist-info → mlrun-1.11.0rc16.dist-info}/entry_points.txt +0 -0
- {mlrun-1.10.0rc40.dist-info → mlrun-1.11.0rc16.dist-info}/licenses/LICENSE +0 -0
- {mlrun-1.10.0rc40.dist-info → mlrun-1.11.0rc16.dist-info}/top_level.txt +0 -0
|
@@ -17,8 +17,10 @@ import copy
|
|
|
17
17
|
import json
|
|
18
18
|
import typing
|
|
19
19
|
import warnings
|
|
20
|
+
from dataclasses import dataclass
|
|
20
21
|
from datetime import datetime
|
|
21
22
|
from time import sleep
|
|
23
|
+
from urllib.parse import urlparse, urlunparse
|
|
22
24
|
|
|
23
25
|
import inflection
|
|
24
26
|
import nuclio
|
|
@@ -30,13 +32,14 @@ from kubernetes import client
|
|
|
30
32
|
from nuclio.deploy import find_dashboard_url, get_deploy_status
|
|
31
33
|
from nuclio.triggers import V3IOStreamTrigger
|
|
32
34
|
|
|
35
|
+
import mlrun.auth.nuclio
|
|
33
36
|
import mlrun.common.constants
|
|
34
37
|
import mlrun.db
|
|
35
38
|
import mlrun.errors
|
|
36
39
|
import mlrun.k8s_utils
|
|
37
40
|
import mlrun.utils
|
|
38
41
|
import mlrun.utils.helpers
|
|
39
|
-
from mlrun.common.schemas import AuthInfo
|
|
42
|
+
from mlrun.common.schemas import AuthInfo, BatchingSpec
|
|
40
43
|
from mlrun.config import config as mlconf
|
|
41
44
|
from mlrun.errors import err_to_str
|
|
42
45
|
from mlrun.lists import RunList
|
|
@@ -96,6 +99,13 @@ def min_nuclio_versions(*versions):
|
|
|
96
99
|
return decorator
|
|
97
100
|
|
|
98
101
|
|
|
102
|
+
@dataclass
|
|
103
|
+
class AsyncSpec:
|
|
104
|
+
enabled: bool = True
|
|
105
|
+
max_connections: typing.Optional[int] = None
|
|
106
|
+
connection_availability_timeout: typing.Optional[int] = None
|
|
107
|
+
|
|
108
|
+
|
|
99
109
|
class NuclioSpec(KubeResourceSpec):
|
|
100
110
|
_dict_fields = KubeResourceSpec._dict_fields + [
|
|
101
111
|
"min_replicas",
|
|
@@ -113,6 +123,7 @@ class NuclioSpec(KubeResourceSpec):
|
|
|
113
123
|
"service_type",
|
|
114
124
|
"add_templated_ingress_host_mode",
|
|
115
125
|
"disable_default_http_trigger",
|
|
126
|
+
"auth",
|
|
116
127
|
]
|
|
117
128
|
|
|
118
129
|
def __init__(
|
|
@@ -160,6 +171,7 @@ class NuclioSpec(KubeResourceSpec):
|
|
|
160
171
|
graph=None,
|
|
161
172
|
parameters=None,
|
|
162
173
|
track_models=None,
|
|
174
|
+
auth=None,
|
|
163
175
|
):
|
|
164
176
|
super().__init__(
|
|
165
177
|
command=command,
|
|
@@ -216,6 +228,7 @@ class NuclioSpec(KubeResourceSpec):
|
|
|
216
228
|
# When True it will set Nuclio spec.noBaseImagesPull to False (negative logic)
|
|
217
229
|
# indicate that the base image should be pulled from the container registry (not cached)
|
|
218
230
|
self.base_image_pull = False
|
|
231
|
+
self.auth = auth or {}
|
|
219
232
|
|
|
220
233
|
def generate_nuclio_volumes(self):
|
|
221
234
|
nuclio_volumes = []
|
|
@@ -300,29 +313,16 @@ class RemoteRuntime(KubeResource):
|
|
|
300
313
|
return {}
|
|
301
314
|
|
|
302
315
|
raw_config = copy.deepcopy(self.spec.config)
|
|
303
|
-
|
|
304
316
|
for key, value in self.spec.config.items():
|
|
305
317
|
if key.startswith("spec.triggers"):
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
last_key = nested_keys[-1]
|
|
316
|
-
if last_key in target:
|
|
317
|
-
sensitive_field = target[last_key]
|
|
318
|
-
if sensitive_field.startswith(
|
|
319
|
-
mlrun.model.Credentials.secret_reference_prefix
|
|
320
|
-
):
|
|
321
|
-
# already masked
|
|
322
|
-
continue
|
|
323
|
-
target[last_key] = (
|
|
324
|
-
f"{mlrun.model.Credentials.secret_reference_prefix}/spec/triggers/{trigger_name}/{path}"
|
|
325
|
-
)
|
|
318
|
+
# support both types depending on the way how it was set
|
|
319
|
+
# sometimes trigger name is in the same key, sometimes it's nested in the value dict
|
|
320
|
+
if key == "spec.triggers":
|
|
321
|
+
for trigger_name, trigger_config in value.items():
|
|
322
|
+
self._mask_trigger_config(trigger_name, trigger_config)
|
|
323
|
+
else:
|
|
324
|
+
trigger_name = key.split(".")[-1]
|
|
325
|
+
self._mask_trigger_config(trigger_name, value)
|
|
326
326
|
|
|
327
327
|
return raw_config
|
|
328
328
|
|
|
@@ -464,7 +464,7 @@ class RemoteRuntime(KubeResource):
|
|
|
464
464
|
|
|
465
465
|
def with_http(
|
|
466
466
|
self,
|
|
467
|
-
workers: typing.Optional[int] =
|
|
467
|
+
workers: typing.Optional[int] = None,
|
|
468
468
|
port: typing.Optional[int] = None,
|
|
469
469
|
host: typing.Optional[str] = None,
|
|
470
470
|
paths: typing.Optional[list[str]] = None,
|
|
@@ -475,6 +475,8 @@ class RemoteRuntime(KubeResource):
|
|
|
475
475
|
trigger_name: typing.Optional[str] = None,
|
|
476
476
|
annotations: typing.Optional[typing.Mapping[str, str]] = None,
|
|
477
477
|
extra_attributes: typing.Optional[typing.Mapping[str, str]] = None,
|
|
478
|
+
batching_spec: typing.Optional[BatchingSpec] = None,
|
|
479
|
+
async_spec: typing.Optional[AsyncSpec] = None,
|
|
478
480
|
):
|
|
479
481
|
"""update/add nuclio HTTP trigger settings
|
|
480
482
|
|
|
@@ -482,7 +484,8 @@ class RemoteRuntime(KubeResource):
|
|
|
482
484
|
if the max time a request will wait for until it will start processing, gateway_timeout must be greater than
|
|
483
485
|
the worker_timeout.
|
|
484
486
|
|
|
485
|
-
:param workers:
|
|
487
|
+
:param workers: Number of worker processes. Defaults to 8 in synchronous mode and
|
|
488
|
+
1 in asynchronous mode. Set to 0 to use Nuclio’s default worker count.
|
|
486
489
|
:param port: TCP port to listen on. by default, nuclio will choose a random port as long as
|
|
487
490
|
the function service is NodePort. if the function service is ClusterIP, the port
|
|
488
491
|
is ignored.
|
|
@@ -496,6 +499,12 @@ class RemoteRuntime(KubeResource):
|
|
|
496
499
|
:param trigger_name: alternative nuclio trigger name
|
|
497
500
|
:param annotations: key/value dict of ingress annotations
|
|
498
501
|
:param extra_attributes: key/value dict of extra nuclio trigger attributes
|
|
502
|
+
:param batching_spec: BatchingSpec object that defines batching configuration.
|
|
503
|
+
By default, batching is disabled.
|
|
504
|
+
|
|
505
|
+
:param async_spec: AsyncSpec object defines async configuration. If number of max connections
|
|
506
|
+
won't be set, the default value will be set to 1000 according to nuclio default.
|
|
507
|
+
|
|
499
508
|
:return: function object (self)
|
|
500
509
|
"""
|
|
501
510
|
if self.disable_default_http_trigger:
|
|
@@ -503,11 +512,15 @@ class RemoteRuntime(KubeResource):
|
|
|
503
512
|
"Adding HTTP trigger despite the default HTTP trigger creation being disabled"
|
|
504
513
|
)
|
|
505
514
|
|
|
515
|
+
if async_spec and async_spec.enabled:
|
|
516
|
+
workers = 1 if workers is None else workers
|
|
517
|
+
else:
|
|
518
|
+
workers = 8 if workers is None else workers
|
|
519
|
+
|
|
506
520
|
annotations = annotations or {}
|
|
507
521
|
if worker_timeout:
|
|
508
522
|
gateway_timeout = gateway_timeout or (worker_timeout + 60)
|
|
509
|
-
|
|
510
|
-
workers = 0
|
|
523
|
+
|
|
511
524
|
if gateway_timeout:
|
|
512
525
|
if worker_timeout and worker_timeout >= gateway_timeout:
|
|
513
526
|
raise ValueError(
|
|
@@ -531,6 +544,28 @@ class RemoteRuntime(KubeResource):
|
|
|
531
544
|
trigger._struct["workerAvailabilityTimeoutMilliseconds"] = (
|
|
532
545
|
worker_timeout
|
|
533
546
|
) * 1000
|
|
547
|
+
|
|
548
|
+
if batching_spec and (
|
|
549
|
+
batching_config := batching_spec.get_nuclio_batch_config()
|
|
550
|
+
):
|
|
551
|
+
if not validate_nuclio_version_compatibility("1.14.0"):
|
|
552
|
+
raise mlrun.errors.MLRunValueError(
|
|
553
|
+
"Batching is only supported on Nuclio 1.14.0 and higher"
|
|
554
|
+
)
|
|
555
|
+
trigger._struct["batch"] = batching_config
|
|
556
|
+
|
|
557
|
+
if async_spec:
|
|
558
|
+
if not validate_nuclio_version_compatibility("1.15.3"):
|
|
559
|
+
raise mlrun.errors.MLRunValueError(
|
|
560
|
+
"Async spec is only supported on Nuclio 1.15.3 and higher"
|
|
561
|
+
)
|
|
562
|
+
if async_spec.enabled:
|
|
563
|
+
trigger._struct["mode"] = "async"
|
|
564
|
+
trigger._struct["async"] = {
|
|
565
|
+
"maxConnectionsNumber": async_spec.max_connections,
|
|
566
|
+
"connectionAvailabilityTimeout": async_spec.connection_availability_timeout,
|
|
567
|
+
}
|
|
568
|
+
|
|
534
569
|
self.add_trigger(trigger_name or "http", trigger)
|
|
535
570
|
return self
|
|
536
571
|
|
|
@@ -655,8 +690,6 @@ class RemoteRuntime(KubeResource):
|
|
|
655
690
|
if tag:
|
|
656
691
|
self.metadata.tag = tag
|
|
657
692
|
|
|
658
|
-
mlrun.utils.helpers.validate_function_name(self.metadata.name)
|
|
659
|
-
|
|
660
693
|
# Attempt auto-mounting, before sending to remote build
|
|
661
694
|
self.try_auto_mount_based_on_config()
|
|
662
695
|
self._fill_credentials()
|
|
@@ -843,22 +876,6 @@ class RemoteRuntime(KubeResource):
|
|
|
843
876
|
raise ValueError("function or deploy process not found")
|
|
844
877
|
return self.status.state, text, last_log_timestamp
|
|
845
878
|
|
|
846
|
-
def _get_runtime_env(self):
|
|
847
|
-
# for runtime specific env var enrichment (before deploy)
|
|
848
|
-
runtime_env = {
|
|
849
|
-
mlrun.common.constants.MLRUN_ACTIVE_PROJECT: self.metadata.project
|
|
850
|
-
or mlconf.active_project,
|
|
851
|
-
}
|
|
852
|
-
if mlconf.httpdb.api_url:
|
|
853
|
-
runtime_env["MLRUN_DBPATH"] = mlconf.httpdb.api_url
|
|
854
|
-
if mlconf.namespace:
|
|
855
|
-
runtime_env["MLRUN_NAMESPACE"] = mlconf.namespace
|
|
856
|
-
if self.metadata.credentials.access_key:
|
|
857
|
-
runtime_env[
|
|
858
|
-
mlrun.common.runtimes.constants.FunctionEnvironmentVariables.auth_session
|
|
859
|
-
] = self.metadata.credentials.access_key
|
|
860
|
-
return runtime_env
|
|
861
|
-
|
|
862
879
|
def _get_serving_spec(self):
|
|
863
880
|
return None
|
|
864
881
|
|
|
@@ -883,8 +900,9 @@ class RemoteRuntime(KubeResource):
|
|
|
883
900
|
if value_from is not None:
|
|
884
901
|
external_source_env_dict[sanitized_env_var.get("name")] = value_from
|
|
885
902
|
|
|
886
|
-
|
|
887
|
-
|
|
903
|
+
envs, external_source_envs = self._generate_runtime_env()
|
|
904
|
+
env_dict.update(envs)
|
|
905
|
+
external_source_env_dict.update(external_source_envs)
|
|
888
906
|
|
|
889
907
|
return env_dict, external_source_env_dict
|
|
890
908
|
|
|
@@ -941,7 +959,7 @@ class RemoteRuntime(KubeResource):
|
|
|
941
959
|
def invoke(
|
|
942
960
|
self,
|
|
943
961
|
path: str,
|
|
944
|
-
body: typing.Optional[typing.Union[str, bytes, dict]] = None,
|
|
962
|
+
body: typing.Optional[typing.Union[str, bytes, dict, list]] = None,
|
|
945
963
|
method: typing.Optional[str] = None,
|
|
946
964
|
headers: typing.Optional[dict] = None,
|
|
947
965
|
force_external_address: bool = False,
|
|
@@ -996,7 +1014,7 @@ class RemoteRuntime(KubeResource):
|
|
|
996
1014
|
if not http_client_kwargs:
|
|
997
1015
|
http_client_kwargs = {}
|
|
998
1016
|
if body:
|
|
999
|
-
if isinstance(body,
|
|
1017
|
+
if isinstance(body, str | bytes):
|
|
1000
1018
|
http_client_kwargs["data"] = body
|
|
1001
1019
|
else:
|
|
1002
1020
|
http_client_kwargs["json"] = body
|
|
@@ -1070,6 +1088,20 @@ class RemoteRuntime(KubeResource):
|
|
|
1070
1088
|
sidecar["resources"] = self.spec.resources
|
|
1071
1089
|
self.spec.resources = None
|
|
1072
1090
|
|
|
1091
|
+
def set_probe(self, *args, **kwargs):
|
|
1092
|
+
"""Set a Kubernetes probe configuration for the sidecar container
|
|
1093
|
+
|
|
1094
|
+
This method is only available for ApplicationRuntime.
|
|
1095
|
+
"""
|
|
1096
|
+
raise ValueError("set_probe() is only supported for ApplicationRuntime. ")
|
|
1097
|
+
|
|
1098
|
+
def delete_probe(self, *args, **kwargs):
|
|
1099
|
+
"""Delete a Kubernetes probe configuration from the sidecar container
|
|
1100
|
+
|
|
1101
|
+
This method is only available for ApplicationRuntime.
|
|
1102
|
+
"""
|
|
1103
|
+
raise ValueError("delete_probe() is only supported for ApplicationRuntime.")
|
|
1104
|
+
|
|
1073
1105
|
def _set_sidecar(self, name: str) -> dict:
|
|
1074
1106
|
self.spec.config.setdefault("spec.sidecars", [])
|
|
1075
1107
|
sidecars = self.spec.config["spec.sidecars"]
|
|
@@ -1080,6 +1112,79 @@ class RemoteRuntime(KubeResource):
|
|
|
1080
1112
|
sidecars.append({"name": name})
|
|
1081
1113
|
return sidecars[-1]
|
|
1082
1114
|
|
|
1115
|
+
def _mask_trigger_config(self, trigger_name, trigger_config):
|
|
1116
|
+
self._mask_rabbitmq_url(trigger=trigger_config)
|
|
1117
|
+
for path in SENSITIVE_PATHS_IN_TRIGGER_CONFIG:
|
|
1118
|
+
# Handle nested keys
|
|
1119
|
+
nested_keys = path.split("/")
|
|
1120
|
+
target = trigger_config
|
|
1121
|
+
for sub_key in nested_keys[:-1]:
|
|
1122
|
+
target = target.get(sub_key, {})
|
|
1123
|
+
|
|
1124
|
+
last_key = nested_keys[-1]
|
|
1125
|
+
if last_key in target:
|
|
1126
|
+
sensitive_field = target[last_key]
|
|
1127
|
+
if sensitive_field.startswith(
|
|
1128
|
+
mlrun.model.Credentials.secret_reference_prefix
|
|
1129
|
+
):
|
|
1130
|
+
# already masked
|
|
1131
|
+
continue
|
|
1132
|
+
target[last_key] = (
|
|
1133
|
+
f"{mlrun.model.Credentials.secret_reference_prefix}/spec/triggers/{trigger_name}/{path}"
|
|
1134
|
+
)
|
|
1135
|
+
|
|
1136
|
+
@staticmethod
|
|
1137
|
+
def _mask_rabbitmq_url(trigger):
|
|
1138
|
+
"""
|
|
1139
|
+
Extract credentials from RabbitMQ URL and move them to attributes dict.
|
|
1140
|
+
This ensures credentials are not exposed in the URL.
|
|
1141
|
+
"""
|
|
1142
|
+
|
|
1143
|
+
# supported only for nuclio higher than 1.14.15
|
|
1144
|
+
if not validate_nuclio_version_compatibility("1.14.15"):
|
|
1145
|
+
return
|
|
1146
|
+
if not isinstance(trigger, dict):
|
|
1147
|
+
return
|
|
1148
|
+
|
|
1149
|
+
if trigger.get("kind") != "rabbit-mq":
|
|
1150
|
+
return
|
|
1151
|
+
|
|
1152
|
+
url = trigger.get("url")
|
|
1153
|
+
if not url or not isinstance(url, str):
|
|
1154
|
+
return
|
|
1155
|
+
|
|
1156
|
+
try:
|
|
1157
|
+
parsed = urlparse(url)
|
|
1158
|
+
except Exception:
|
|
1159
|
+
raise mlrun.errors.MLRunValueError("invalid URL format")
|
|
1160
|
+
|
|
1161
|
+
# Only process if credentials are present in the URL
|
|
1162
|
+
if not (parsed.username or parsed.password):
|
|
1163
|
+
return
|
|
1164
|
+
|
|
1165
|
+
# Extract credentials
|
|
1166
|
+
username = parsed.username or ""
|
|
1167
|
+
password = parsed.password or ""
|
|
1168
|
+
|
|
1169
|
+
# Reconstruct clean URL
|
|
1170
|
+
hostname = parsed.hostname or ""
|
|
1171
|
+
netloc = f"{hostname}:{parsed.port}" if parsed.port else hostname
|
|
1172
|
+
|
|
1173
|
+
clean_url = urlunparse(
|
|
1174
|
+
(
|
|
1175
|
+
parsed.scheme,
|
|
1176
|
+
netloc,
|
|
1177
|
+
parsed.path,
|
|
1178
|
+
parsed.params,
|
|
1179
|
+
parsed.query,
|
|
1180
|
+
parsed.fragment,
|
|
1181
|
+
)
|
|
1182
|
+
)
|
|
1183
|
+
|
|
1184
|
+
# Update trigger safely
|
|
1185
|
+
trigger["url"] = clean_url
|
|
1186
|
+
trigger.update({"username": username, "password": password})
|
|
1187
|
+
|
|
1083
1188
|
def _trigger_of_kind_exists(self, kind: str) -> bool:
|
|
1084
1189
|
if not self.spec.config:
|
|
1085
1190
|
return False
|
|
@@ -1456,7 +1561,7 @@ def get_nuclio_deploy_status(
|
|
|
1456
1561
|
verbose,
|
|
1457
1562
|
resolve_address,
|
|
1458
1563
|
return_function_status=True,
|
|
1459
|
-
auth_info=
|
|
1564
|
+
auth_info=mlrun.auth.nuclio.NuclioAuthInfo.from_auth_info(auth_info),
|
|
1460
1565
|
)
|
|
1461
1566
|
except requests.exceptions.ConnectionError as exc:
|
|
1462
1567
|
mlrun.errors.raise_for_status(
|
mlrun/runtimes/nuclio/serving.py
CHANGED
|
@@ -13,7 +13,6 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
import json
|
|
15
15
|
import os
|
|
16
|
-
import warnings
|
|
17
16
|
from base64 import b64decode
|
|
18
17
|
from copy import deepcopy
|
|
19
18
|
from typing import Optional, Union
|
|
@@ -23,7 +22,11 @@ from nuclio import KafkaTrigger
|
|
|
23
22
|
|
|
24
23
|
import mlrun
|
|
25
24
|
import mlrun.common.schemas as schemas
|
|
25
|
+
import mlrun.common.secrets
|
|
26
26
|
import mlrun.datastore.datastore_profile as ds_profile
|
|
27
|
+
import mlrun.runtimes.kubejob as kubejob_runtime
|
|
28
|
+
import mlrun.runtimes.nuclio.function as nuclio_function
|
|
29
|
+
import mlrun.runtimes.pod as pod_runtime
|
|
27
30
|
from mlrun.datastore import get_kafka_brokers_from_dict, parse_kafka_url
|
|
28
31
|
from mlrun.model import ObjectList
|
|
29
32
|
from mlrun.runtimes.function_reference import FunctionReference
|
|
@@ -44,10 +47,6 @@ from mlrun.serving.states import (
|
|
|
44
47
|
)
|
|
45
48
|
from mlrun.utils import get_caller_globals, logger, set_paths
|
|
46
49
|
|
|
47
|
-
from .. import KubejobRuntime
|
|
48
|
-
from ..pod import KubeResourceSpec
|
|
49
|
-
from .function import NuclioSpec, RemoteRuntime, min_nuclio_versions
|
|
50
|
-
|
|
51
50
|
serving_subkind = "serving_v2"
|
|
52
51
|
|
|
53
52
|
|
|
@@ -85,8 +84,8 @@ def new_v2_model_server(
|
|
|
85
84
|
return f
|
|
86
85
|
|
|
87
86
|
|
|
88
|
-
class ServingSpec(NuclioSpec):
|
|
89
|
-
_dict_fields = NuclioSpec._dict_fields + [
|
|
87
|
+
class ServingSpec(nuclio_function.NuclioSpec):
|
|
88
|
+
_dict_fields = nuclio_function.NuclioSpec._dict_fields + [
|
|
90
89
|
"graph",
|
|
91
90
|
"load_mode",
|
|
92
91
|
"graph_initializer",
|
|
@@ -154,6 +153,7 @@ class ServingSpec(NuclioSpec):
|
|
|
154
153
|
disable_default_http_trigger=None,
|
|
155
154
|
model_endpoint_creation_task_name=None,
|
|
156
155
|
serving_spec=None,
|
|
156
|
+
auth=None,
|
|
157
157
|
):
|
|
158
158
|
super().__init__(
|
|
159
159
|
command=command,
|
|
@@ -195,6 +195,7 @@ class ServingSpec(NuclioSpec):
|
|
|
195
195
|
add_templated_ingress_host_mode=add_templated_ingress_host_mode,
|
|
196
196
|
disable_default_http_trigger=disable_default_http_trigger,
|
|
197
197
|
serving_spec=serving_spec,
|
|
198
|
+
auth=auth,
|
|
198
199
|
)
|
|
199
200
|
|
|
200
201
|
self.models = models or {}
|
|
@@ -231,7 +232,7 @@ class ServingSpec(NuclioSpec):
|
|
|
231
232
|
self._function_refs = ObjectList.from_list(FunctionReference, function_refs)
|
|
232
233
|
|
|
233
234
|
|
|
234
|
-
class ServingRuntime(RemoteRuntime):
|
|
235
|
+
class ServingRuntime(nuclio_function.RemoteRuntime):
|
|
235
236
|
"""MLRun Serving Runtime"""
|
|
236
237
|
|
|
237
238
|
kind = "serving"
|
|
@@ -250,6 +251,8 @@ class ServingRuntime(RemoteRuntime):
|
|
|
250
251
|
class_name=None,
|
|
251
252
|
engine=None,
|
|
252
253
|
exist_ok=False,
|
|
254
|
+
allow_cyclic: bool = False,
|
|
255
|
+
max_iterations: Optional[int] = None,
|
|
253
256
|
**class_args,
|
|
254
257
|
) -> Union[RootFlowStep, RouterStep]:
|
|
255
258
|
"""set the serving graph topology (router/flow) and root class or params
|
|
@@ -280,14 +283,23 @@ class ServingRuntime(RemoteRuntime):
|
|
|
280
283
|
:param class_name: - optional for router, router class name/path or router object
|
|
281
284
|
:param engine: - optional for flow, sync or async engine
|
|
282
285
|
:param exist_ok: - allow overriding existing topology
|
|
286
|
+
:param allow_cyclic: - allow cyclic graphs (only for async flow)
|
|
287
|
+
:param max_iterations: - optional, max iterations for cyclic graphs (only for async flow)
|
|
283
288
|
:param class_args: - optional, router/flow class init args
|
|
284
289
|
|
|
285
|
-
:return graph object (fn.spec.graph)
|
|
290
|
+
:return: graph object (fn.spec.graph)
|
|
286
291
|
"""
|
|
287
292
|
topology = topology or StepKinds.router
|
|
288
293
|
if self.spec.graph and not exist_ok:
|
|
289
294
|
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
290
|
-
"graph topology is already set,
|
|
295
|
+
"graph topology is already set, graph was initialized, use exist_ok=True to override"
|
|
296
|
+
)
|
|
297
|
+
if allow_cyclic and (
|
|
298
|
+
topology == StepKinds.router
|
|
299
|
+
or (topology == StepKinds.flow and engine == "sync")
|
|
300
|
+
):
|
|
301
|
+
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
302
|
+
"cyclic graphs are only supported in flow topology with async engine"
|
|
291
303
|
)
|
|
292
304
|
|
|
293
305
|
if topology == StepKinds.router:
|
|
@@ -301,7 +313,11 @@ class ServingRuntime(RemoteRuntime):
|
|
|
301
313
|
step = RouterStep(class_name=class_name, class_args=class_args)
|
|
302
314
|
self.spec.graph = step
|
|
303
315
|
elif topology == StepKinds.flow:
|
|
304
|
-
self.spec.graph = RootFlowStep(
|
|
316
|
+
self.spec.graph = RootFlowStep(
|
|
317
|
+
engine=engine or "async",
|
|
318
|
+
allow_cyclic=allow_cyclic,
|
|
319
|
+
max_iterations=max_iterations,
|
|
320
|
+
)
|
|
305
321
|
self.spec.graph.track_models = self.spec.track_models
|
|
306
322
|
else:
|
|
307
323
|
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
@@ -312,7 +328,6 @@ class ServingRuntime(RemoteRuntime):
|
|
|
312
328
|
def set_tracking(
|
|
313
329
|
self,
|
|
314
330
|
stream_path: Optional[str] = None,
|
|
315
|
-
batch: Optional[int] = None,
|
|
316
331
|
sampling_percentage: float = 100,
|
|
317
332
|
stream_args: Optional[dict] = None,
|
|
318
333
|
enable_tracking: bool = True,
|
|
@@ -322,7 +337,6 @@ class ServingRuntime(RemoteRuntime):
|
|
|
322
337
|
|
|
323
338
|
:param stream_path: Path/url of the tracking stream e.g. v3io:///users/mike/mystream
|
|
324
339
|
you can use the "dummy://" path for test/simulation.
|
|
325
|
-
:param batch: Deprecated. Micro batch size (send micro batches of N records at a time).
|
|
326
340
|
:param sampling_percentage: Down sampling events that will be pushed to the monitoring stream based on
|
|
327
341
|
a specified percentage. e.g. 50 for 50%. By default, all events are pushed.
|
|
328
342
|
:param stream_args: Stream initialization parameters, e.g. shards, retention_in_hours, ..
|
|
@@ -370,13 +384,6 @@ class ServingRuntime(RemoteRuntime):
|
|
|
370
384
|
|
|
371
385
|
if stream_path:
|
|
372
386
|
self.spec.parameters["log_stream"] = stream_path
|
|
373
|
-
if batch:
|
|
374
|
-
warnings.warn(
|
|
375
|
-
"The `batch` size parameter was deprecated in version 1.8.0 and is no longer used. "
|
|
376
|
-
"It will be removed in 1.11.",
|
|
377
|
-
# TODO: Remove this in 1.11
|
|
378
|
-
FutureWarning,
|
|
379
|
-
)
|
|
380
387
|
if stream_args:
|
|
381
388
|
self.spec.parameters["stream_args"] = stream_args
|
|
382
389
|
|
|
@@ -395,7 +402,7 @@ class ServingRuntime(RemoteRuntime):
|
|
|
395
402
|
outputs: Optional[list[str]] = None,
|
|
396
403
|
**class_args,
|
|
397
404
|
):
|
|
398
|
-
"""
|
|
405
|
+
"""Add ml model and/or route to the function.
|
|
399
406
|
|
|
400
407
|
Example, create a function (from the notebook), add a model class, and deploy::
|
|
401
408
|
|
|
@@ -403,7 +410,7 @@ class ServingRuntime(RemoteRuntime):
|
|
|
403
410
|
fn.add_model("boost", model_path, model_class="MyClass", my_arg=5)
|
|
404
411
|
fn.deploy()
|
|
405
412
|
|
|
406
|
-
|
|
413
|
+
Only works with router topology. For nested topologies (model under router under flow)
|
|
407
414
|
need to add router to flow and use router.add_route()
|
|
408
415
|
|
|
409
416
|
:param key: model api key (or name:version), will determine the relative url/path
|
|
@@ -416,18 +423,19 @@ class ServingRuntime(RemoteRuntime):
|
|
|
416
423
|
with multiple router steps)
|
|
417
424
|
:param child_function: child function name, when the model runs in a child function
|
|
418
425
|
:param creation_strategy: Strategy for creating or updating the model endpoint:
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
model
|
|
426
|
+
|
|
427
|
+
* **overwrite**: If model endpoints with the same name exist, delete the `latest`
|
|
428
|
+
one. Create a new model endpoint entry and set it as `latest`.
|
|
429
|
+
|
|
430
|
+
* **inplace** (default): If model endpoints with the same name exist, update the
|
|
431
|
+
`latest` entry. Otherwise, create a new entry.
|
|
432
|
+
|
|
433
|
+
* **archive**: If model endpoints with the same name exist, preserve them.
|
|
434
|
+
Create a new model endpoint with the same name and set it to `latest`.
|
|
435
|
+
|
|
436
|
+
:param outputs: list of the model outputs (e.g. labels), if provided will override the outputs that were
|
|
437
|
+
configured in the model artifact. Note that those outputs need to be equal to the
|
|
438
|
+
model serving function outputs (length, and order).
|
|
431
439
|
:param class_args: extra kwargs to pass to the model serving class __init__
|
|
432
440
|
(can be read in the model using .get_param(key) method)
|
|
433
441
|
"""
|
|
@@ -520,7 +528,7 @@ class ServingRuntime(RemoteRuntime):
|
|
|
520
528
|
:param requirements: py package requirements file path OR list of packages
|
|
521
529
|
:param kind: mlrun function/runtime kind
|
|
522
530
|
|
|
523
|
-
:return function object
|
|
531
|
+
:return: function object
|
|
524
532
|
"""
|
|
525
533
|
function_reference = FunctionReference(
|
|
526
534
|
url,
|
|
@@ -635,14 +643,19 @@ class ServingRuntime(RemoteRuntime):
|
|
|
635
643
|
|
|
636
644
|
:returns: The Runtime (function) object
|
|
637
645
|
"""
|
|
638
|
-
|
|
646
|
+
if kind == "azure_vault" and isinstance(source, dict):
|
|
647
|
+
candidate_secret_name = (source.get("k8s_secret") or "").strip()
|
|
648
|
+
if candidate_secret_name:
|
|
649
|
+
mlrun.common.secrets.validate_not_forbidden_secret(
|
|
650
|
+
candidate_secret_name
|
|
651
|
+
)
|
|
639
652
|
if kind == "vault" and isinstance(source, list):
|
|
640
653
|
source = {"project": self.metadata.project, "secrets": source}
|
|
641
654
|
|
|
642
655
|
self.spec.secret_sources.append({"kind": kind, "source": source})
|
|
643
656
|
return self
|
|
644
657
|
|
|
645
|
-
@min_nuclio_versions("1.12.10")
|
|
658
|
+
@nuclio_function.min_nuclio_versions("1.12.10")
|
|
646
659
|
def deploy(
|
|
647
660
|
self,
|
|
648
661
|
project="",
|
|
@@ -659,8 +672,6 @@ class ServingRuntime(RemoteRuntime):
|
|
|
659
672
|
:param builder_env: env vars dict for source archive config/credentials e.g. builder_env={"GIT_TOKEN": token}
|
|
660
673
|
:param force_build: set True for force building the image
|
|
661
674
|
"""
|
|
662
|
-
# Validate function name before deploying to k8s
|
|
663
|
-
mlrun.utils.helpers.validate_function_name(self.metadata.name)
|
|
664
675
|
|
|
665
676
|
load_mode = self.spec.load_mode
|
|
666
677
|
if load_mode and load_mode not in ["sync", "async"]:
|
|
@@ -858,7 +869,9 @@ class ServingRuntime(RemoteRuntime):
|
|
|
858
869
|
)
|
|
859
870
|
self._mock_server = self.to_mock_server()
|
|
860
871
|
|
|
861
|
-
def to_job(
|
|
872
|
+
def to_job(
|
|
873
|
+
self, func_name: Optional[str] = None
|
|
874
|
+
) -> "kubejob_runtime.KubejobRuntime":
|
|
862
875
|
"""Convert this ServingRuntime to a KubejobRuntime, so that the graph can be run as a standalone job.
|
|
863
876
|
|
|
864
877
|
Args:
|
|
@@ -877,7 +890,7 @@ class ServingRuntime(RemoteRuntime):
|
|
|
877
890
|
f"Cannot convert function '{self.metadata.name}' to a job because it has child functions"
|
|
878
891
|
)
|
|
879
892
|
|
|
880
|
-
spec = KubeResourceSpec(
|
|
893
|
+
spec = pod_runtime.KubeResourceSpec(
|
|
881
894
|
image=self.spec.image,
|
|
882
895
|
mode=self.spec.mode,
|
|
883
896
|
volumes=self.spec.volumes,
|
|
@@ -947,7 +960,7 @@ class ServingRuntime(RemoteRuntime):
|
|
|
947
960
|
suffix=suffix,
|
|
948
961
|
)
|
|
949
962
|
|
|
950
|
-
job = KubejobRuntime(
|
|
963
|
+
job = kubejob_runtime.KubejobRuntime(
|
|
951
964
|
spec=spec,
|
|
952
965
|
metadata=job_metadata,
|
|
953
966
|
)
|