mlrun 1.7.0rc20__py3-none-any.whl → 1.7.0rc28__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/__main__.py +10 -8
- mlrun/alerts/alert.py +55 -18
- mlrun/api/schemas/__init__.py +3 -3
- mlrun/artifacts/manager.py +26 -0
- mlrun/common/constants.py +3 -2
- mlrun/common/formatters/__init__.py +1 -0
- mlrun/common/formatters/artifact.py +26 -3
- mlrun/common/formatters/base.py +44 -9
- mlrun/common/formatters/function.py +12 -7
- mlrun/common/formatters/run.py +26 -0
- mlrun/common/helpers.py +11 -0
- mlrun/common/schemas/__init__.py +4 -0
- mlrun/common/schemas/alert.py +5 -9
- mlrun/common/schemas/api_gateway.py +64 -16
- mlrun/common/schemas/artifact.py +11 -0
- mlrun/common/schemas/constants.py +3 -0
- mlrun/common/schemas/feature_store.py +58 -28
- mlrun/common/schemas/model_monitoring/constants.py +21 -12
- mlrun/common/schemas/model_monitoring/model_endpoints.py +0 -12
- mlrun/common/schemas/pipeline.py +16 -0
- mlrun/common/schemas/project.py +17 -0
- mlrun/common/schemas/runs.py +17 -0
- mlrun/common/schemas/schedule.py +1 -1
- mlrun/common/types.py +6 -0
- mlrun/config.py +17 -25
- mlrun/datastore/azure_blob.py +2 -1
- mlrun/datastore/datastore.py +3 -3
- mlrun/datastore/google_cloud_storage.py +6 -2
- mlrun/datastore/snowflake_utils.py +3 -1
- mlrun/datastore/sources.py +26 -11
- mlrun/datastore/store_resources.py +2 -0
- mlrun/datastore/targets.py +68 -16
- mlrun/db/base.py +83 -2
- mlrun/db/httpdb.py +280 -63
- mlrun/db/nopdb.py +60 -3
- mlrun/errors.py +5 -3
- mlrun/execution.py +28 -13
- mlrun/feature_store/feature_vector.py +8 -0
- mlrun/feature_store/retrieval/spark_merger.py +13 -2
- mlrun/launcher/local.py +4 -0
- mlrun/launcher/remote.py +1 -0
- mlrun/model.py +32 -3
- mlrun/model_monitoring/api.py +7 -52
- mlrun/model_monitoring/applications/base.py +5 -7
- mlrun/model_monitoring/applications/histogram_data_drift.py +1 -1
- mlrun/model_monitoring/db/stores/__init__.py +37 -24
- mlrun/model_monitoring/db/stores/base/store.py +40 -1
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +42 -87
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +27 -35
- mlrun/model_monitoring/db/tsdb/__init__.py +15 -15
- mlrun/model_monitoring/db/tsdb/base.py +1 -14
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +22 -18
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +86 -56
- mlrun/model_monitoring/helpers.py +34 -9
- mlrun/model_monitoring/stream_processing.py +12 -11
- mlrun/model_monitoring/writer.py +11 -11
- mlrun/projects/operations.py +5 -0
- mlrun/projects/pipelines.py +35 -21
- mlrun/projects/project.py +216 -107
- mlrun/render.py +10 -5
- mlrun/run.py +15 -5
- mlrun/runtimes/__init__.py +2 -0
- mlrun/runtimes/base.py +17 -4
- mlrun/runtimes/daskjob.py +8 -1
- mlrun/runtimes/databricks_job/databricks_runtime.py +1 -0
- mlrun/runtimes/local.py +23 -4
- mlrun/runtimes/nuclio/application/application.py +0 -2
- mlrun/runtimes/nuclio/function.py +31 -2
- mlrun/runtimes/nuclio/serving.py +9 -6
- mlrun/runtimes/pod.py +5 -29
- mlrun/runtimes/remotesparkjob.py +8 -2
- mlrun/serving/__init__.py +8 -1
- mlrun/serving/routers.py +75 -59
- mlrun/serving/server.py +11 -0
- mlrun/serving/states.py +80 -8
- mlrun/serving/utils.py +19 -11
- mlrun/serving/v2_serving.py +66 -39
- mlrun/utils/helpers.py +91 -11
- mlrun/utils/logger.py +36 -2
- mlrun/utils/notifications/notification/base.py +43 -7
- mlrun/utils/notifications/notification/git.py +21 -0
- mlrun/utils/notifications/notification/slack.py +9 -14
- mlrun/utils/notifications/notification/webhook.py +41 -1
- mlrun/utils/notifications/notification_pusher.py +3 -9
- mlrun/utils/regex.py +9 -0
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.0rc20.dist-info → mlrun-1.7.0rc28.dist-info}/METADATA +16 -9
- {mlrun-1.7.0rc20.dist-info → mlrun-1.7.0rc28.dist-info}/RECORD +92 -91
- {mlrun-1.7.0rc20.dist-info → mlrun-1.7.0rc28.dist-info}/WHEEL +1 -1
- {mlrun-1.7.0rc20.dist-info → mlrun-1.7.0rc28.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc20.dist-info → mlrun-1.7.0rc28.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc20.dist-info → mlrun-1.7.0rc28.dist-info}/top_level.txt +0 -0
mlrun/run.py
CHANGED
|
@@ -11,6 +11,7 @@
|
|
|
11
11
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
|
+
|
|
14
15
|
import importlib.util as imputil
|
|
15
16
|
import json
|
|
16
17
|
import os
|
|
@@ -28,10 +29,11 @@ from typing import Optional, Union
|
|
|
28
29
|
|
|
29
30
|
import nuclio
|
|
30
31
|
import yaml
|
|
31
|
-
from kfp import Client
|
|
32
32
|
from mlrun_pipelines.common.models import RunStatuses
|
|
33
33
|
from mlrun_pipelines.common.ops import format_summary_from_kfp_run, show_kfp_run
|
|
34
|
+
from mlrun_pipelines.utils import get_client
|
|
34
35
|
|
|
36
|
+
import mlrun.common.constants as mlrun_constants
|
|
35
37
|
import mlrun.common.formatters
|
|
36
38
|
import mlrun.common.schemas
|
|
37
39
|
import mlrun.errors
|
|
@@ -61,11 +63,11 @@ from .runtimes.funcdoc import update_function_entry_points
|
|
|
61
63
|
from .runtimes.nuclio.application import ApplicationRuntime
|
|
62
64
|
from .runtimes.utils import add_code_metadata, global_context
|
|
63
65
|
from .utils import (
|
|
66
|
+
RunKeys,
|
|
64
67
|
extend_hub_uri_if_needed,
|
|
65
68
|
get_in,
|
|
66
69
|
logger,
|
|
67
70
|
retry_until_successful,
|
|
68
|
-
run_keys,
|
|
69
71
|
update_in,
|
|
70
72
|
)
|
|
71
73
|
|
|
@@ -278,7 +280,7 @@ def get_or_create_ctx(
|
|
|
278
280
|
artifact_path = mlrun.utils.helpers.template_artifact_path(
|
|
279
281
|
mlconf.artifact_path, project or mlconf.default_project
|
|
280
282
|
)
|
|
281
|
-
update_in(newspec, ["spec",
|
|
283
|
+
update_in(newspec, ["spec", RunKeys.output_path], artifact_path)
|
|
282
284
|
|
|
283
285
|
newspec.setdefault("metadata", {})
|
|
284
286
|
update_in(newspec, "metadata.name", name, replace=False)
|
|
@@ -293,6 +295,14 @@ def get_or_create_ctx(
|
|
|
293
295
|
newspec["metadata"].get("project") or project or mlconf.default_project
|
|
294
296
|
)
|
|
295
297
|
|
|
298
|
+
newspec["metadata"].setdefault("labels", {})
|
|
299
|
+
|
|
300
|
+
# This function can also be called as a local run if it is not called within a function.
|
|
301
|
+
# It will create a local run, and the run kind must be local by default.
|
|
302
|
+
newspec["metadata"]["labels"].setdefault(
|
|
303
|
+
mlrun_constants.MLRunInternalLabels.kind, RuntimeKinds.local
|
|
304
|
+
)
|
|
305
|
+
|
|
296
306
|
ctx = MLClientCtx.from_dict(
|
|
297
307
|
newspec, rundb=out, autocommit=autocommit, tmp=tmp, host=socket.gethostname()
|
|
298
308
|
)
|
|
@@ -943,7 +953,7 @@ def wait_for_pipeline_completion(
|
|
|
943
953
|
_wait_for_pipeline_completion,
|
|
944
954
|
)
|
|
945
955
|
else:
|
|
946
|
-
client =
|
|
956
|
+
client = get_client(namespace=namespace)
|
|
947
957
|
resp = client.wait_for_run_completion(run_id, timeout)
|
|
948
958
|
if resp:
|
|
949
959
|
resp = resp.to_dict()
|
|
@@ -1004,7 +1014,7 @@ def get_pipeline(
|
|
|
1004
1014
|
)
|
|
1005
1015
|
|
|
1006
1016
|
else:
|
|
1007
|
-
client =
|
|
1017
|
+
client = get_client(namespace=namespace)
|
|
1008
1018
|
resp = client.get_run(run_id)
|
|
1009
1019
|
if resp:
|
|
1010
1020
|
resp = resp.to_dict()
|
mlrun/runtimes/__init__.py
CHANGED
mlrun/runtimes/base.py
CHANGED
|
@@ -68,6 +68,7 @@ spec_fields = [
|
|
|
68
68
|
"disable_auto_mount",
|
|
69
69
|
"allow_empty_resources",
|
|
70
70
|
"clone_target_dir",
|
|
71
|
+
"reset_on_run",
|
|
71
72
|
]
|
|
72
73
|
|
|
73
74
|
|
|
@@ -336,6 +337,7 @@ class BaseRuntime(ModelObj):
|
|
|
336
337
|
notifications: Optional[list[mlrun.model.Notification]] = None,
|
|
337
338
|
returns: Optional[list[Union[str, dict[str, str]]]] = None,
|
|
338
339
|
state_thresholds: Optional[dict[str, int]] = None,
|
|
340
|
+
reset_on_run: Optional[bool] = None,
|
|
339
341
|
**launcher_kwargs,
|
|
340
342
|
) -> RunObject:
|
|
341
343
|
"""
|
|
@@ -390,6 +392,9 @@ class BaseRuntime(ModelObj):
|
|
|
390
392
|
standards and is at least 1 minute (-1 for infinite).
|
|
391
393
|
If the phase is active for longer than the threshold, the run will be aborted.
|
|
392
394
|
See mlconf.function.spec.state_thresholds for the state options and default values.
|
|
395
|
+
:param reset_on_run: When True, function python modules would reload prior to code execution.
|
|
396
|
+
This ensures latest code changes are executed. This argument must be used in
|
|
397
|
+
conjunction with the local=True argument.
|
|
393
398
|
:return: Run context object (RunObject) with run metadata, results and status
|
|
394
399
|
"""
|
|
395
400
|
launcher = mlrun.launcher.factory.LauncherFactory().create_launcher(
|
|
@@ -418,15 +423,22 @@ class BaseRuntime(ModelObj):
|
|
|
418
423
|
notifications=notifications,
|
|
419
424
|
returns=returns,
|
|
420
425
|
state_thresholds=state_thresholds,
|
|
426
|
+
reset_on_run=reset_on_run,
|
|
421
427
|
)
|
|
422
428
|
|
|
423
|
-
def _get_db_run(
|
|
429
|
+
def _get_db_run(
|
|
430
|
+
self,
|
|
431
|
+
task: RunObject = None,
|
|
432
|
+
run_format: mlrun.common.formatters.RunFormat = mlrun.common.formatters.RunFormat.full,
|
|
433
|
+
):
|
|
424
434
|
if self._get_db() and task:
|
|
425
435
|
project = task.metadata.project
|
|
426
436
|
uid = task.metadata.uid
|
|
427
437
|
iter = task.metadata.iteration
|
|
428
438
|
try:
|
|
429
|
-
return self._get_db().read_run(
|
|
439
|
+
return self._get_db().read_run(
|
|
440
|
+
uid, project, iter=iter, format_=run_format
|
|
441
|
+
)
|
|
430
442
|
except mlrun.db.RunDBError:
|
|
431
443
|
return None
|
|
432
444
|
if task:
|
|
@@ -543,13 +555,14 @@ class BaseRuntime(ModelObj):
|
|
|
543
555
|
self,
|
|
544
556
|
resp: dict = None,
|
|
545
557
|
task: RunObject = None,
|
|
546
|
-
err=None,
|
|
558
|
+
err: Union[Exception, str] = None,
|
|
559
|
+
run_format: mlrun.common.formatters.RunFormat = mlrun.common.formatters.RunFormat.full,
|
|
547
560
|
) -> typing.Optional[dict]:
|
|
548
561
|
"""update the task state in the DB"""
|
|
549
562
|
was_none = False
|
|
550
563
|
if resp is None and task:
|
|
551
564
|
was_none = True
|
|
552
|
-
resp = self._get_db_run(task)
|
|
565
|
+
resp = self._get_db_run(task, run_format)
|
|
553
566
|
|
|
554
567
|
if not resp:
|
|
555
568
|
self.store_run(task)
|
mlrun/runtimes/daskjob.py
CHANGED
|
@@ -494,6 +494,7 @@ class DaskCluster(KubejobRuntime):
|
|
|
494
494
|
notifications: Optional[list[mlrun.model.Notification]] = None,
|
|
495
495
|
returns: Optional[list[Union[str, dict[str, str]]]] = None,
|
|
496
496
|
state_thresholds: Optional[dict[str, int]] = None,
|
|
497
|
+
reset_on_run: Optional[bool] = None,
|
|
497
498
|
**launcher_kwargs,
|
|
498
499
|
) -> RunObject:
|
|
499
500
|
if state_thresholds:
|
|
@@ -547,7 +548,13 @@ class DaskCluster(KubejobRuntime):
|
|
|
547
548
|
"specified handler (string) without command "
|
|
548
549
|
"(py file path), specify command or use handler pointer"
|
|
549
550
|
)
|
|
550
|
-
|
|
551
|
+
# Do not embed the module in sys.modules as it is not persistent with the dask cluster
|
|
552
|
+
handler = load_module(
|
|
553
|
+
self.spec.command,
|
|
554
|
+
handler,
|
|
555
|
+
context=context,
|
|
556
|
+
embed_in_sys=False,
|
|
557
|
+
)
|
|
551
558
|
client = self.client
|
|
552
559
|
setattr(context, "dask_client", client)
|
|
553
560
|
sout, serr = exec_from_params(handler, runobj, context)
|
|
@@ -232,6 +232,7 @@ def run_mlrun_databricks_job(context,task_parameters: dict, **kwargs):
|
|
|
232
232
|
notifications: Optional[list[mlrun.model.Notification]] = None,
|
|
233
233
|
returns: Optional[list[Union[str, dict[str, str]]]] = None,
|
|
234
234
|
state_thresholds: Optional[dict[str, int]] = None,
|
|
235
|
+
reset_on_run: Optional[bool] = None,
|
|
235
236
|
**launcher_kwargs,
|
|
236
237
|
) -> RunObject:
|
|
237
238
|
if local:
|
mlrun/runtimes/local.py
CHANGED
|
@@ -372,8 +372,20 @@ class LocalRuntime(BaseRuntime, ParallelRunner):
|
|
|
372
372
|
return run_obj_dict
|
|
373
373
|
|
|
374
374
|
|
|
375
|
-
def load_module(
|
|
376
|
-
|
|
375
|
+
def load_module(
|
|
376
|
+
file_name: str,
|
|
377
|
+
handler: str,
|
|
378
|
+
context: MLClientCtx,
|
|
379
|
+
embed_in_sys: bool = True,
|
|
380
|
+
):
|
|
381
|
+
"""
|
|
382
|
+
Load module from filename
|
|
383
|
+
:param file_name: The module path to load
|
|
384
|
+
:param handler: The callable to load
|
|
385
|
+
:param context: Execution context
|
|
386
|
+
:param embed_in_sys: Embed the file-named module in sys.modules. This is not persistent with remote
|
|
387
|
+
environments and therefore can affect pickling.
|
|
388
|
+
"""
|
|
377
389
|
module = None
|
|
378
390
|
if file_name:
|
|
379
391
|
path = Path(file_name)
|
|
@@ -384,14 +396,21 @@ def load_module(file_name, handler, context):
|
|
|
384
396
|
if spec is None:
|
|
385
397
|
raise RunError(f"Cannot import from {file_name!r}")
|
|
386
398
|
module = imputil.module_from_spec(spec)
|
|
387
|
-
|
|
399
|
+
if embed_in_sys:
|
|
400
|
+
sys.modules[mod_name] = module
|
|
388
401
|
spec.loader.exec_module(module)
|
|
389
402
|
|
|
390
403
|
class_args = {}
|
|
391
404
|
if context:
|
|
392
405
|
class_args = copy(context._parameters.get("_init_args", {}))
|
|
393
406
|
|
|
394
|
-
return get_handler_extended(
|
|
407
|
+
return get_handler_extended(
|
|
408
|
+
handler,
|
|
409
|
+
context,
|
|
410
|
+
class_args,
|
|
411
|
+
namespaces=module,
|
|
412
|
+
reload_modules=context._reset_on_run,
|
|
413
|
+
)
|
|
395
414
|
|
|
396
415
|
|
|
397
416
|
def run_exec(cmd, args, env=None, cwd=None):
|
|
@@ -263,7 +263,6 @@ class ApplicationRuntime(RemoteRuntime):
|
|
|
263
263
|
is_kfp=False,
|
|
264
264
|
mlrun_version_specifier=None,
|
|
265
265
|
show_on_failure: bool = False,
|
|
266
|
-
skip_access_key_auth: bool = False,
|
|
267
266
|
direct_port_access: bool = False,
|
|
268
267
|
authentication_mode: schemas.APIGatewayAuthenticationMode = None,
|
|
269
268
|
authentication_creds: tuple[str] = None,
|
|
@@ -283,7 +282,6 @@ class ApplicationRuntime(RemoteRuntime):
|
|
|
283
282
|
:param is_kfp: Deploy as part of a kfp pipeline
|
|
284
283
|
:param mlrun_version_specifier: Which mlrun package version to include (if not current)
|
|
285
284
|
:param show_on_failure: Show logs only in case of build failure
|
|
286
|
-
:param skip_access_key_auth: Skip adding access key auth to the API Gateway
|
|
287
285
|
:param direct_port_access: Set True to allow direct port access to the application sidecar
|
|
288
286
|
:param authentication_mode: API Gateway authentication mode
|
|
289
287
|
:param authentication_creds: API Gateway authentication credentials as a tuple (username, password)
|
|
@@ -19,6 +19,7 @@ import warnings
|
|
|
19
19
|
from datetime import datetime
|
|
20
20
|
from time import sleep
|
|
21
21
|
|
|
22
|
+
import inflection
|
|
22
23
|
import nuclio
|
|
23
24
|
import nuclio.utils
|
|
24
25
|
import requests
|
|
@@ -65,7 +66,14 @@ def min_nuclio_versions(*versions):
|
|
|
65
66
|
if validate_nuclio_version_compatibility(*versions):
|
|
66
67
|
return function(*args, **kwargs)
|
|
67
68
|
|
|
68
|
-
|
|
69
|
+
if function.__name__ == "__init__":
|
|
70
|
+
name = inflection.titleize(function.__qualname__.split(".")[0])
|
|
71
|
+
else:
|
|
72
|
+
name = function.__qualname__
|
|
73
|
+
|
|
74
|
+
message = (
|
|
75
|
+
f"'{name}' function requires Nuclio v{' or v'.join(versions)} or higher"
|
|
76
|
+
)
|
|
69
77
|
raise mlrun.errors.MLRunIncompatibleVersionError(message)
|
|
70
78
|
|
|
71
79
|
return wrapper
|
|
@@ -263,7 +271,8 @@ class RemoteRuntime(KubeResource):
|
|
|
263
271
|
self._status = self._verify_dict(status, "status", NuclioStatus)
|
|
264
272
|
|
|
265
273
|
def pre_deploy_validation(self):
|
|
266
|
-
|
|
274
|
+
if self.metadata.tag:
|
|
275
|
+
mlrun.utils.validate_tag_name(self.metadata.tag, "function.metadata.tag")
|
|
267
276
|
|
|
268
277
|
def set_config(self, key, value):
|
|
269
278
|
self.spec.config[key] = value
|
|
@@ -1318,3 +1327,23 @@ def get_nuclio_deploy_status(
|
|
|
1318
1327
|
else:
|
|
1319
1328
|
text = "\n".join(outputs) if outputs else ""
|
|
1320
1329
|
return state, address, name, last_log_timestamp, text, function_status
|
|
1330
|
+
|
|
1331
|
+
|
|
1332
|
+
def enrich_nuclio_function_from_headers(
|
|
1333
|
+
func: RemoteRuntime,
|
|
1334
|
+
headers: dict,
|
|
1335
|
+
):
|
|
1336
|
+
func.status.state = headers.get("x-mlrun-function-status", "")
|
|
1337
|
+
func.status.address = headers.get("x-mlrun-address", "")
|
|
1338
|
+
func.status.nuclio_name = headers.get("x-mlrun-name", "")
|
|
1339
|
+
func.status.internal_invocation_urls = (
|
|
1340
|
+
headers.get("x-mlrun-internal-invocation-urls", "").split(",")
|
|
1341
|
+
if headers.get("x-mlrun-internal-invocation-urls")
|
|
1342
|
+
else []
|
|
1343
|
+
)
|
|
1344
|
+
func.status.external_invocation_urls = (
|
|
1345
|
+
headers.get("x-mlrun-external-invocation-urls", "").split(",")
|
|
1346
|
+
if headers.get("x-mlrun-external-invocation-urls")
|
|
1347
|
+
else []
|
|
1348
|
+
)
|
|
1349
|
+
func.status.container_image = headers.get("x-mlrun-container-image", "")
|
mlrun/runtimes/nuclio/serving.py
CHANGED
|
@@ -312,15 +312,18 @@ class ServingRuntime(RemoteRuntime):
|
|
|
312
312
|
sample: Optional[int] = None,
|
|
313
313
|
stream_args: Optional[dict] = None,
|
|
314
314
|
tracking_policy: Optional[Union["TrackingPolicy", dict]] = None,
|
|
315
|
+
enable_tracking: bool = True,
|
|
315
316
|
) -> None:
|
|
316
317
|
"""apply on your serving function to monitor a deployed model, including real-time dashboards to detect drift
|
|
317
318
|
and analyze performance.
|
|
318
319
|
|
|
319
|
-
:param stream_path:
|
|
320
|
-
|
|
321
|
-
:param batch:
|
|
322
|
-
:param sample:
|
|
323
|
-
:param stream_args:
|
|
320
|
+
:param stream_path: Path/url of the tracking stream e.g. v3io:///users/mike/mystream
|
|
321
|
+
you can use the "dummy://" path for test/simulation.
|
|
322
|
+
:param batch: Micro batch size (send micro batches of N records at a time).
|
|
323
|
+
:param sample: Sample size (send only one of N records).
|
|
324
|
+
:param stream_args: Stream initialization parameters, e.g. shards, retention_in_hours, ..
|
|
325
|
+
:param enable_tracking: Enable/disable model-monitoring tracking.
|
|
326
|
+
Default True (tracking enabled).
|
|
324
327
|
|
|
325
328
|
example::
|
|
326
329
|
|
|
@@ -331,7 +334,7 @@ class ServingRuntime(RemoteRuntime):
|
|
|
331
334
|
|
|
332
335
|
"""
|
|
333
336
|
# Applying model monitoring configurations
|
|
334
|
-
self.spec.track_models =
|
|
337
|
+
self.spec.track_models = enable_tracking
|
|
335
338
|
|
|
336
339
|
if stream_path:
|
|
337
340
|
self.spec.parameters["log_stream"] = stream_path
|
mlrun/runtimes/pod.py
CHANGED
|
@@ -532,7 +532,9 @@ class KubeResourceSpec(FunctionSpec):
|
|
|
532
532
|
return
|
|
533
533
|
|
|
534
534
|
# merge node selectors - precedence to existing node selector
|
|
535
|
-
self.node_selector =
|
|
535
|
+
self.node_selector = mlrun.utils.helpers.merge_with_precedence(
|
|
536
|
+
node_selector, self.node_selector
|
|
537
|
+
)
|
|
536
538
|
|
|
537
539
|
def _merge_tolerations(
|
|
538
540
|
self,
|
|
@@ -1038,32 +1040,6 @@ class KubeResource(BaseRuntime, KfpAdapterMixin):
|
|
|
1038
1040
|
return True
|
|
1039
1041
|
return False
|
|
1040
1042
|
|
|
1041
|
-
def enrich_runtime_spec(
|
|
1042
|
-
self,
|
|
1043
|
-
project_node_selector: dict[str, str],
|
|
1044
|
-
):
|
|
1045
|
-
"""
|
|
1046
|
-
Enriches the runtime spec with the project-level node selector.
|
|
1047
|
-
|
|
1048
|
-
This method merges the project-level node selector with the existing function node_selector.
|
|
1049
|
-
The merge logic used here combines the two dictionaries, giving precedence to
|
|
1050
|
-
the keys in the runtime node_selector. If there are conflicting keys between the
|
|
1051
|
-
two dictionaries, the values from self.spec.node_selector will overwrite the
|
|
1052
|
-
values from project_node_selector.
|
|
1053
|
-
|
|
1054
|
-
Example:
|
|
1055
|
-
Suppose self.spec.node_selector = {"type": "gpu", "zone": "us-east-1"}
|
|
1056
|
-
and project_node_selector = {"type": "cpu", "environment": "production"}.
|
|
1057
|
-
After the merge, the resulting node_selector will be:
|
|
1058
|
-
{"type": "gpu", "zone": "us-east-1", "environment": "production"}
|
|
1059
|
-
|
|
1060
|
-
Note:
|
|
1061
|
-
- The merge uses the ** operator, also known as the "unpacking" operator in Python,
|
|
1062
|
-
combining key-value pairs from each dictionary. Later dictionaries take precedence
|
|
1063
|
-
when there are conflicting keys.
|
|
1064
|
-
"""
|
|
1065
|
-
self.spec.node_selector = {**project_node_selector, **self.spec.node_selector}
|
|
1066
|
-
|
|
1067
1043
|
def _set_env(self, name, value=None, value_from=None):
|
|
1068
1044
|
new_var = k8s_client.V1EnvVar(name=name, value=value, value_from=value_from)
|
|
1069
1045
|
|
|
@@ -1542,7 +1518,7 @@ def get_sanitized_attribute(spec, attribute_name: str):
|
|
|
1542
1518
|
|
|
1543
1519
|
# check if attribute of type dict, and then check if type is sanitized
|
|
1544
1520
|
if isinstance(attribute, dict):
|
|
1545
|
-
if attribute_config["not_sanitized_class"]
|
|
1521
|
+
if not isinstance(attribute_config["not_sanitized_class"], dict):
|
|
1546
1522
|
raise mlrun.errors.MLRunInvalidArgumentTypeError(
|
|
1547
1523
|
f"expected to be of type {attribute_config.get('not_sanitized_class')} but got dict"
|
|
1548
1524
|
)
|
|
@@ -1552,7 +1528,7 @@ def get_sanitized_attribute(spec, attribute_name: str):
|
|
|
1552
1528
|
elif isinstance(attribute, list) and not isinstance(
|
|
1553
1529
|
attribute[0], attribute_config["sub_attribute_type"]
|
|
1554
1530
|
):
|
|
1555
|
-
if attribute_config["not_sanitized_class"]
|
|
1531
|
+
if not isinstance(attribute_config["not_sanitized_class"], list):
|
|
1556
1532
|
raise mlrun.errors.MLRunInvalidArgumentTypeError(
|
|
1557
1533
|
f"expected to be of type {attribute_config.get('not_sanitized_class')} but got list"
|
|
1558
1534
|
)
|
mlrun/runtimes/remotesparkjob.py
CHANGED
|
@@ -130,14 +130,20 @@ class RemoteSparkRuntime(KubejobRuntime):
|
|
|
130
130
|
def spec(self, spec):
|
|
131
131
|
self._spec = self._verify_dict(spec, "spec", RemoteSparkSpec)
|
|
132
132
|
|
|
133
|
-
def with_spark_service(
|
|
133
|
+
def with_spark_service(
|
|
134
|
+
self,
|
|
135
|
+
spark_service,
|
|
136
|
+
provider=RemoteSparkProviders.iguazio,
|
|
137
|
+
with_v3io_mount=True,
|
|
138
|
+
):
|
|
134
139
|
"""Attach spark service to function"""
|
|
135
140
|
self.spec.provider = provider
|
|
136
141
|
if provider == RemoteSparkProviders.iguazio:
|
|
137
142
|
self.spec.env.append(
|
|
138
143
|
{"name": "MLRUN_SPARK_CLIENT_IGZ_SPARK", "value": "true"}
|
|
139
144
|
)
|
|
140
|
-
|
|
145
|
+
if with_v3io_mount:
|
|
146
|
+
self.apply(mount_v3io())
|
|
141
147
|
self.apply(
|
|
142
148
|
mount_v3iod(
|
|
143
149
|
namespace=config.namespace,
|
mlrun/serving/__init__.py
CHANGED
|
@@ -22,10 +22,17 @@ __all__ = [
|
|
|
22
22
|
"RouterStep",
|
|
23
23
|
"QueueStep",
|
|
24
24
|
"ErrorStep",
|
|
25
|
+
"MonitoringApplicationStep",
|
|
25
26
|
]
|
|
26
27
|
|
|
27
28
|
from .routers import ModelRouter, VotingEnsemble # noqa
|
|
28
29
|
from .server import GraphContext, GraphServer, create_graph_server # noqa
|
|
29
|
-
from .states import
|
|
30
|
+
from .states import (
|
|
31
|
+
ErrorStep,
|
|
32
|
+
QueueStep,
|
|
33
|
+
RouterStep,
|
|
34
|
+
TaskStep,
|
|
35
|
+
MonitoringApplicationStep,
|
|
36
|
+
) # noqa
|
|
30
37
|
from .v1_serving import MLModelServer, new_v1_model_server # noqa
|
|
31
38
|
from .v2_serving import V2ModelServer # noqa
|
mlrun/serving/routers.py
CHANGED
|
@@ -1030,74 +1030,90 @@ def _init_endpoint_record(
|
|
|
1030
1030
|
function_uri=graph_server.function_uri, versioned_model=versioned_model_name
|
|
1031
1031
|
).uid
|
|
1032
1032
|
|
|
1033
|
-
# If model endpoint object was found in DB, skip the creation process.
|
|
1034
1033
|
try:
|
|
1035
|
-
mlrun.get_run_db().get_model_endpoint(
|
|
1036
|
-
|
|
1034
|
+
model_ep = mlrun.get_run_db().get_model_endpoint(
|
|
1035
|
+
project=project, endpoint_id=endpoint_uid
|
|
1036
|
+
)
|
|
1037
1037
|
except mlrun.errors.MLRunNotFoundError:
|
|
1038
|
-
|
|
1038
|
+
model_ep = None
|
|
1039
|
+
except mlrun.errors.MLRunBadRequestError as err:
|
|
1040
|
+
logger.debug(
|
|
1041
|
+
f"Cant reach to model endpoints store, due to : {err}",
|
|
1042
|
+
)
|
|
1043
|
+
return
|
|
1039
1044
|
|
|
1040
|
-
|
|
1041
|
-
|
|
1042
|
-
|
|
1043
|
-
|
|
1044
|
-
|
|
1045
|
-
|
|
1046
|
-
|
|
1047
|
-
|
|
1048
|
-
|
|
1049
|
-
|
|
1050
|
-
|
|
1051
|
-
|
|
1052
|
-
|
|
1053
|
-
|
|
1054
|
-
|
|
1055
|
-
|
|
1056
|
-
|
|
1057
|
-
),
|
|
1058
|
-
active=True,
|
|
1059
|
-
monitoring_mode=mlrun.common.schemas.model_monitoring.ModelMonitoringMode.enabled
|
|
1060
|
-
if voting_ensemble.context.server.track_models
|
|
1061
|
-
else mlrun.common.schemas.model_monitoring.ModelMonitoringMode.disabled,
|
|
1062
|
-
),
|
|
1063
|
-
status=mlrun.common.schemas.ModelEndpointStatus(
|
|
1064
|
-
children=list(voting_ensemble.routes.keys()),
|
|
1065
|
-
endpoint_type=mlrun.common.schemas.model_monitoring.EndpointType.ROUTER,
|
|
1066
|
-
children_uids=children_uids,
|
|
1045
|
+
if voting_ensemble.context.server.track_models and not model_ep:
|
|
1046
|
+
logger.info("Creating a new model endpoint record", endpoint_id=endpoint_uid)
|
|
1047
|
+
# Get the children model endpoints ids
|
|
1048
|
+
children_uids = []
|
|
1049
|
+
for _, c in voting_ensemble.routes.items():
|
|
1050
|
+
if hasattr(c, "endpoint_uid"):
|
|
1051
|
+
children_uids.append(c.endpoint_uid)
|
|
1052
|
+
model_endpoint = mlrun.common.schemas.ModelEndpoint(
|
|
1053
|
+
metadata=mlrun.common.schemas.ModelEndpointMetadata(
|
|
1054
|
+
project=project, uid=endpoint_uid
|
|
1055
|
+
),
|
|
1056
|
+
spec=mlrun.common.schemas.ModelEndpointSpec(
|
|
1057
|
+
function_uri=graph_server.function_uri,
|
|
1058
|
+
model=versioned_model_name,
|
|
1059
|
+
model_class=voting_ensemble.__class__.__name__,
|
|
1060
|
+
stream_path=config.model_endpoint_monitoring.store_prefixes.default.format(
|
|
1061
|
+
project=project, kind="stream"
|
|
1067
1062
|
),
|
|
1068
|
-
|
|
1063
|
+
active=True,
|
|
1064
|
+
monitoring_mode=mlrun.common.schemas.model_monitoring.ModelMonitoringMode.enabled,
|
|
1065
|
+
),
|
|
1066
|
+
status=mlrun.common.schemas.ModelEndpointStatus(
|
|
1067
|
+
children=list(voting_ensemble.routes.keys()),
|
|
1068
|
+
endpoint_type=mlrun.common.schemas.model_monitoring.EndpointType.ROUTER,
|
|
1069
|
+
children_uids=children_uids,
|
|
1070
|
+
),
|
|
1071
|
+
)
|
|
1069
1072
|
|
|
1070
|
-
|
|
1073
|
+
db = mlrun.get_run_db()
|
|
1074
|
+
|
|
1075
|
+
db.create_model_endpoint(
|
|
1076
|
+
project=project,
|
|
1077
|
+
endpoint_id=model_endpoint.metadata.uid,
|
|
1078
|
+
model_endpoint=model_endpoint.dict(),
|
|
1079
|
+
)
|
|
1071
1080
|
|
|
1081
|
+
# Update model endpoint children type
|
|
1082
|
+
for model_endpoint in children_uids:
|
|
1083
|
+
current_endpoint = db.get_model_endpoint(
|
|
1084
|
+
project=project, endpoint_id=model_endpoint
|
|
1085
|
+
)
|
|
1086
|
+
current_endpoint.status.endpoint_type = (
|
|
1087
|
+
mlrun.common.schemas.model_monitoring.EndpointType.LEAF_EP
|
|
1088
|
+
)
|
|
1072
1089
|
db.create_model_endpoint(
|
|
1073
1090
|
project=project,
|
|
1074
|
-
endpoint_id=model_endpoint
|
|
1075
|
-
model_endpoint=
|
|
1076
|
-
)
|
|
1077
|
-
|
|
1078
|
-
# Update model endpoint children type
|
|
1079
|
-
for model_endpoint in children_uids:
|
|
1080
|
-
current_endpoint = db.get_model_endpoint(
|
|
1081
|
-
project=project, endpoint_id=model_endpoint
|
|
1082
|
-
)
|
|
1083
|
-
current_endpoint.status.endpoint_type = (
|
|
1084
|
-
mlrun.common.schemas.model_monitoring.EndpointType.LEAF_EP
|
|
1085
|
-
)
|
|
1086
|
-
db.create_model_endpoint(
|
|
1087
|
-
project=project,
|
|
1088
|
-
endpoint_id=model_endpoint,
|
|
1089
|
-
model_endpoint=current_endpoint,
|
|
1090
|
-
)
|
|
1091
|
-
|
|
1092
|
-
except Exception as exc:
|
|
1093
|
-
logger.warning(
|
|
1094
|
-
"Failed creating model endpoint record",
|
|
1095
|
-
exc=err_to_str(exc),
|
|
1096
|
-
traceback=traceback.format_exc(),
|
|
1091
|
+
endpoint_id=model_endpoint,
|
|
1092
|
+
model_endpoint=current_endpoint,
|
|
1097
1093
|
)
|
|
1098
|
-
|
|
1099
|
-
|
|
1100
|
-
|
|
1094
|
+
elif (
|
|
1095
|
+
model_ep
|
|
1096
|
+
and (
|
|
1097
|
+
model_ep.spec.monitoring_mode
|
|
1098
|
+
== mlrun.common.schemas.model_monitoring.ModelMonitoringMode.enabled
|
|
1099
|
+
)
|
|
1100
|
+
!= voting_ensemble.context.server.track_models
|
|
1101
|
+
):
|
|
1102
|
+
monitoring_mode = (
|
|
1103
|
+
mlrun.common.schemas.model_monitoring.ModelMonitoringMode.enabled
|
|
1104
|
+
if voting_ensemble.context.server.track_models
|
|
1105
|
+
else mlrun.common.schemas.model_monitoring.ModelMonitoringMode.disabled
|
|
1106
|
+
)
|
|
1107
|
+
db = mlrun.get_run_db()
|
|
1108
|
+
db.patch_model_endpoint(
|
|
1109
|
+
project=project,
|
|
1110
|
+
endpoint_id=endpoint_uid,
|
|
1111
|
+
attributes={"monitoring_mode": monitoring_mode},
|
|
1112
|
+
)
|
|
1113
|
+
logger.debug(
|
|
1114
|
+
f"Updating model endpoint monitoring_mode to {monitoring_mode}",
|
|
1115
|
+
endpoint_id=endpoint_uid,
|
|
1116
|
+
)
|
|
1101
1117
|
|
|
1102
1118
|
return endpoint_uid
|
|
1103
1119
|
|
mlrun/serving/server.py
CHANGED
|
@@ -383,6 +383,17 @@ def v2_serving_handler(context, event, get_body=False):
|
|
|
383
383
|
if event.body == b"":
|
|
384
384
|
event.body = None
|
|
385
385
|
|
|
386
|
+
# original path is saved in stream_path so it can be used by explicit ack, but path is reset to / as a
|
|
387
|
+
# workaround for NUC-178
|
|
388
|
+
event.stream_path = event.path
|
|
389
|
+
if hasattr(event, "trigger") and event.trigger.kind in (
|
|
390
|
+
"kafka",
|
|
391
|
+
"kafka-cluster",
|
|
392
|
+
"v3ioStream",
|
|
393
|
+
"v3io-stream",
|
|
394
|
+
):
|
|
395
|
+
event.path = "/"
|
|
396
|
+
|
|
386
397
|
return context._server.run(event, context, get_body)
|
|
387
398
|
|
|
388
399
|
|