mlrun 1.10.0rc13__py3-none-any.whl → 1.10.0rc42__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/__init__.py +22 -2
- mlrun/artifacts/base.py +0 -31
- mlrun/artifacts/document.py +6 -1
- mlrun/artifacts/llm_prompt.py +123 -25
- mlrun/artifacts/manager.py +0 -5
- mlrun/artifacts/model.py +3 -3
- mlrun/common/constants.py +10 -1
- mlrun/common/formatters/artifact.py +1 -0
- mlrun/common/model_monitoring/helpers.py +86 -0
- mlrun/common/schemas/__init__.py +3 -0
- mlrun/common/schemas/auth.py +2 -0
- mlrun/common/schemas/function.py +10 -0
- mlrun/common/schemas/hub.py +30 -18
- mlrun/common/schemas/model_monitoring/__init__.py +3 -0
- mlrun/common/schemas/model_monitoring/constants.py +30 -6
- mlrun/common/schemas/model_monitoring/functions.py +14 -5
- mlrun/common/schemas/model_monitoring/model_endpoints.py +21 -0
- mlrun/common/schemas/pipeline.py +1 -1
- mlrun/common/schemas/serving.py +3 -0
- mlrun/common/schemas/workflow.py +3 -1
- mlrun/common/secrets.py +22 -1
- mlrun/config.py +33 -11
- mlrun/datastore/__init__.py +11 -3
- mlrun/datastore/azure_blob.py +162 -47
- mlrun/datastore/datastore.py +9 -4
- mlrun/datastore/datastore_profile.py +61 -5
- mlrun/datastore/model_provider/huggingface_provider.py +363 -0
- mlrun/datastore/model_provider/mock_model_provider.py +87 -0
- mlrun/datastore/model_provider/model_provider.py +230 -65
- mlrun/datastore/model_provider/openai_provider.py +295 -42
- mlrun/datastore/s3.py +24 -2
- mlrun/datastore/storeytargets.py +2 -3
- mlrun/datastore/utils.py +15 -3
- mlrun/db/base.py +47 -19
- mlrun/db/httpdb.py +120 -56
- mlrun/db/nopdb.py +38 -10
- mlrun/execution.py +70 -19
- mlrun/hub/__init__.py +15 -0
- mlrun/hub/module.py +181 -0
- mlrun/k8s_utils.py +105 -16
- mlrun/launcher/base.py +13 -6
- mlrun/launcher/local.py +15 -0
- mlrun/model.py +24 -3
- mlrun/model_monitoring/__init__.py +1 -0
- mlrun/model_monitoring/api.py +66 -27
- mlrun/model_monitoring/applications/__init__.py +1 -1
- mlrun/model_monitoring/applications/base.py +509 -117
- mlrun/model_monitoring/applications/context.py +2 -4
- mlrun/model_monitoring/applications/results.py +4 -7
- mlrun/model_monitoring/controller.py +239 -101
- mlrun/model_monitoring/db/_schedules.py +116 -33
- mlrun/model_monitoring/db/_stats.py +4 -3
- mlrun/model_monitoring/db/tsdb/base.py +100 -9
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +11 -6
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +191 -50
- mlrun/model_monitoring/db/tsdb/tdengine/writer_graph_steps.py +51 -0
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +17 -4
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +259 -40
- mlrun/model_monitoring/helpers.py +54 -9
- mlrun/model_monitoring/stream_processing.py +45 -14
- mlrun/model_monitoring/writer.py +220 -1
- mlrun/platforms/__init__.py +3 -2
- mlrun/platforms/iguazio.py +7 -3
- mlrun/projects/operations.py +6 -1
- mlrun/projects/pipelines.py +46 -26
- mlrun/projects/project.py +166 -58
- mlrun/run.py +94 -17
- mlrun/runtimes/__init__.py +18 -0
- mlrun/runtimes/base.py +14 -6
- mlrun/runtimes/daskjob.py +7 -0
- mlrun/runtimes/local.py +5 -2
- mlrun/runtimes/mounts.py +20 -2
- mlrun/runtimes/mpijob/abstract.py +6 -0
- mlrun/runtimes/mpijob/v1.py +6 -0
- mlrun/runtimes/nuclio/__init__.py +1 -0
- mlrun/runtimes/nuclio/application/application.py +149 -17
- mlrun/runtimes/nuclio/function.py +76 -27
- mlrun/runtimes/nuclio/serving.py +97 -15
- mlrun/runtimes/pod.py +234 -21
- mlrun/runtimes/remotesparkjob.py +6 -0
- mlrun/runtimes/sparkjob/spark3job.py +6 -0
- mlrun/runtimes/utils.py +49 -11
- mlrun/secrets.py +54 -13
- mlrun/serving/__init__.py +2 -0
- mlrun/serving/remote.py +79 -6
- mlrun/serving/routers.py +23 -41
- mlrun/serving/server.py +320 -80
- mlrun/serving/states.py +725 -157
- mlrun/serving/steps.py +62 -0
- mlrun/serving/system_steps.py +200 -119
- mlrun/serving/v2_serving.py +9 -10
- mlrun/utils/helpers.py +288 -88
- mlrun/utils/logger.py +3 -1
- mlrun/utils/notifications/notification/base.py +18 -0
- mlrun/utils/notifications/notification/git.py +2 -4
- mlrun/utils/notifications/notification/slack.py +2 -4
- mlrun/utils/notifications/notification/webhook.py +2 -5
- mlrun/utils/notifications/notification_pusher.py +1 -1
- mlrun/utils/retryer.py +15 -2
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc42.dist-info}/METADATA +45 -51
- {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc42.dist-info}/RECORD +106 -101
- mlrun/api/schemas/__init__.py +0 -259
- {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc42.dist-info}/WHEEL +0 -0
- {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc42.dist-info}/entry_points.txt +0 -0
- {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc42.dist-info}/licenses/LICENSE +0 -0
- {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc42.dist-info}/top_level.txt +0 -0
mlrun/utils/helpers.py
CHANGED
|
@@ -15,13 +15,13 @@
|
|
|
15
15
|
import asyncio
|
|
16
16
|
import base64
|
|
17
17
|
import enum
|
|
18
|
-
import functools
|
|
19
18
|
import gzip
|
|
20
19
|
import hashlib
|
|
21
20
|
import inspect
|
|
22
21
|
import itertools
|
|
23
22
|
import json
|
|
24
23
|
import os
|
|
24
|
+
import pathlib
|
|
25
25
|
import re
|
|
26
26
|
import string
|
|
27
27
|
import sys
|
|
@@ -29,6 +29,7 @@ import traceback
|
|
|
29
29
|
import typing
|
|
30
30
|
import uuid
|
|
31
31
|
import warnings
|
|
32
|
+
from copy import deepcopy
|
|
32
33
|
from datetime import datetime, timedelta, timezone
|
|
33
34
|
from importlib import import_module, reload
|
|
34
35
|
from os import path
|
|
@@ -45,6 +46,8 @@ import pytz
|
|
|
45
46
|
import semver
|
|
46
47
|
import yaml
|
|
47
48
|
from dateutil import parser
|
|
49
|
+
from packaging.requirements import Requirement
|
|
50
|
+
from packaging.utils import canonicalize_name
|
|
48
51
|
from pandas import Timedelta, Timestamp
|
|
49
52
|
from yaml.representer import RepresenterError
|
|
50
53
|
|
|
@@ -61,6 +64,7 @@ import mlrun_pipelines.models
|
|
|
61
64
|
import mlrun_pipelines.utils
|
|
62
65
|
from mlrun.common.constants import MYSQL_MEDIUMBLOB_SIZE_BYTES
|
|
63
66
|
from mlrun.common.schemas import ArtifactCategories
|
|
67
|
+
from mlrun.common.schemas.hub import HubSourceType
|
|
64
68
|
from mlrun.config import config
|
|
65
69
|
from mlrun_pipelines.models import PipelineRun
|
|
66
70
|
|
|
@@ -162,14 +166,6 @@ def get_artifact_target(item: dict, project=None):
|
|
|
162
166
|
return item["spec"].get("target_path")
|
|
163
167
|
|
|
164
168
|
|
|
165
|
-
# TODO: Remove once data migration v5 is obsolete
|
|
166
|
-
def is_legacy_artifact(artifact):
|
|
167
|
-
if isinstance(artifact, dict):
|
|
168
|
-
return "metadata" not in artifact
|
|
169
|
-
else:
|
|
170
|
-
return not hasattr(artifact, "metadata")
|
|
171
|
-
|
|
172
|
-
|
|
173
169
|
logger = create_logger(config.log_level, config.log_formatter, "mlrun", sys.stdout)
|
|
174
170
|
missing = object()
|
|
175
171
|
|
|
@@ -257,6 +253,40 @@ def verify_field_regex(
|
|
|
257
253
|
return False
|
|
258
254
|
|
|
259
255
|
|
|
256
|
+
def validate_function_name(name: str) -> None:
    """
    Check a function name against the Kubernetes DNS-1123 label rules.

    A DNS-1123 label:
    - contains only lowercase alphanumeric characters or '-'
    - starts and ends with an alphanumeric character
    - is at most 63 characters long

    Call this AFTER normalize_name() has been applied to the name.
    A falsy name (None or empty string) is skipped without any check.

    Refer to https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#dns-label-names

    :param name: The function name to validate (after normalization)
    :raises MLRunInvalidArgumentError: If the function name is invalid for Kubernetes
    """
    if name:
        failure_message = (
            f"Function name '{name}' is invalid. "
            "Kubernetes function names must be DNS-1123 labels: "
            "lowercase alphanumeric characters or '-', "
            "starting and ending with an alphanumeric character, "
            "and at most 63 characters long."
        )
        # raise_on_failure=True makes the regex verifier raise instead of returning False
        verify_field_regex(
            "function.metadata.name",
            name,
            mlrun.utils.regex.dns_1123_label,
            raise_on_failure=True,
            log_message=failure_message,
        )
|
|
288
|
+
|
|
289
|
+
|
|
260
290
|
def validate_builder_source(
|
|
261
291
|
source: str, pull_at_runtime: bool = False, workdir: Optional[str] = None
|
|
262
292
|
):
|
|
@@ -471,21 +501,49 @@ def to_date_str(d):
|
|
|
471
501
|
return ""
|
|
472
502
|
|
|
473
503
|
|
|
474
|
-
def normalize_name(name: str
|
|
504
|
+
def normalize_name(name: str):
|
|
475
505
|
# TODO: Must match
|
|
476
506
|
# [a-z0-9]([-a-z0-9]*[a-z0-9])?(\\.[a-z0-9]([-a-z0-9]*[a-z0-9])?
|
|
477
507
|
name = re.sub(r"\s+", "-", name)
|
|
478
508
|
if "_" in name:
|
|
479
|
-
if verbose:
|
|
480
|
-
warnings.warn(
|
|
481
|
-
"Names with underscore '_' are about to be deprecated, use dashes '-' instead. "
|
|
482
|
-
f"Replacing '{name}' underscores with dashes.",
|
|
483
|
-
FutureWarning,
|
|
484
|
-
)
|
|
485
509
|
name = name.replace("_", "-")
|
|
486
510
|
return name.lower()
|
|
487
511
|
|
|
488
512
|
|
|
513
|
+
def ensure_batch_job_suffix(
    function_name: typing.Optional[str],
) -> tuple[typing.Optional[str], bool, str]:
    """
    Append the reserved batch job suffix to a function name unless it is already there.

    Used by to_job() methods in runtimes that turn online functions (serving, local)
    into batch processing jobs. The suffix keeps the job from overwriting the original
    function in the database when both would be stored under the same (project, name) key.

    :param function_name: The original function name (can be None or empty string)

    :return: A tuple of (modified_name, was_renamed, suffix) where:
        - modified_name: The function name ending with the batch suffix; a None or empty
          input is returned unchanged
        - was_renamed: True only if the suffix was appended by this call
        - suffix: The suffix value that was used (or would have been used)
    """
    suffix = mlrun_constants.RESERVED_BATCH_JOB_SUFFIX

    # None/empty names and names that already carry the suffix are left untouched
    needs_suffix = bool(function_name) and not function_name.endswith(suffix)
    if needs_suffix:
        return f"{function_name}{suffix}", True, suffix
    return function_name, False, suffix
|
|
545
|
+
|
|
546
|
+
|
|
489
547
|
class LogBatchWriter:
|
|
490
548
|
def __init__(self, func, batch=16, maxtime=5):
|
|
491
549
|
self.batch = batch
|
|
@@ -515,9 +573,14 @@ def get_in(obj, keys, default=None):
|
|
|
515
573
|
if isinstance(keys, str):
|
|
516
574
|
keys = keys.split(".")
|
|
517
575
|
for key in keys:
|
|
518
|
-
if
|
|
576
|
+
if obj is None:
|
|
519
577
|
return default
|
|
520
|
-
obj
|
|
578
|
+
if isinstance(obj, dict):
|
|
579
|
+
if key not in obj:
|
|
580
|
+
return default
|
|
581
|
+
obj = obj[key]
|
|
582
|
+
else:
|
|
583
|
+
obj = getattr(obj, key, default)
|
|
521
584
|
return obj
|
|
522
585
|
|
|
523
586
|
|
|
@@ -794,17 +857,48 @@ def generate_artifact_uri(
|
|
|
794
857
|
return artifact_uri
|
|
795
858
|
|
|
796
859
|
|
|
797
|
-
def
|
|
860
|
+
def remove_tag_from_artifact_uri(uri: str) -> Optional[str]:
    """
    Strip the `:<tag>` section from an artifact URI of the form:
    [store://][<project>/]<key>[#<iter>][:<tag>][@<tree>][^<uid>]

    The store scheme prefix, when present, is taken off before the substitution
    (so its own ':' is not mistaken for a tag separator) and put back afterwards.

    Examples:
        "store://proj/key:latest" => "store://proj/key"
        "key#1:dev@tree^uid" => "key#1@tree^uid"
        "store://key:tag" => "store://key"
        "store://models/remote-model-project/my_model#0@tree" => unchanged (no tag)

    :param uri: The artifact URI, with or without the store scheme prefix
    :return: The URI without the tag section
    """

    def _keep_iteration(match):
        # group(1) is the optional "#<iter>" part preceding the tag; keep it, drop the tag
        return match.group(1) or ""

    is_store = mlrun.datastore.is_store_uri(uri)
    if is_store:
        uri = uri.removeprefix(DB_SCHEMA + "://")

    untagged = re.sub(r"(#[^:@\s]*)?:[^@^:\s]+(?=(@|\^|$))", _keep_iteration, uri)

    if is_store:
        return DB_SCHEMA + "://" + untagged
    return untagged
|
|
879
|
+
|
|
880
|
+
|
|
881
|
+
def check_if_hub_uri(uri: str) -> bool:
    """
    Tell whether a URI points at a hub item.

    :param uri: The URI to inspect
    :return: True when the URI starts with the module-level `hub_prefix`
    """
    is_hub = uri.startswith(hub_prefix)
    return is_hub
|
|
883
|
+
|
|
884
|
+
|
|
885
|
+
def extend_hub_uri_if_needed(
|
|
886
|
+
uri: str,
|
|
887
|
+
asset_type: HubSourceType = HubSourceType.functions,
|
|
888
|
+
file: str = "function.yaml",
|
|
889
|
+
) -> tuple[str, bool]:
|
|
890
|
+
"""
|
|
891
|
+
Retrieve the full uri of an object in the hub.
|
|
800
892
|
|
|
801
893
|
:param uri: structure: "hub://[<source>/]<item-name>[:<tag>]"
|
|
894
|
+
:param asset_type: The type of the hub item (functions, modules, etc.)
|
|
895
|
+
:param file: The file name inside the hub item directory (default: function.yaml)
|
|
802
896
|
|
|
803
897
|
:return: A tuple of:
|
|
804
898
|
[0] = Extended URI of item
|
|
805
899
|
[1] = Is hub item (bool)
|
|
806
900
|
"""
|
|
807
|
-
is_hub_uri = uri
|
|
901
|
+
is_hub_uri = check_if_hub_uri(uri)
|
|
808
902
|
if not is_hub_uri:
|
|
809
903
|
return uri, is_hub_uri
|
|
810
904
|
|
|
@@ -821,10 +915,10 @@ def extend_hub_uri_if_needed(uri) -> tuple[str, bool]:
|
|
|
821
915
|
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
822
916
|
"Invalid character '/' in function name or source name"
|
|
823
917
|
) from exc
|
|
824
|
-
name = normalize_name(name=name
|
|
918
|
+
name = normalize_name(name=name)
|
|
825
919
|
if not source_name:
|
|
826
920
|
# Searching item in all sources
|
|
827
|
-
sources = db.list_hub_sources(item_name=name, tag=tag)
|
|
921
|
+
sources = db.list_hub_sources(item_name=name, tag=tag, item_type=asset_type)
|
|
828
922
|
if not sources:
|
|
829
923
|
raise mlrun.errors.MLRunNotFoundError(
|
|
830
924
|
f"Item={name}, tag={tag} not found in any hub source"
|
|
@@ -834,10 +928,10 @@ def extend_hub_uri_if_needed(uri) -> tuple[str, bool]:
|
|
|
834
928
|
else:
|
|
835
929
|
# Specific source is given
|
|
836
930
|
indexed_source = db.get_hub_source(source_name)
|
|
837
|
-
# hub
|
|
931
|
+
# hub directories name are with underscores instead of hyphens
|
|
838
932
|
name = name.replace("-", "_")
|
|
839
|
-
|
|
840
|
-
return indexed_source.source.get_full_uri(
|
|
933
|
+
suffix = f"{name}/{tag}/src/{file}"
|
|
934
|
+
return indexed_source.source.get_full_uri(suffix, asset_type), is_hub_uri
|
|
841
935
|
|
|
842
936
|
|
|
843
937
|
def gen_md_table(header, rows=None):
|
|
@@ -905,10 +999,20 @@ def enrich_image_url(
|
|
|
905
999
|
mlrun_version = config.images_tag or client_version or server_version
|
|
906
1000
|
tag = mlrun_version or ""
|
|
907
1001
|
|
|
908
|
-
#
|
|
909
|
-
|
|
1002
|
+
# starting mlrun 1.10.0-rc0 we want to enrich the kfp image with the python version
|
|
1003
|
+
# e.g for 1.9 we have a single mlrun-kfp image that supports only python 3.9
|
|
1004
|
+
enrich_kfp_python_version = (
|
|
1005
|
+
"mlrun-kfp" in image_url
|
|
1006
|
+
and mlrun_version
|
|
1007
|
+
and semver.VersionInfo.is_valid(mlrun_version)
|
|
1008
|
+
and semver.VersionInfo.parse(mlrun_version)
|
|
1009
|
+
>= semver.VersionInfo.parse("1.10.0-rc0")
|
|
1010
|
+
)
|
|
1011
|
+
|
|
1012
|
+
if "mlrun-kfp" not in image_url or enrich_kfp_python_version:
|
|
910
1013
|
tag += resolve_image_tag_suffix(
|
|
911
|
-
mlrun_version=mlrun_version,
|
|
1014
|
+
mlrun_version=mlrun_version,
|
|
1015
|
+
python_version=client_python_version,
|
|
912
1016
|
)
|
|
913
1017
|
|
|
914
1018
|
# it's an mlrun image if the repository is mlrun
|
|
@@ -934,8 +1038,15 @@ def enrich_image_url(
|
|
|
934
1038
|
else:
|
|
935
1039
|
image_url = "mlrun/mlrun"
|
|
936
1040
|
|
|
937
|
-
if is_mlrun_image and tag
|
|
938
|
-
|
|
1041
|
+
if is_mlrun_image and tag:
|
|
1042
|
+
if ":" not in image_url:
|
|
1043
|
+
image_url = f"{image_url}:{tag}"
|
|
1044
|
+
elif enrich_kfp_python_version:
|
|
1045
|
+
# For mlrun-kfp >= 1.10.0-rc0, append python suffix to existing tag
|
|
1046
|
+
python_suffix = resolve_image_tag_suffix(
|
|
1047
|
+
mlrun_version, client_python_version
|
|
1048
|
+
)
|
|
1049
|
+
image_url = f"{image_url}{python_suffix}" if python_suffix else image_url
|
|
939
1050
|
|
|
940
1051
|
registry = (
|
|
941
1052
|
config.images_registry if is_mlrun_image else config.vendor_images_registry
|
|
@@ -1050,7 +1161,14 @@ def fill_function_hash(function_dict, tag=""):
|
|
|
1050
1161
|
|
|
1051
1162
|
|
|
1052
1163
|
def retry_until_successful(
|
|
1053
|
-
backoff: int,
|
|
1164
|
+
backoff: int,
|
|
1165
|
+
timeout: int,
|
|
1166
|
+
logger,
|
|
1167
|
+
verbose: bool,
|
|
1168
|
+
_function,
|
|
1169
|
+
*args,
|
|
1170
|
+
fatal_exceptions=(),
|
|
1171
|
+
**kwargs,
|
|
1054
1172
|
):
|
|
1055
1173
|
"""
|
|
1056
1174
|
Runs function with given *args and **kwargs.
|
|
@@ -1063,14 +1181,31 @@ def retry_until_successful(
|
|
|
1063
1181
|
:param verbose: whether to log the failure on each retry
|
|
1064
1182
|
:param _function: function to run
|
|
1065
1183
|
:param args: functions args
|
|
1184
|
+
:param fatal_exceptions: exception types that should not be retried
|
|
1066
1185
|
:param kwargs: functions kwargs
|
|
1067
1186
|
:return: function result
|
|
1068
1187
|
"""
|
|
1069
|
-
return Retryer(
|
|
1188
|
+
return Retryer(
|
|
1189
|
+
backoff,
|
|
1190
|
+
timeout,
|
|
1191
|
+
logger,
|
|
1192
|
+
verbose,
|
|
1193
|
+
_function,
|
|
1194
|
+
*args,
|
|
1195
|
+
fatal_exceptions=fatal_exceptions,
|
|
1196
|
+
**kwargs,
|
|
1197
|
+
).run()
|
|
1070
1198
|
|
|
1071
1199
|
|
|
1072
1200
|
async def retry_until_successful_async(
|
|
1073
|
-
backoff: int,
|
|
1201
|
+
backoff: int,
|
|
1202
|
+
timeout: int,
|
|
1203
|
+
logger,
|
|
1204
|
+
verbose: bool,
|
|
1205
|
+
_function,
|
|
1206
|
+
*args,
|
|
1207
|
+
fatal_exceptions=(),
|
|
1208
|
+
**kwargs,
|
|
1074
1209
|
):
|
|
1075
1210
|
"""
|
|
1076
1211
|
Runs function with given *args and **kwargs.
|
|
@@ -1082,12 +1217,20 @@ async def retry_until_successful_async(
|
|
|
1082
1217
|
:param logger: a logger so we can log the failures
|
|
1083
1218
|
:param verbose: whether to log the failure on each retry
|
|
1084
1219
|
:param _function: function to run
|
|
1220
|
+
:param fatal_exceptions: exception types that should not be retried
|
|
1085
1221
|
:param args: functions args
|
|
1086
1222
|
:param kwargs: functions kwargs
|
|
1087
1223
|
:return: function result
|
|
1088
1224
|
"""
|
|
1089
1225
|
return await AsyncRetryer(
|
|
1090
|
-
backoff,
|
|
1226
|
+
backoff,
|
|
1227
|
+
timeout,
|
|
1228
|
+
logger,
|
|
1229
|
+
verbose,
|
|
1230
|
+
_function,
|
|
1231
|
+
*args,
|
|
1232
|
+
fatal_exceptions=fatal_exceptions,
|
|
1233
|
+
**kwargs,
|
|
1091
1234
|
).run()
|
|
1092
1235
|
|
|
1093
1236
|
|
|
@@ -1173,55 +1316,6 @@ def get_workflow_url(
|
|
|
1173
1316
|
return url
|
|
1174
1317
|
|
|
1175
1318
|
|
|
1176
|
-
def get_kfp_list_runs_filter(
|
|
1177
|
-
project_name: Optional[str] = None,
|
|
1178
|
-
end_date: Optional[str] = None,
|
|
1179
|
-
start_date: Optional[str] = None,
|
|
1180
|
-
) -> str:
|
|
1181
|
-
"""
|
|
1182
|
-
Generates a filter for listing Kubeflow Pipelines (KFP) runs.
|
|
1183
|
-
|
|
1184
|
-
:param project_name: The name of the project. If "*", it won't filter by project.
|
|
1185
|
-
:param end_date: The latest creation date for filtering runs (ISO 8601 format).
|
|
1186
|
-
:param start_date: The earliest creation date for filtering runs (ISO 8601 format).
|
|
1187
|
-
:return: A JSON-formatted filter string for KFP.
|
|
1188
|
-
"""
|
|
1189
|
-
|
|
1190
|
-
# KFP filter operation codes
|
|
1191
|
-
kfp_less_than_or_equal_op = 7 # '<='
|
|
1192
|
-
kfp_greater_than_or_equal_op = 5 # '>='
|
|
1193
|
-
kfp_substring_op = 9 # Substring match
|
|
1194
|
-
|
|
1195
|
-
filters = {"predicates": []}
|
|
1196
|
-
|
|
1197
|
-
if end_date:
|
|
1198
|
-
filters["predicates"].append(
|
|
1199
|
-
{
|
|
1200
|
-
"key": "created_at",
|
|
1201
|
-
"op": kfp_less_than_or_equal_op,
|
|
1202
|
-
"timestamp_value": end_date,
|
|
1203
|
-
}
|
|
1204
|
-
)
|
|
1205
|
-
|
|
1206
|
-
if project_name and project_name != "*":
|
|
1207
|
-
filters["predicates"].append(
|
|
1208
|
-
{
|
|
1209
|
-
"key": "name",
|
|
1210
|
-
"op": kfp_substring_op,
|
|
1211
|
-
"string_value": project_name,
|
|
1212
|
-
}
|
|
1213
|
-
)
|
|
1214
|
-
if start_date:
|
|
1215
|
-
filters["predicates"].append(
|
|
1216
|
-
{
|
|
1217
|
-
"key": "created_at",
|
|
1218
|
-
"op": kfp_greater_than_or_equal_op,
|
|
1219
|
-
"timestamp_value": start_date,
|
|
1220
|
-
}
|
|
1221
|
-
)
|
|
1222
|
-
return json.dumps(filters)
|
|
1223
|
-
|
|
1224
|
-
|
|
1225
1319
|
def validate_and_convert_date(date_input: str) -> str:
|
|
1226
1320
|
"""
|
|
1227
1321
|
Converts any recognizable date string into a standardized RFC 3339 format.
|
|
@@ -1819,10 +1913,7 @@ async def run_in_threadpool(func, *args, **kwargs):
|
|
|
1819
1913
|
Run a sync-function in the loop default thread pool executor pool and await its result.
|
|
1820
1914
|
Note that this function is not suitable for CPU-bound tasks, as it will block the event loop.
|
|
1821
1915
|
"""
|
|
1822
|
-
|
|
1823
|
-
if kwargs:
|
|
1824
|
-
func = functools.partial(func, **kwargs)
|
|
1825
|
-
return await loop.run_in_executor(None, func, *args)
|
|
1916
|
+
return await asyncio.to_thread(func, *args, **kwargs)
|
|
1826
1917
|
|
|
1827
1918
|
|
|
1828
1919
|
def is_explicit_ack_supported(context):
|
|
@@ -2352,3 +2443,112 @@ def encode_user_code(
|
|
|
2352
2443
|
"Consider using `with_source_archive` to add user code as a remote source to the function."
|
|
2353
2444
|
)
|
|
2354
2445
|
return encoded
|
|
2446
|
+
|
|
2447
|
+
|
|
2448
|
+
def split_path(path: str) -> typing.Union[str, list[str], None]:
    """
    Break a dot-separated path into its components.

    :param path: A dotted path such as "a.b.c", a single key, or None
    :return: None when path is None, the single key as a plain string when there is
             no dot in the path, otherwise the list of components
    """
    if path is None:
        return None
    components = path.split(".")
    # A path without dots collapses back to a plain string key
    return components[0] if len(components) == 1 else components
|
|
2455
|
+
|
|
2456
|
+
|
|
2457
|
+
def get_data_from_path(path: typing.Union[str, list[str], None], data: dict) -> Any:
    """
    Read a value out of `data` following `path`.

    :param path: None, a single key, or a list of keys describing a nested lookup
    :param data: The dictionary to read from
    :return:
        - path is None: `data` itself (not a copy)
        - path is a str: `data.get(path)` (None when the key is missing, not a copy)
        - path is a list: a deep copy of the nested value; a missing key along the
          way resolves to an empty dict
    :raises MLRunInvalidArgumentError: If path is not a str, a list or None
    """
    if path is None:
        return data
    if isinstance(path, str):
        return data.get(path)
    if isinstance(path, list):
        node = data
        for key in path:
            node = node.get(key, {})
        # Copy only the resolved value rather than the whole input dictionary:
        # the caller still gets an independent object, without paying for (or
        # failing on) unrelated entries elsewhere in `data`.
        return deepcopy(node)
    raise mlrun.errors.MLRunInvalidArgumentError(
        "Expected path be of type str or list of str or None"
    )
|
|
2471
|
+
|
|
2472
|
+
|
|
2473
|
+
def is_valid_port(port: int, raise_on_error: bool = False) -> bool:
    """
    Check whether a port number lies within 0-65535.

    A falsy port (None or 0) is always reported as invalid by returning False,
    even when raise_on_error is set.

    :param port: The port number to check
    :param raise_on_error: Raise instead of returning False for an out-of-range port
    :return: True for a non-zero port within range, False otherwise
    :raises ValueError: If the port is out of range and raise_on_error is True
    """
    if not port:
        return False
    out_of_range = not (0 <= port <= 65535)
    if out_of_range and raise_on_error:
        raise ValueError("Port must be in the range 0–65535")
    return not out_of_range
|
|
2481
|
+
|
|
2482
|
+
|
|
2483
|
+
def set_data_by_path(
    path: typing.Union[str, list[str], None], data: dict, value
) -> None:
    """
    Write `value` into `data` at the location described by `path` (in place).

    :param path: None to merge `value` into `data`, a single key, or a list of keys
                 describing a nested location
    :param data: The dictionary to modify
    :param value: The value to store; must be a dictionary when path is None
    :raises ValueError: If path is None and value is not a dictionary
    :raises MLRunInvalidArgumentError: If path is not a str, a list or None
    """
    if path is None:
        if not isinstance(value, dict):
            raise ValueError("When path is None, value must be a dictionary.")
        data.update(value)
        return

    if isinstance(path, str):
        data[path] = value
        return

    if isinstance(path, list):
        node = data
        for key in path[:-1]:
            child = node.get(key)
            if not isinstance(child, dict):
                # Missing or non-dict intermediate entries are replaced by a fresh dict
                child = node[key] = {}
            node = child
        node[path[-1]] = value
        return

    raise mlrun.errors.MLRunInvalidArgumentError(
        "Expected path to be of type str or list of str"
    )
|
|
2505
|
+
|
|
2506
|
+
|
|
2507
|
+
def _normalize_requirements(reqs: typing.Union[str, list[str], None]) -> list[str]:
|
|
2508
|
+
if reqs is None:
|
|
2509
|
+
return []
|
|
2510
|
+
if isinstance(reqs, str):
|
|
2511
|
+
s = reqs.strip()
|
|
2512
|
+
return [s] if s else []
|
|
2513
|
+
return [s.strip() for s in reqs if s and s.strip()]
|
|
2514
|
+
|
|
2515
|
+
|
|
2516
|
+
def merge_requirements(
    reqs_priority: typing.Union[str, list[str], None],
    reqs_secondary: typing.Union[str, list[str], None],
) -> list[str]:
    """
    Build the union of two requirement collections, one entry per package.

    Packages are matched by their canonicalized name. When a package appears in
    both inputs, the requirement from reqs_priority is the one that is kept.

    Args:
        reqs_priority: str | list[str] | None (priority input)
        reqs_secondary: str | list[str] | None

    Returns:
        list[str]: pip-style requirements.
    """
    # Secondary entries go first so that priority entries overwrite them by key
    ordered_specs = [
        *_normalize_requirements(reqs_secondary),
        *_normalize_requirements(reqs_priority),
    ]
    parsed = (Requirement(spec) for spec in ordered_specs)
    by_package: dict[str, Requirement] = {
        canonicalize_name(req.name): req for req in parsed
    }
    return [str(req) for req in by_package.values()]
|
|
2540
|
+
|
|
2541
|
+
|
|
2542
|
+
def get_source_and_working_dir_paths(
    source_file_path,
) -> tuple[pathlib.Path, pathlib.Path]:
    """
    Resolve a source file path and the current working directory to absolute paths.

    :param source_file_path: Path to the source file, in any form accepted by pathlib.Path
    :return: A tuple of (resolved source file path, resolved current working directory)
    """
    source_file_path_object = pathlib.Path(source_file_path).resolve()
    working_dir_path_object = pathlib.Path(".").resolve()
    return source_file_path_object, working_dir_path_object
|
|
2546
|
+
|
|
2547
|
+
|
|
2548
|
+
def get_relative_module_name_from_path(
    source_file_path_object, working_dir_path_object
) -> str:
    """
    Derive a dotted module name from a source file location.

    :param source_file_path_object: Path object of the source file
    :param working_dir_path_object: Path object of the directory the name is relative to
    :return: The path of the source file relative to the working directory, with the
             file suffix removed and the path parts joined by '.'
    """
    relative_source = source_file_path_object.relative_to(working_dir_path_object)
    # Drop the file extension, then turn each path component into a module segment
    without_extension = relative_source.with_suffix("")
    return ".".join(without_extension.parts)
|
mlrun/utils/logger.py
CHANGED
|
@@ -393,12 +393,14 @@ def resolve_formatter_by_kind(
|
|
|
393
393
|
|
|
394
394
|
|
|
395
395
|
def create_test_logger(name: str = "mlrun", stream: IO[str] = stdout) -> Logger:
    """
    Build a debug-level logger with the human-extended formatter, for use in tests.

    :param name: Name passed to create_logger
    :param stream: Stream passed to create_logger
    :return: The created logger, with propagation enabled on its underlying logger
    """
    test_logger = create_logger(
        level="debug",
        formatter_kind=FormatterKinds.HUMAN_EXTENDED.name,
        name=name,
        stream=stream,
    )
    # pass records up to pytest’s handler
    test_logger._logger.propagate = True
    return test_logger
|
|
402
404
|
|
|
403
405
|
|
|
404
406
|
def create_logger(
|
|
@@ -15,11 +15,29 @@
|
|
|
15
15
|
import asyncio
|
|
16
16
|
import typing
|
|
17
17
|
from copy import deepcopy
|
|
18
|
+
from typing import Optional
|
|
19
|
+
|
|
20
|
+
import aiohttp
|
|
18
21
|
|
|
19
22
|
import mlrun.common.schemas
|
|
20
23
|
import mlrun.lists
|
|
21
24
|
|
|
22
25
|
|
|
26
|
+
class TimedHTTPClient:
    """Factory for aiohttp client sessions that share one total-request timeout."""

    def __init__(self, timeout: Optional[float] = 30.0):
        """
        Store the timeout configuration used for every session created later.

        Args:
            timeout: Total request timeout in seconds (default: 30.0)
        """
        self.timeout = aiohttp.ClientTimeout(total=timeout)

    def session(self, **kwargs) -> aiohttp.ClientSession:
        """
        Open a new ClientSession that uses the configured timeout.

        Any keyword arguments are forwarded to aiohttp.ClientSession unchanged.
        """
        new_session = aiohttp.ClientSession(timeout=self.timeout, **kwargs)
        return new_session
|
|
39
|
+
|
|
40
|
+
|
|
23
41
|
class NotificationBase:
|
|
24
42
|
def __init__(
|
|
25
43
|
self,
|
|
@@ -16,13 +16,11 @@ import json
|
|
|
16
16
|
import os
|
|
17
17
|
import typing
|
|
18
18
|
|
|
19
|
-
import aiohttp
|
|
20
|
-
|
|
21
19
|
import mlrun.common.schemas
|
|
22
20
|
import mlrun.errors
|
|
23
21
|
import mlrun.lists
|
|
24
22
|
|
|
25
|
-
from .base import NotificationBase
|
|
23
|
+
from .base import NotificationBase, TimedHTTPClient
|
|
26
24
|
|
|
27
25
|
|
|
28
26
|
class GitNotification(NotificationBase):
|
|
@@ -148,7 +146,7 @@ class GitNotification(NotificationBase):
|
|
|
148
146
|
}
|
|
149
147
|
url = f"https://{server}/repos/{repo}/issues/{issue}/comments"
|
|
150
148
|
|
|
151
|
-
async with
|
|
149
|
+
async with TimedHTTPClient().session() as session:
|
|
152
150
|
resp = await session.post(url, headers=headers, json={"body": message})
|
|
153
151
|
if not resp.ok:
|
|
154
152
|
resp_text = await resp.text()
|
|
@@ -14,14 +14,12 @@
|
|
|
14
14
|
|
|
15
15
|
import typing
|
|
16
16
|
|
|
17
|
-
import aiohttp
|
|
18
|
-
|
|
19
17
|
import mlrun.common.runtimes.constants as runtimes_constants
|
|
20
18
|
import mlrun.common.schemas
|
|
21
19
|
import mlrun.lists
|
|
22
20
|
import mlrun.utils.helpers
|
|
23
21
|
|
|
24
|
-
from .base import NotificationBase
|
|
22
|
+
from .base import NotificationBase, TimedHTTPClient
|
|
25
23
|
|
|
26
24
|
|
|
27
25
|
class SlackNotification(NotificationBase):
|
|
@@ -67,7 +65,7 @@ class SlackNotification(NotificationBase):
|
|
|
67
65
|
|
|
68
66
|
data = self._generate_slack_data(message, severity, runs, alert, event_data)
|
|
69
67
|
|
|
70
|
-
async with
|
|
68
|
+
async with TimedHTTPClient().session() as session:
|
|
71
69
|
async with session.post(webhook, json=data) as response:
|
|
72
70
|
response.raise_for_status()
|
|
73
71
|
|
|
@@ -15,14 +15,13 @@
|
|
|
15
15
|
import re
|
|
16
16
|
import typing
|
|
17
17
|
|
|
18
|
-
import aiohttp
|
|
19
18
|
import orjson
|
|
20
19
|
|
|
21
20
|
import mlrun.common.schemas
|
|
22
21
|
import mlrun.lists
|
|
23
22
|
import mlrun.utils.helpers
|
|
24
23
|
|
|
25
|
-
from .base import NotificationBase
|
|
24
|
+
from .base import NotificationBase, TimedHTTPClient
|
|
26
25
|
|
|
27
26
|
|
|
28
27
|
class WebhookNotification(NotificationBase):
|
|
@@ -87,9 +86,7 @@ class WebhookNotification(NotificationBase):
|
|
|
87
86
|
# we automatically handle it as `ssl=None` for their convenience.
|
|
88
87
|
verify_ssl = verify_ssl and None if url.startswith("https") else None
|
|
89
88
|
|
|
90
|
-
async with
|
|
91
|
-
json_serialize=self._encoder,
|
|
92
|
-
) as session:
|
|
89
|
+
async with TimedHTTPClient().session(json_serialize=self._encoder) as session:
|
|
93
90
|
response = await getattr(session, method)(
|
|
94
91
|
url,
|
|
95
92
|
headers=headers,
|
|
@@ -308,7 +308,7 @@ class NotificationPusher(_NotificationPusherBase):
|
|
|
308
308
|
and retry_count >= max_retries
|
|
309
309
|
):
|
|
310
310
|
message += (
|
|
311
|
-
"\nRetry limit reached
|
|
311
|
+
"\nRetry limit reached - run has failed after all retry attempts."
|
|
312
312
|
)
|
|
313
313
|
|
|
314
314
|
severity = (
|