mlrun 1.6.4rc7__py3-none-any.whl → 1.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mlrun might be problematic.
- mlrun/__init__.py +11 -1
- mlrun/__main__.py +40 -122
- mlrun/alerts/__init__.py +15 -0
- mlrun/alerts/alert.py +248 -0
- mlrun/api/schemas/__init__.py +5 -4
- mlrun/artifacts/__init__.py +8 -3
- mlrun/artifacts/base.py +47 -257
- mlrun/artifacts/dataset.py +11 -192
- mlrun/artifacts/manager.py +79 -47
- mlrun/artifacts/model.py +31 -159
- mlrun/artifacts/plots.py +23 -380
- mlrun/common/constants.py +74 -1
- mlrun/common/db/sql_session.py +5 -5
- mlrun/common/formatters/__init__.py +21 -0
- mlrun/common/formatters/artifact.py +45 -0
- mlrun/common/formatters/base.py +113 -0
- mlrun/common/formatters/feature_set.py +33 -0
- mlrun/common/formatters/function.py +46 -0
- mlrun/common/formatters/pipeline.py +53 -0
- mlrun/common/formatters/project.py +51 -0
- mlrun/common/formatters/run.py +29 -0
- mlrun/common/helpers.py +12 -3
- mlrun/common/model_monitoring/helpers.py +9 -5
- mlrun/{runtimes → common/runtimes}/constants.py +37 -9
- mlrun/common/schemas/__init__.py +31 -5
- mlrun/common/schemas/alert.py +202 -0
- mlrun/common/schemas/api_gateway.py +196 -0
- mlrun/common/schemas/artifact.py +25 -4
- mlrun/common/schemas/auth.py +16 -5
- mlrun/common/schemas/background_task.py +1 -1
- mlrun/common/schemas/client_spec.py +4 -2
- mlrun/common/schemas/common.py +7 -4
- mlrun/common/schemas/constants.py +3 -0
- mlrun/common/schemas/feature_store.py +74 -44
- mlrun/common/schemas/frontend_spec.py +15 -7
- mlrun/common/schemas/function.py +12 -1
- mlrun/common/schemas/hub.py +11 -18
- mlrun/common/schemas/memory_reports.py +2 -2
- mlrun/common/schemas/model_monitoring/__init__.py +20 -4
- mlrun/common/schemas/model_monitoring/constants.py +123 -42
- mlrun/common/schemas/model_monitoring/grafana.py +13 -9
- mlrun/common/schemas/model_monitoring/model_endpoints.py +101 -54
- mlrun/common/schemas/notification.py +71 -14
- mlrun/common/schemas/object.py +2 -2
- mlrun/{model_monitoring/controller_handler.py → common/schemas/pagination.py} +9 -12
- mlrun/common/schemas/pipeline.py +8 -1
- mlrun/common/schemas/project.py +69 -18
- mlrun/common/schemas/runs.py +7 -1
- mlrun/common/schemas/runtime_resource.py +8 -12
- mlrun/common/schemas/schedule.py +4 -4
- mlrun/common/schemas/tag.py +1 -2
- mlrun/common/schemas/workflow.py +12 -4
- mlrun/common/types.py +14 -1
- mlrun/config.py +154 -69
- mlrun/data_types/data_types.py +6 -1
- mlrun/data_types/spark.py +2 -2
- mlrun/data_types/to_pandas.py +67 -37
- mlrun/datastore/__init__.py +6 -8
- mlrun/datastore/alibaba_oss.py +131 -0
- mlrun/datastore/azure_blob.py +143 -42
- mlrun/datastore/base.py +102 -58
- mlrun/datastore/datastore.py +34 -13
- mlrun/datastore/datastore_profile.py +146 -20
- mlrun/datastore/dbfs_store.py +3 -7
- mlrun/datastore/filestore.py +1 -4
- mlrun/datastore/google_cloud_storage.py +97 -33
- mlrun/datastore/hdfs.py +56 -0
- mlrun/datastore/inmem.py +6 -3
- mlrun/datastore/redis.py +7 -2
- mlrun/datastore/s3.py +34 -12
- mlrun/datastore/snowflake_utils.py +45 -0
- mlrun/datastore/sources.py +303 -111
- mlrun/datastore/spark_utils.py +31 -2
- mlrun/datastore/store_resources.py +9 -7
- mlrun/datastore/storeytargets.py +151 -0
- mlrun/datastore/targets.py +453 -176
- mlrun/datastore/utils.py +72 -58
- mlrun/datastore/v3io.py +6 -1
- mlrun/db/base.py +274 -41
- mlrun/db/factory.py +1 -1
- mlrun/db/httpdb.py +893 -225
- mlrun/db/nopdb.py +291 -33
- mlrun/errors.py +36 -6
- mlrun/execution.py +115 -42
- mlrun/feature_store/__init__.py +0 -2
- mlrun/feature_store/api.py +65 -73
- mlrun/feature_store/common.py +7 -12
- mlrun/feature_store/feature_set.py +76 -55
- mlrun/feature_store/feature_vector.py +39 -31
- mlrun/feature_store/ingestion.py +7 -6
- mlrun/feature_store/retrieval/base.py +16 -11
- mlrun/feature_store/retrieval/dask_merger.py +2 -0
- mlrun/feature_store/retrieval/job.py +13 -4
- mlrun/feature_store/retrieval/local_merger.py +2 -0
- mlrun/feature_store/retrieval/spark_merger.py +24 -32
- mlrun/feature_store/steps.py +45 -34
- mlrun/features.py +11 -21
- mlrun/frameworks/_common/artifacts_library.py +9 -9
- mlrun/frameworks/_common/mlrun_interface.py +5 -5
- mlrun/frameworks/_common/model_handler.py +48 -48
- mlrun/frameworks/_common/plan.py +5 -6
- mlrun/frameworks/_common/producer.py +3 -4
- mlrun/frameworks/_common/utils.py +5 -5
- mlrun/frameworks/_dl_common/loggers/logger.py +6 -7
- mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +9 -9
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +23 -47
- mlrun/frameworks/_ml_common/artifacts_library.py +1 -2
- mlrun/frameworks/_ml_common/loggers/logger.py +3 -4
- mlrun/frameworks/_ml_common/loggers/mlrun_logger.py +4 -5
- mlrun/frameworks/_ml_common/model_handler.py +24 -24
- mlrun/frameworks/_ml_common/pkl_model_server.py +2 -2
- mlrun/frameworks/_ml_common/plan.py +2 -2
- mlrun/frameworks/_ml_common/plans/calibration_curve_plan.py +2 -3
- mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +2 -3
- mlrun/frameworks/_ml_common/plans/dataset_plan.py +3 -3
- mlrun/frameworks/_ml_common/plans/feature_importance_plan.py +3 -3
- mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +4 -4
- mlrun/frameworks/_ml_common/utils.py +4 -4
- mlrun/frameworks/auto_mlrun/auto_mlrun.py +9 -9
- mlrun/frameworks/huggingface/model_server.py +4 -4
- mlrun/frameworks/lgbm/__init__.py +33 -33
- mlrun/frameworks/lgbm/callbacks/callback.py +2 -4
- mlrun/frameworks/lgbm/callbacks/logging_callback.py +4 -5
- mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +4 -5
- mlrun/frameworks/lgbm/mlrun_interfaces/booster_mlrun_interface.py +1 -3
- mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +6 -6
- mlrun/frameworks/lgbm/model_handler.py +10 -10
- mlrun/frameworks/lgbm/model_server.py +6 -6
- mlrun/frameworks/lgbm/utils.py +5 -5
- mlrun/frameworks/onnx/dataset.py +8 -8
- mlrun/frameworks/onnx/mlrun_interface.py +3 -3
- mlrun/frameworks/onnx/model_handler.py +6 -6
- mlrun/frameworks/onnx/model_server.py +7 -7
- mlrun/frameworks/parallel_coordinates.py +6 -6
- mlrun/frameworks/pytorch/__init__.py +18 -18
- mlrun/frameworks/pytorch/callbacks/callback.py +4 -5
- mlrun/frameworks/pytorch/callbacks/logging_callback.py +17 -17
- mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +11 -11
- mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +23 -29
- mlrun/frameworks/pytorch/callbacks_handler.py +38 -38
- mlrun/frameworks/pytorch/mlrun_interface.py +20 -20
- mlrun/frameworks/pytorch/model_handler.py +17 -17
- mlrun/frameworks/pytorch/model_server.py +7 -7
- mlrun/frameworks/sklearn/__init__.py +13 -13
- mlrun/frameworks/sklearn/estimator.py +4 -4
- mlrun/frameworks/sklearn/metrics_library.py +14 -14
- mlrun/frameworks/sklearn/mlrun_interface.py +16 -9
- mlrun/frameworks/sklearn/model_handler.py +2 -2
- mlrun/frameworks/tf_keras/__init__.py +10 -7
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +15 -15
- mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +11 -11
- mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +19 -23
- mlrun/frameworks/tf_keras/mlrun_interface.py +9 -11
- mlrun/frameworks/tf_keras/model_handler.py +14 -14
- mlrun/frameworks/tf_keras/model_server.py +6 -6
- mlrun/frameworks/xgboost/__init__.py +13 -13
- mlrun/frameworks/xgboost/model_handler.py +6 -6
- mlrun/k8s_utils.py +61 -17
- mlrun/launcher/__init__.py +1 -1
- mlrun/launcher/base.py +16 -15
- mlrun/launcher/client.py +13 -11
- mlrun/launcher/factory.py +1 -1
- mlrun/launcher/local.py +23 -13
- mlrun/launcher/remote.py +17 -10
- mlrun/lists.py +7 -6
- mlrun/model.py +478 -103
- mlrun/model_monitoring/__init__.py +1 -1
- mlrun/model_monitoring/api.py +163 -371
- mlrun/{runtimes/mpijob/v1alpha1.py → model_monitoring/applications/__init__.py} +9 -15
- mlrun/model_monitoring/applications/_application_steps.py +188 -0
- mlrun/model_monitoring/applications/base.py +108 -0
- mlrun/model_monitoring/applications/context.py +341 -0
- mlrun/model_monitoring/{evidently_application.py → applications/evidently_base.py} +27 -22
- mlrun/model_monitoring/applications/histogram_data_drift.py +354 -0
- mlrun/model_monitoring/applications/results.py +99 -0
- mlrun/model_monitoring/controller.py +131 -278
- mlrun/model_monitoring/db/__init__.py +18 -0
- mlrun/model_monitoring/db/stores/__init__.py +136 -0
- mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
- mlrun/model_monitoring/db/stores/base/store.py +213 -0
- mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
- mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +71 -0
- mlrun/model_monitoring/db/stores/sqldb/models/base.py +190 -0
- mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +103 -0
- mlrun/model_monitoring/{stores/models/mysql.py → db/stores/sqldb/models/sqlite.py} +19 -13
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +659 -0
- mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +726 -0
- mlrun/model_monitoring/db/tsdb/__init__.py +105 -0
- mlrun/model_monitoring/db/tsdb/base.py +448 -0
- mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
- mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +279 -0
- mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +42 -0
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +507 -0
- mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +158 -0
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +849 -0
- mlrun/model_monitoring/features_drift_table.py +134 -106
- mlrun/model_monitoring/helpers.py +199 -55
- mlrun/model_monitoring/metrics/__init__.py +13 -0
- mlrun/model_monitoring/metrics/histogram_distance.py +127 -0
- mlrun/model_monitoring/model_endpoint.py +3 -2
- mlrun/model_monitoring/stream_processing.py +131 -398
- mlrun/model_monitoring/tracking_policy.py +9 -2
- mlrun/model_monitoring/writer.py +161 -125
- mlrun/package/__init__.py +6 -6
- mlrun/package/context_handler.py +5 -5
- mlrun/package/packager.py +7 -7
- mlrun/package/packagers/default_packager.py +8 -8
- mlrun/package/packagers/numpy_packagers.py +15 -15
- mlrun/package/packagers/pandas_packagers.py +5 -5
- mlrun/package/packagers/python_standard_library_packagers.py +10 -10
- mlrun/package/packagers_manager.py +19 -23
- mlrun/package/utils/_formatter.py +6 -6
- mlrun/package/utils/_pickler.py +2 -2
- mlrun/package/utils/_supported_format.py +4 -4
- mlrun/package/utils/log_hint_utils.py +2 -2
- mlrun/package/utils/type_hint_utils.py +4 -9
- mlrun/platforms/__init__.py +11 -10
- mlrun/platforms/iguazio.py +24 -203
- mlrun/projects/operations.py +52 -25
- mlrun/projects/pipelines.py +191 -197
- mlrun/projects/project.py +1227 -400
- mlrun/render.py +16 -19
- mlrun/run.py +209 -184
- mlrun/runtimes/__init__.py +83 -15
- mlrun/runtimes/base.py +51 -35
- mlrun/runtimes/daskjob.py +17 -10
- mlrun/runtimes/databricks_job/databricks_cancel_task.py +1 -1
- mlrun/runtimes/databricks_job/databricks_runtime.py +8 -7
- mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
- mlrun/runtimes/funcdoc.py +1 -29
- mlrun/runtimes/function_reference.py +1 -1
- mlrun/runtimes/kubejob.py +34 -128
- mlrun/runtimes/local.py +40 -11
- mlrun/runtimes/mpijob/__init__.py +0 -20
- mlrun/runtimes/mpijob/abstract.py +9 -10
- mlrun/runtimes/mpijob/v1.py +1 -1
- mlrun/{model_monitoring/stores/models/sqlite.py → runtimes/nuclio/__init__.py} +7 -9
- mlrun/runtimes/nuclio/api_gateway.py +769 -0
- mlrun/runtimes/nuclio/application/__init__.py +15 -0
- mlrun/runtimes/nuclio/application/application.py +758 -0
- mlrun/runtimes/nuclio/application/reverse_proxy.go +95 -0
- mlrun/runtimes/{function.py → nuclio/function.py} +200 -83
- mlrun/runtimes/{nuclio.py → nuclio/nuclio.py} +6 -6
- mlrun/runtimes/{serving.py → nuclio/serving.py} +65 -68
- mlrun/runtimes/pod.py +281 -101
- mlrun/runtimes/remotesparkjob.py +12 -9
- mlrun/runtimes/sparkjob/spark3job.py +67 -51
- mlrun/runtimes/utils.py +41 -75
- mlrun/secrets.py +9 -5
- mlrun/serving/__init__.py +8 -1
- mlrun/serving/remote.py +2 -7
- mlrun/serving/routers.py +85 -69
- mlrun/serving/server.py +69 -44
- mlrun/serving/states.py +209 -36
- mlrun/serving/utils.py +22 -14
- mlrun/serving/v1_serving.py +6 -7
- mlrun/serving/v2_serving.py +129 -54
- mlrun/track/tracker.py +2 -1
- mlrun/track/tracker_manager.py +3 -3
- mlrun/track/trackers/mlflow_tracker.py +6 -2
- mlrun/utils/async_http.py +6 -8
- mlrun/utils/azure_vault.py +1 -1
- mlrun/utils/clones.py +1 -2
- mlrun/utils/condition_evaluator.py +3 -3
- mlrun/utils/db.py +21 -3
- mlrun/utils/helpers.py +405 -225
- mlrun/utils/http.py +3 -6
- mlrun/utils/logger.py +112 -16
- mlrun/utils/notifications/notification/__init__.py +17 -13
- mlrun/utils/notifications/notification/base.py +50 -2
- mlrun/utils/notifications/notification/console.py +2 -0
- mlrun/utils/notifications/notification/git.py +24 -1
- mlrun/utils/notifications/notification/ipython.py +3 -1
- mlrun/utils/notifications/notification/slack.py +96 -21
- mlrun/utils/notifications/notification/webhook.py +59 -2
- mlrun/utils/notifications/notification_pusher.py +149 -30
- mlrun/utils/regex.py +9 -0
- mlrun/utils/retryer.py +208 -0
- mlrun/utils/singleton.py +1 -1
- mlrun/utils/v3io_clients.py +4 -6
- mlrun/utils/version/version.json +2 -2
- mlrun/utils/version/version.py +2 -6
- mlrun-1.7.0.dist-info/METADATA +378 -0
- mlrun-1.7.0.dist-info/RECORD +351 -0
- {mlrun-1.6.4rc7.dist-info → mlrun-1.7.0.dist-info}/WHEEL +1 -1
- mlrun/feature_store/retrieval/conversion.py +0 -273
- mlrun/kfpops.py +0 -868
- mlrun/model_monitoring/application.py +0 -310
- mlrun/model_monitoring/batch.py +0 -1095
- mlrun/model_monitoring/prometheus.py +0 -219
- mlrun/model_monitoring/stores/__init__.py +0 -111
- mlrun/model_monitoring/stores/kv_model_endpoint_store.py +0 -576
- mlrun/model_monitoring/stores/model_endpoint_store.py +0 -147
- mlrun/model_monitoring/stores/models/__init__.py +0 -27
- mlrun/model_monitoring/stores/models/base.py +0 -84
- mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -384
- mlrun/platforms/other.py +0 -306
- mlrun-1.6.4rc7.dist-info/METADATA +0 -272
- mlrun-1.6.4rc7.dist-info/RECORD +0 -314
- {mlrun-1.6.4rc7.dist-info → mlrun-1.7.0.dist-info}/LICENSE +0 -0
- {mlrun-1.6.4rc7.dist-info → mlrun-1.7.0.dist-info}/entry_points.txt +0 -0
- {mlrun-1.6.4rc7.dist-info → mlrun-1.7.0.dist-info}/top_level.txt +0 -0
mlrun/utils/helpers.py
CHANGED
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import asyncio
 import enum
 import functools
 import hashlib
@@ -22,16 +23,15 @@ import os
 import re
 import string
 import sys
-import time
 import typing
+import uuid
 import warnings
 from datetime import datetime, timezone
-from importlib import import_module
+from importlib import import_module, reload
 from os import path
 from types import ModuleType
-from typing import Any,
+from typing import Any, Optional
 
-import anyio
 import git
 import inflection
 import numpy as np
@@ -40,8 +40,8 @@ import pandas
 import semver
 import yaml
 from dateutil import parser
-from
-from pandas
+from mlrun_pipelines.models import PipelineRun
+from pandas import Timedelta, Timestamp
 from yaml.representer import RepresenterError
 
 import mlrun
@@ -50,10 +50,17 @@ import mlrun.common.schemas
 import mlrun.errors
 import mlrun.utils.regex
 import mlrun.utils.version.version
+from mlrun.common.constants import MYSQL_MEDIUMBLOB_SIZE_BYTES
 from mlrun.config import config
-from mlrun.errors import err_to_str
 
 from .logger import create_logger
+from .retryer import (  # noqa: F401
+    AsyncRetryer,
+    Retryer,
+    create_exponential_backoff,
+    create_linear_backoff,
+    create_step_backoff,
+)
 
 yaml.Dumper.ignore_aliases = lambda *args: True
 _missing = object()
@@ -70,19 +77,6 @@ class OverwriteBuildParamsWarning(FutureWarning):
     pass
 
 
-# TODO: remove in 1.7.0
-@deprecated(
-    version="1.5.0",
-    reason="'parse_versioned_object_uri' will be removed from this file in 1.7.0, use "
-    "'mlrun.common.helpers.parse_versioned_object_uri' instead",
-    category=FutureWarning,
-)
-def parse_versioned_object_uri(uri: str, default_project: str = ""):
-    return mlrun.common.helpers.parse_versioned_object_uri(
-        uri=uri, default_project=default_project
-    )
-
-
 class StorePrefix:
     """map mlrun store objects to prefixes"""
 
@@ -113,51 +107,61 @@ class StorePrefix:
 
 
 def get_artifact_target(item: dict, project=None):
-
-
-
-
-    else:
-        db_key = item["spec"].get("db_key")
-        project_str = project or item["metadata"].get("project")
-        tree = item["metadata"].get("tree")
-
+    db_key = item["spec"].get("db_key")
+    project_str = project or item["metadata"].get("project")
+    tree = item["metadata"].get("tree")
+    tag = item["metadata"].get("tag")
     kind = item.get("kind")
-
-
+
+    if kind in {"dataset", "model", "artifact"} and db_key:
+        target = (
+            f"{DB_SCHEMA}://{StorePrefix.kind_to_prefix(kind)}/{project_str}/{db_key}"
+        )
+        target += f":{tag}" if tag else ":latest"
         if tree:
-            target
+            target += f"@{tree}"
         return target
 
-    return (
-
-
-
+    return item["spec"].get("target_path")
+
+
+# TODO: left for migrations testing purposes. Remove in 1.8.0.
+def is_legacy_artifact(artifact):
+    if isinstance(artifact, dict):
+        return "metadata" not in artifact
+    else:
+        return not hasattr(artifact, "metadata")
 
 
 logger = create_logger(config.log_level, config.log_formatter, "mlrun", sys.stdout)
 missing = object()
 
-is_ipython = False
+is_ipython = False  # is IPython terminal, including Jupyter
+is_jupyter = False  # is Jupyter notebook/lab terminal
 try:
-    import IPython
+    import IPython.core.getipython
+
+    ipy = IPython.core.getipython.get_ipython()
 
-
-
-
-
+    is_ipython = ipy is not None
+    is_jupyter = (
+        is_ipython
+        # not IPython
+        and "Terminal" not in str(type(ipy))
+    )
+
+    del ipy
+except ModuleNotFoundError:
    pass
 
-if
+if is_jupyter and config.nest_asyncio_enabled in ["1", "True"]:
     # bypass Jupyter asyncio bug
     import nest_asyncio
 
     nest_asyncio.apply()
 
 
-class
+class RunKeys:
     input_path = "input_path"
     output_path = "output_path"
     inputs = "inputs"
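Note: for illustration, a minimal sketch of what the rewritten get_artifact_target returns (the dict layout is taken from the hunk above; DB_SCHEMA is assumed to be mlrun's store:// scheme, and "model" is assumed to map to the "models" prefix):

    from mlrun.utils.helpers import get_artifact_target

    item = {
        "kind": "model",
        "metadata": {"project": "demo", "tree": "abc123", "tag": "v1"},
        "spec": {"db_key": "my-model", "target_path": "s3://bucket/my-model/"},
    }

    # with a db_key the helper builds a store URI; the tag falls back to ":latest"
    print(get_artifact_target(item))  # store://models/demo/my-model:v1@abc123

    # without a db_key it falls back to the raw target path
    item["spec"].pop("db_key")
    print(get_artifact_target(item))  # s3://bucket/my-model/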
@@ -168,6 +172,10 @@ class run_keys:
     secrets = "secret_sources"
 
 
+# for Backward compatibility
+run_keys = RunKeys
+
+
 def verify_field_regex(
     field_name,
     field_value,
@@ -189,8 +197,12 @@ def verify_field_regex(
         )
         if mode == mlrun.common.schemas.RegexMatchModes.all:
             if raise_on_failure:
+                if len(field_name) > max_chars:
+                    field_name = field_name[:max_chars] + "...truncated"
+                if len(field_value) > max_chars:
+                    field_value = field_value[:max_chars] + "...truncated"
                 raise mlrun.errors.MLRunInvalidArgumentError(
-                    f"Field '{field_name
+                    f"Field '{field_name}' is malformed. '{field_value}' "
                     f"does not match required pattern: {pattern}"
                 )
             return False
@@ -265,6 +277,17 @@ def validate_artifact_key_name(
     )
 
 
+def validate_inline_artifact_body_size(body: typing.Union[str, bytes, None]) -> None:
+    if body and len(body) > MYSQL_MEDIUMBLOB_SIZE_BYTES:
+        raise mlrun.errors.MLRunBadRequestError(
+            "The body of the artifact exceeds the maximum allowed size. "
+            "Avoid embedding the artifact body. "
+            "This increases the size of the project yaml file and could affect the project during loading and saving. "
+            "More information is available at"
+            "https://docs.mlrun.org/en/latest/projects/automate-project-git-source.html#setting-and-registering-the-project-artifacts"
+        )
+
+
 def validate_v3io_stream_consumer_group(
     value: str, raise_on_failure: bool = True
 ) -> bool:
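Note: a quick sketch of the new guard in use; the exact limit lives in mlrun.common.constants.MYSQL_MEDIUMBLOB_SIZE_BYTES, and the ~16 MiB figure below is an assumption based on the MySQL MEDIUMBLOB type the constant is named after:

    import mlrun.errors
    from mlrun.utils.helpers import validate_inline_artifact_body_size

    validate_inline_artifact_body_size("small inline body")  # passes silently

    try:
        # assumed to exceed the MEDIUMBLOB limit (~16 MiB)
        validate_inline_artifact_body_size(b"x" * (17 * 1024 * 1024))
    except mlrun.errors.MLRunBadRequestError as err:
        print("oversized inline body rejected:", err)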
@@ -276,12 +299,12 @@ def validate_v3io_stream_consumer_group(
     )
 
 
-def get_regex_list_as_string(regex_list:
+def get_regex_list_as_string(regex_list: list) -> str:
     """
     This function is used to combine a list of regex strings into a single regex,
     with and condition between them.
     """
-    return "".join(["(?={regex})"
+    return "".join([f"(?={regex})" for regex in regex_list]) + ".*$"
 
 
 def tag_name_regex_as_string() -> str:
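Note: the repaired implementation ANDs the patterns together with lookaheads; a small self-contained check (plain re, nothing beyond the hunk above assumed):

    import re

    from mlrun.utils.helpers import get_regex_list_as_string

    combined = get_regex_list_as_string([r"^[a-z0-9-]+$", r"^.{1,10}$"])
    # combined == "(?=^[a-z0-9-]+$)(?=^.{1,10}$).*$": every pattern must match
    print(bool(re.match(combined, "valid-name")))    # True
    print(bool(re.match(combined, "Invalid_Name")))  # False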
@@ -420,7 +443,7 @@ class LogBatchWriter:
 
 def get_in(obj, keys, default=None):
     """
-    >>> get_in({
+    >>> get_in({"a": {"b": 1}}, "a.b")
     1
     """
     if isinstance(keys, str):
@@ -652,7 +675,7 @@ def parse_artifact_uri(uri, default_project=""):
     [3] = tag
     [4] = tree
     """
-    uri_pattern =
+    uri_pattern = mlrun.utils.regex.artifact_uri_pattern
     match = re.match(uri_pattern, uri)
     if not match:
         raise ValueError(
@@ -667,6 +690,8 @@ def parse_artifact_uri(uri, default_project=""):
             raise ValueError(
                 f"illegal store path '{uri}', iteration must be integer value"
             )
+    else:
+        iteration = 0
     return (
         group_dict["project"] or default_project,
         group_dict["key"],
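Note: with the new else branch, a store path that omits an explicit iteration now parses to 0 instead of None; for example (return order taken from the docstring fields above; the URI grammar [<project>/]<key>[#<iter>][:<tag>][@<tree>] is assumed):

    from mlrun.utils.helpers import parse_artifact_uri

    # -> ('my-project', 'my-model', 0, 'v1', None)
    print(parse_artifact_uri("my-project/my-model:v1", default_project="default"))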
@@ -698,7 +723,7 @@ def generate_artifact_uri(project, key, tag=None, iter=None, tree=None):
     return artifact_uri
 
 
-def extend_hub_uri_if_needed(uri) ->
+def extend_hub_uri_if_needed(uri) -> tuple[str, bool]:
     """
     Retrieve the full uri of the item's yaml in the hub.
 
@@ -784,34 +809,6 @@ def gen_html_table(header, rows=None):
     return style + '<table class="tg">\n' + out + "</table>\n\n"
 
 
-def new_pipe_metadata(
-    artifact_path: str = None,
-    cleanup_ttl: int = None,
-    op_transformers: typing.List[typing.Callable] = None,
-):
-    from kfp.dsl import PipelineConf
-
-    def _set_artifact_path(task):
-        from kubernetes import client as k8s_client
-
-        task.add_env_variable(
-            k8s_client.V1EnvVar(name="MLRUN_ARTIFACT_PATH", value=artifact_path)
-        )
-        return task
-
-    conf = PipelineConf()
-    cleanup_ttl = cleanup_ttl or int(config.kfp_ttl)
-
-    if cleanup_ttl:
-        conf.set_ttl_seconds_after_finished(cleanup_ttl)
-    if artifact_path:
-        conf.add_op_transformer(_set_artifact_path)
-    if op_transformers:
-        for op_transformer in op_transformers:
-            conf.add_op_transformer(op_transformer)
-    return conf
-
-
 def _convert_python_package_version_to_image_tag(version: typing.Optional[str]):
     return (
         version.replace("+", "-").replace("0.0.0-", "") if version is not None else None
@@ -831,7 +828,6 @@ def enrich_image_url(
         tag += resolve_image_tag_suffix(
             mlrun_version=mlrun_version, python_version=client_python_version
         )
-    registry = config.images_registry
 
     # it's an mlrun image if the repository is mlrun
     is_mlrun_image = image_url.startswith("mlrun/") or "/mlrun/" in image_url
@@ -839,6 +835,10 @@ def enrich_image_url(
     if is_mlrun_image and tag and ":" not in image_url:
         image_url = f"{image_url}:{tag}"
 
+    registry = (
+        config.images_registry if is_mlrun_image else config.vendor_images_registry
+    )
+
     enrich_registry = False
     # enrich registry only if images_to_enrich_registry provided
     # example: "^mlrun/*" means enrich only if the image repository is mlrun and registry is not specified (in which
@@ -893,7 +893,7 @@ def get_docker_repository_or_default(repository: str) -> str:
     return repository
 
 
-def get_parsed_docker_registry() ->
+def get_parsed_docker_registry() -> tuple[Optional[str], Optional[str]]:
     # according to https://stackoverflow.com/questions/37861791/how-are-docker-image-names-parsed
     docker_registry = config.httpdb.builder.docker_registry or ""
     first_slash_index = docker_registry.find("/")
@@ -947,65 +947,27 @@ def fill_function_hash(function_dict, tag=""):
     return fill_object_hash(function_dict, "hash", tag)
 
 
-def
-
-
-    """
-    x = 0
-    comparison = min if coefficient >= 0 else max
-
-    while True:
-        next_value = comparison(base + x * coefficient, stop_value)
-        yield next_value
-        x += 1
-
-
-def create_step_backoff(steps=None):
-    """
-    Create a generator of steps backoff.
-    Example: steps = [[2, 5], [20, 10], [120, None]] will produce a generator in which the first 5
-    values will be 2, the next 10 values will be 20 and the rest will be 120.
-    :param steps: a list of lists [step_value, number_of_iteration_in_this_step]
-    """
-    steps = steps if steps is not None else [[2, 10], [10, 10], [120, None]]
-    steps = iter(steps)
-
-    # Get first step
-    step = next(steps)
-    while True:
-        current_step_value, current_step_remain = step
-        if current_step_remain == 0:
-            # No more in this step, moving on
-            step = next(steps)
-        elif current_step_remain is None:
-            # We are in the last step, staying here forever
-            yield current_step_value
-        elif current_step_remain > 0:
-            # Still more remains in this step, just reduce the remaining number
-            step[1] -= 1
-            yield current_step_value
-
-
-def create_exponential_backoff(base=2, max_value=120, scale_factor=1):
+def retry_until_successful(
+    backoff: int, timeout: int, logger, verbose: bool, _function, *args, **kwargs
+):
     """
-
-
-    :param
-
+    Runs function with given *args and **kwargs.
+    Tries to run it until success or timeout reached (timeout is optional)
+    :param backoff: can either be a:
+            - number (int / float) that will be used as interval.
+            - generator of waiting intervals. (support next())
+    :param timeout: pass None if timeout is not wanted, number of seconds if it is
+    :param logger: a logger so we can log the failures
+    :param verbose: whether to log the failure on each retry
+    :param _function: function to run
+    :param args: functions args
+    :param kwargs: functions kwargs
+    :return: function result
     """
-
-    while True:
-        # This "complex" implementation (unlike the one in linear backoff) is to avoid exponent growing too fast and
-        # risking going behind max_int
-        next_value = scale_factor * (base**exponent)
-        if next_value < max_value:
-            exponent += 1
-            yield next_value
-        else:
-            yield max_value
+    return Retryer(backoff, timeout, logger, verbose, _function, *args, **kwargs).run()
 
 
-def
+async def retry_until_successful_async(
     backoff: int, timeout: int, logger, verbose: bool, _function, *args, **kwargs
 ):
     """
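Note: the backoff generators and the retry loop now live in the new mlrun/utils/retryer.py (+208 lines in this release) and remain importable from mlrun.utils through the noqa re-exports above. A minimal usage sketch, assuming the Retryer-based wrapper keeps the documented semantics (logger may be None, a timeout of None disables the deadline); the flaky function is hypothetical:

    from mlrun.utils import create_step_backoff, retry_until_successful

    attempts = {"count": 0}

    def flaky():
        attempts["count"] += 1
        if attempts["count"] < 3:
            raise RuntimeError("not ready yet")
        return "ok"

    # wait 2s for the first 10 tries, 10s for the next 10, then 120s from then on
    backoff = create_step_backoff([[2, 10], [10, 10], [120, None]])
    result = retry_until_successful(backoff, 60, None, False, flaky)
    print(result)  # ok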
@@ -1022,80 +984,58 @@ def retry_until_successful(
     :param kwargs: functions kwargs
     :return: function result
     """
-
-
-
-    # Check if backoff is just a simple interval
-    if isinstance(backoff, int) or isinstance(backoff, float):
-        backoff = create_linear_backoff(base=backoff, coefficient=0)
-
-    first_interval = next(backoff)
-    if timeout and timeout <= first_interval:
-        logger.warning(
-            f"Timeout ({timeout}) must be higher than backoff ({first_interval})."
-            f" Set timeout to be higher than backoff."
-        )
-
-    # If deadline was not provided or deadline not reached
-    while timeout is None or time.time() < start_time + timeout:
-        next_interval = first_interval or next(backoff)
-        first_interval = None
-        try:
-            result = _function(*args, **kwargs)
-            return result
-
-        except mlrun.errors.MLRunFatalFailureError as exc:
-            raise exc.original_exception
-        except Exception as exc:
-            last_exception = exc
-
-            # If next interval is within allowed time period - wait on interval, abort otherwise
-            if timeout is None or time.time() + next_interval < start_time + timeout:
-                if logger is not None and verbose:
-                    logger.debug(
-                        f"Operation not yet successful, Retrying in {next_interval} seconds."
-                        f" exc: {err_to_str(exc)}"
-                    )
-
-                time.sleep(next_interval)
-            else:
-                break
-
-    if logger is not None:
-        logger.warning(
-            f"Operation did not complete on time. last exception: {last_exception}"
-        )
-
-    raise mlrun.errors.MLRunRetryExhaustedError(
-        f"Failed to execute command by the given deadline."
-        f" last_exception: {last_exception},"
-        f" function_name: {_function.__name__},"
-        f" timeout: {timeout}"
-    ) from last_exception
+    return await AsyncRetryer(
+        backoff, timeout, logger, verbose, _function, *args, **kwargs
+    ).run()
 
 
 def get_ui_url(project, uid=None):
     url = ""
     if mlrun.mlconf.resolve_ui_url():
-        url = "{}/{}/{}/jobs"
-            mlrun.mlconf.resolve_ui_url(), mlrun.mlconf.ui.projects_prefix, project
-        )
+        url = f"{mlrun.mlconf.resolve_ui_url()}/{mlrun.mlconf.ui.projects_prefix}/{project}/jobs"
     if uid:
         url += f"/monitor/{uid}/overview"
     return url
 
 
+def get_model_endpoint_url(project, model_name, model_endpoint_id):
+    url = ""
+    if mlrun.mlconf.resolve_ui_url():
+        url = f"{mlrun.mlconf.resolve_ui_url()}/{mlrun.mlconf.ui.projects_prefix}/{project}/models"
+        if model_name:
+            url += f"/model-endpoints/{model_name}/{model_endpoint_id}/overview"
+    return url
+
+
 def get_workflow_url(project, id=None):
     url = ""
     if mlrun.mlconf.resolve_ui_url():
-        url =
-            mlrun.mlconf.resolve_ui_url()
+        url = (
+            f"{mlrun.mlconf.resolve_ui_url()}/{mlrun.mlconf.ui.projects_prefix}"
+            f"/{project}/jobs/monitor-workflows/workflow/{id}"
         )
     return url
 
 
+def get_kfp_project_filter(project_name: str) -> str:
+    """
+    Generates a filter string for KFP runs, using a substring predicate
+    on the run's 'name' field. This is used as a heuristic to retrieve runs that are associated
+    with a specific project. The 'op: 9' operator indicates that the filter checks if the
+    project name appears as a substring in the run's name, ensuring that we can identify
+    runs belonging to the desired project.
+    """
+    is_substring_op = 9
+    project_name_filter = {
+        "predicates": [
+            {"key": "name", "op": is_substring_op, "string_value": project_name}
+        ]
+    }
+    return json.dumps(project_name_filter)
+
+
 def are_strings_in_exception_chain_messages(
-    exception: Exception, strings_list
+    exception: Exception, strings_list: list[str]
 ) -> bool:
     while exception is not None:
         if any([string in str(exception) for string in strings_list]):
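Note: the new KFP filter is plain JSON, so its shape is easy to verify (output formatting below is standard json.dumps):

    from mlrun.utils.helpers import get_kfp_project_filter

    print(get_kfp_project_filter("my-project"))
    # {"predicates": [{"key": "name", "op": 9, "string_value": "my-project"}]}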
@@ -1117,16 +1057,35 @@ def create_class(pkg_class: str):
     return class_
 
 
-def create_function(pkg_func: str):
+def create_function(pkg_func: str, reload_modules: bool = False):
     """Create a function from a package.module.function string
 
     :param pkg_func: full function location,
                      e.g. "sklearn.feature_selection.f_classif"
+    :param reload_modules: reload the function again.
     """
     splits = pkg_func.split(".")
     pkg_module = ".".join(splits[:-1])
     cb_fname = splits[-1]
     pkg_module = __import__(pkg_module, fromlist=[cb_fname])
+
+    if reload_modules:
+        # Even though the function appears in the modules list, we need to reload
+        # the code again because it may have changed
+        try:
+            logger.debug("Reloading module", module=pkg_func)
+            _reload(
+                pkg_module,
+                max_recursion_depth=mlrun.mlconf.function.spec.reload_max_recursion_depth,
+            )
+        except Exception as exc:
+            logger.warning(
+                "Failed to reload module. Not all associated modules can be reloaded, import them manually."
+                "Or, with Jupyter, restart the Python kernel.",
+                module=pkg_func,
+                err=mlrun.errors.err_to_str(exc),
+            )
+
     function_ = getattr(pkg_module, cb_fname)
     return function_
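Note: a usage sketch for the extended create_function (this particular dotted path requires scikit-learn to be installed; reload_modules=True re-imports the target module tree up to the configured recursion depth):

    from mlrun.utils.helpers import create_function

    # resolve a callable from its dotted path
    f_classif = create_function("sklearn.feature_selection.f_classif")

    # force a re-import when the underlying source may have changed,
    # e.g. while iterating on code in Jupyter
    f_classif = create_function(
        "sklearn.feature_selection.f_classif", reload_modules=True
    )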
@@ -1184,8 +1143,14 @@ def get_class(class_name, namespace=None):
     return class_object
 
 
-def get_function(function,
-    """
+def get_function(function, namespaces, reload_modules: bool = False):
+    """Return function callable object from function name string
+
+    :param function: path to the function ([class_name::]function)
+    :param namespaces: one or list of namespaces/modules to search the function in
+    :param reload_modules: reload the function again
+    :return: function handler (callable)
+    """
     if callable(function):
         return function
 
@@ -1194,12 +1159,12 @@ def get_function(function, namespace):
         if not function.endswith(")"):
             raise ValueError('function expression must start with "(" and end with ")"')
         return eval("lambda event: " + function[1:-1], {}, {})
-    function_object = _search_in_namespaces(function,
+    function_object = _search_in_namespaces(function, namespaces)
     if function_object is not None:
         return function_object
 
     try:
-        function_object = create_function(function)
+        function_object = create_function(function, reload_modules)
     except (ImportError, ValueError) as exc:
         raise ImportError(
             f"state/function init failed, handler '{function}' not found"
@@ -1208,18 +1173,24 @@
 
 
 def get_handler_extended(
-    handler_path: str,
+    handler_path: str,
+    context=None,
+    class_args: dict = None,
+    namespaces=None,
+    reload_modules: bool = False,
 ):
-    """
+    """Get function handler from [class_name::]handler string
 
     :param handler_path: path to the function ([class_name::]handler)
     :param context: MLRun function/job client context
     :param class_args: optional dict of class init kwargs
     :param namespaces: one or list of namespaces/modules to search the handler in
+    :param reload_modules: reload the function again
     :return: function handler (callable)
     """
+    class_args = class_args or {}
     if "::" not in handler_path:
-        return get_function(handler_path, namespaces)
+        return get_function(handler_path, namespaces, reload_modules)
 
     splitted = handler_path.split("::")
     class_path = splitted[0].strip()
@@ -1275,7 +1246,7 @@ def has_timezone(timestamp):
     return False
 
 
-def as_list(element: Any) ->
+def as_list(element: Any) -> list[Any]:
     return element if isinstance(element, list) else [element]
 
 
@@ -1294,7 +1265,7 @@ def calculate_dataframe_hash(dataframe: pandas.DataFrame):
     return hashlib.sha1(pandas.util.hash_pandas_object(dataframe).values).hexdigest()
 
 
-def template_artifact_path(artifact_path, project, run_uid=
+def template_artifact_path(artifact_path, project, run_uid=None):
     """
     Replace {{run.uid}} with the run uid and {{project}} with the project name in the artifact path.
     If no run uid is provided, the word `project` will be used instead as it is assumed to be a project
@@ -1302,6 +1273,7 @@ def template_artifact_path(artifact_path, project, run_uid="project"):
     """
     if not artifact_path:
         return artifact_path
+    run_uid = run_uid or "project"
     artifact_path = artifact_path.replace("{{run.uid}}", run_uid)
     artifact_path = _fill_project_path_template(artifact_path, project)
     return artifact_path
@@ -1323,6 +1295,10 @@ def _fill_project_path_template(artifact_path, project):
     return artifact_path
 
 
+def to_non_empty_values_dict(input_dict: dict) -> dict:
+    return {key: value for key, value in input_dict.items() if value}
+
+
 def str_to_timestamp(time_str: str, now_time: Timestamp = None):
     """convert fixed/relative time string to Pandas Timestamp
 
@@ -1361,13 +1337,6 @@ def str_to_timestamp(time_str: str, now_time: Timestamp = None):
     return Timestamp(time_str)
 
 
-def is_legacy_artifact(artifact):
-    if isinstance(artifact, dict):
-        return "metadata" not in artifact
-    else:
-        return not hasattr(artifact, "metadata")
-
-
 def is_link_artifact(artifact):
     if isinstance(artifact, dict):
         return (
@@ -1377,7 +1346,7 @@ def is_link_artifact(artifact):
         return artifact.kind == mlrun.common.schemas.ArtifactCategories.link.value
 
 
-def format_run(run:
+def format_run(run: PipelineRun, with_project=False) -> dict:
     fields = [
         "id",
         "name",
@@ -1387,6 +1356,7 @@ def format_run(run: dict, with_project=False) -> dict:
         "scheduled_at",
         "finished_at",
         "description",
+        "experiment_id",
     ]
 
     if with_project:
@@ -1414,17 +1384,17 @@ def format_run(run: dict, with_project=False) -> dict:
     # pipelines are yet to populate the status or workflow has failed
     # as observed https://jira.iguazeng.com/browse/ML-5195
     # set to unknown to ensure a status is returned
-    if run
-        run["status"] = inflection.titleize(
+    if run.get("status", None) is None:
+        run["status"] = inflection.titleize(
+            mlrun.common.runtimes.constants.RunStates.unknown
+        )
 
     return run
 
 
 def get_in_artifact(artifact: dict, key, default=None, raise_on_missing=False):
     """artifact can be dict or Artifact object"""
-    if
-        return artifact.get(key, default)
-    elif key == "kind":
+    if key == "kind":
         return artifact.get(key, default)
     else:
         for block in ["metadata", "spec", "status"]:
@@ -1461,11 +1431,27 @@ def is_running_in_jupyter_notebook() -> bool:
     Check if the code is running inside a Jupyter Notebook.
     :return: True if running inside a Jupyter Notebook, False otherwise.
     """
-
+    return is_jupyter
 
-
-
-
+
+def create_ipython_display():
+    """
+    Create an IPython display object and fill it with initial content.
+    We can later use the returned display_id with the update_display method to update the content.
+    If IPython is not installed, a warning will be logged and None will be returned.
+    """
+    if is_ipython:
+        import IPython
+
+        display_id = uuid.uuid4().hex
+        content = IPython.display.HTML(
+            f'<div id="{display_id}">Temporary Display Content</div>'
+        )
+        IPython.display.display(content, display_id=display_id)
+        return display_id
+
+    # returning None if IPython is not installed, this method shouldn't be called in that case but logging for sanity
+    logger.debug("IPython is not installed, cannot create IPython display")
 
 
 def as_number(field_name, field_value):
@@ -1558,13 +1544,15 @@ def normalize_project_username(username: str):
     return username
 
 
-# run_in threadpool is taken from fastapi to allow us to run sync functions in a threadpool
-# without importing fastapi in the client
 async def run_in_threadpool(func, *args, **kwargs):
+    """
+    Run a sync-function in the loop default thread pool executor pool and await its result.
+    Note that this function is not suitable for CPU-bound tasks, as it will block the event loop.
+    """
+    loop = asyncio.get_running_loop()
     if kwargs:
-        # run_sync doesn't accept 'kwargs', so bind them in here
         func = functools.partial(func, **kwargs)
-    return await
+    return await loop.run_in_executor(None, func, *args)
 
 
 def is_explicit_ack_supported(context):
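Note: the helper now dispatches to the running loop's default executor instead of anyio's run_sync (the anyio import is dropped at the top of the file); a minimal sketch with a hypothetical blocking function:

    import asyncio
    import time

    from mlrun.utils.helpers import run_in_threadpool

    def blocking_io(path, mode="r"):
        time.sleep(0.1)  # stand-in for blocking work
        return f"read {path} ({mode})"

    async def main():
        # kwargs are bound via functools.partial before dispatch to the executor
        result = await run_in_threadpool(blocking_io, "data.csv", mode="rb")
        print(result)

    asyncio.run(main())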
@@ -1630,7 +1618,7 @@ def is_ecr_url(registry: str) -> bool:
     return ".ecr." in registry and ".amazonaws.com" in registry
 
 
-def get_local_file_schema() ->
+def get_local_file_schema() -> list:
     # The expression `list(string.ascii_lowercase)` generates a list of lowercase alphabets,
     # which corresponds to drive letters in Windows file paths such as `C:/Windows/path`.
     return ["file"] + list(string.ascii_lowercase)
@@ -1642,3 +1630,195 @@ def is_safe_path(base, filepath, is_symlink=False):
         os.path.abspath(filepath) if not is_symlink else os.path.realpath(filepath)
     )
     return base == os.path.commonpath((base, resolved_filepath))
+
+
+def get_serving_spec():
+    data = None
+
+    # we will have the serving spec in either mounted config map
+    # or env depending on the size of the spec and configuration
+    try:
+        with open(mlrun.common.constants.MLRUN_SERVING_SPEC_PATH) as f:
+            data = f.read()
+    except FileNotFoundError:
+        pass
+
+    if data is None:
+        data = os.environ.get("SERVING_SPEC_ENV", "")
+        if not data:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "Failed to find serving spec in env var or config file"
+            )
+    spec = json.loads(data)
+    return spec
+
+
+def additional_filters_warning(additional_filters, class_name):
+    if additional_filters and any(additional_filters):
+        mlrun.utils.logger.warn(
+            f"additional_filters parameter is not supported in {class_name},"
+            f" parameter has been ignored."
+        )
+
+
+def merge_dicts_with_precedence(*dicts: dict) -> dict:
+    """
+    Merge multiple dictionaries with precedence given to keys from later dictionaries.
+
+    This function merges an arbitrary number of dictionaries, where keys from dictionaries later
+    in the argument list take precedence over keys from dictionaries earlier in the list. If all
+    dictionaries contain the same key, the value from the last dictionary with that key will
+    overwrite the values from earlier dictionaries.
+
+    Example:
+        >>> first_dict = {"key1": "value1", "key2": "value2"}
+        >>> second_dict = {"key2": "new_value2", "key3": "value3"}
+        >>> third_dict = {"key3": "new_value3", "key4": "value4"}
+        >>> merge_dicts_with_precedence(first_dict, second_dict, third_dict)
+        {'key1': 'value1', 'key2': 'new_value2', 'key3': 'new_value3', 'key4': 'value4'}
+
+    - If no dictionaries are provided, the function returns an empty dictionary.
+    """
+    return {k: v for d in dicts if d for k, v in d.items()}
+
+
+def validate_component_version_compatibility(
+    component_name: typing.Literal["iguazio", "nuclio"], *min_versions: str
+):
+    """
+    :param component_name: Name of the component to validate compatibility for.
+    :param min_versions: Valid minimum version(s) required, assuming no 2 versions has equal major and minor.
+    """
+    parsed_min_versions = [
+        semver.VersionInfo.parse(min_version) for min_version in min_versions
+    ]
+    parsed_current_version = None
+    component_current_version = None
+    try:
+        if component_name == "iguazio":
+            component_current_version = mlrun.mlconf.igz_version
+            parsed_current_version = mlrun.mlconf.get_parsed_igz_version()
+
+            if parsed_current_version:
+                # ignore pre-release and build metadata, as iguazio version always has them, and we only care about the
+                # major, minor, and patch versions
+                parsed_current_version = semver.VersionInfo.parse(
+                    f"{parsed_current_version.major}.{parsed_current_version.minor}.{parsed_current_version.patch}"
+                )
+        if component_name == "nuclio":
+            component_current_version = mlrun.mlconf.nuclio_version
+            parsed_current_version = semver.VersionInfo.parse(
+                mlrun.mlconf.nuclio_version
+            )
+        if not parsed_current_version:
+            return True
+    except ValueError:
+        # only log when version is set but invalid
+        if component_current_version:
+            logger.warning(
+                "Unable to parse current version, assuming compatibility",
+                component_name=component_name,
+                current_version=component_current_version,
+                min_versions=min_versions,
+            )
+        return True
+
+    # Feature might have been back-ported e.g. nuclio node selection is supported from
+    # 1.5.20 and 1.6.10 but not in 1.6.9 - therefore we reverse sort to validate against 1.6.x 1st and
+    # then against 1.5.x
+    parsed_min_versions.sort(reverse=True)
+    for parsed_min_version in parsed_min_versions:
+        if (
+            parsed_current_version.major == parsed_min_version.major
+            and parsed_current_version.minor == parsed_min_version.minor
+            and parsed_current_version.patch < parsed_min_version.patch
+        ):
+            return False
+
+        if parsed_current_version >= parsed_min_version:
+            return True
+    return False
+
+
+def format_alert_summary(
+    alert: mlrun.common.schemas.AlertConfig, event_data: mlrun.common.schemas.Event
+) -> str:
+    result = alert.summary.replace("{{project}}", alert.project)
+    result = result.replace("{{name}}", alert.name)
+    result = result.replace("{{entity}}", event_data.entity.ids[0])
+    return result
+
+
+def is_parquet_file(file_path, format_=None):
+    return (file_path and file_path.endswith((".parquet", ".pq"))) or (
+        format_ == "parquet"
+    )
+
+
+def validate_single_def_handler(function_kind: str, code: str):
+    # The name of MLRun's wrapper is 'handler', which is why the handler function name cannot be 'handler'
+    # it would override MLRun's wrapper
+    if function_kind == "mlrun":
+        # Find all lines that start with "def handler("
+        pattern = re.compile(r"^def handler\(", re.MULTILINE)
+        matches = pattern.findall(code)
+
+        # Only MLRun's wrapper handler (footer) can be in the code
+        if len(matches) > 1:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "The code file contains a function named “handler“, which is reserved. "
+                + "Use a different name for your function."
+            )
+
+
+def _reload(module, max_recursion_depth):
+    """Recursively reload modules."""
+    if max_recursion_depth <= 0:
+        return
+
+    reload(module)
+    for attribute_name in dir(module):
+        attribute = getattr(module, attribute_name)
+        if type(attribute) is ModuleType:
+            _reload(attribute, max_recursion_depth - 1)
+
+
+def run_with_retry(
+    retry_count: int,
+    func: typing.Callable,
+    retry_on_exceptions: typing.Union[
+        type[Exception],
+        tuple[type[Exception]],
+    ] = None,
+    *args,
+    **kwargs,
+):
+    """
+    Executes a function with retry logic upon encountering specified exceptions.
+
+    :param retry_count: The number of times to retry the function execution.
+    :param func: The function to execute.
+    :param retry_on_exceptions: Exception(s) that trigger a retry. Can be a single exception or a tuple of exceptions.
+    :param args: Positional arguments to pass to the function.
+    :param kwargs: Keyword arguments to pass to the function.
+    :return: The result of the function execution if successful.
+    :raises Exception: Re-raises the last exception encountered after all retries are exhausted.
+    """
+    if retry_on_exceptions is None:
+        retry_on_exceptions = (Exception,)
+    elif isinstance(retry_on_exceptions, list):
+        retry_on_exceptions = tuple(retry_on_exceptions)
+
+    last_exception = None
+    for attempt in range(retry_count + 1):
+        try:
+            return func(*args, **kwargs)
+        except retry_on_exceptions as exc:
+            last_exception = exc
+            logger.warning(
+                f"Attempt {{{attempt}/ {retry_count}}} failed with exception: {exc}",
+            )
+            if attempt == retry_count:
+                raise
+    raise last_exception