mlrun 1.6.4rc7__py3-none-any.whl → 1.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/__init__.py +11 -1
- mlrun/__main__.py +40 -122
- mlrun/alerts/__init__.py +15 -0
- mlrun/alerts/alert.py +248 -0
- mlrun/api/schemas/__init__.py +5 -4
- mlrun/artifacts/__init__.py +8 -3
- mlrun/artifacts/base.py +47 -257
- mlrun/artifacts/dataset.py +11 -192
- mlrun/artifacts/manager.py +79 -47
- mlrun/artifacts/model.py +31 -159
- mlrun/artifacts/plots.py +23 -380
- mlrun/common/constants.py +74 -1
- mlrun/common/db/sql_session.py +5 -5
- mlrun/common/formatters/__init__.py +21 -0
- mlrun/common/formatters/artifact.py +45 -0
- mlrun/common/formatters/base.py +113 -0
- mlrun/common/formatters/feature_set.py +33 -0
- mlrun/common/formatters/function.py +46 -0
- mlrun/common/formatters/pipeline.py +53 -0
- mlrun/common/formatters/project.py +51 -0
- mlrun/common/formatters/run.py +29 -0
- mlrun/common/helpers.py +12 -3
- mlrun/common/model_monitoring/helpers.py +9 -5
- mlrun/{runtimes → common/runtimes}/constants.py +37 -9
- mlrun/common/schemas/__init__.py +31 -5
- mlrun/common/schemas/alert.py +202 -0
- mlrun/common/schemas/api_gateway.py +196 -0
- mlrun/common/schemas/artifact.py +25 -4
- mlrun/common/schemas/auth.py +16 -5
- mlrun/common/schemas/background_task.py +1 -1
- mlrun/common/schemas/client_spec.py +4 -2
- mlrun/common/schemas/common.py +7 -4
- mlrun/common/schemas/constants.py +3 -0
- mlrun/common/schemas/feature_store.py +74 -44
- mlrun/common/schemas/frontend_spec.py +15 -7
- mlrun/common/schemas/function.py +12 -1
- mlrun/common/schemas/hub.py +11 -18
- mlrun/common/schemas/memory_reports.py +2 -2
- mlrun/common/schemas/model_monitoring/__init__.py +20 -4
- mlrun/common/schemas/model_monitoring/constants.py +123 -42
- mlrun/common/schemas/model_monitoring/grafana.py +13 -9
- mlrun/common/schemas/model_monitoring/model_endpoints.py +101 -54
- mlrun/common/schemas/notification.py +71 -14
- mlrun/common/schemas/object.py +2 -2
- mlrun/{model_monitoring/controller_handler.py → common/schemas/pagination.py} +9 -12
- mlrun/common/schemas/pipeline.py +8 -1
- mlrun/common/schemas/project.py +69 -18
- mlrun/common/schemas/runs.py +7 -1
- mlrun/common/schemas/runtime_resource.py +8 -12
- mlrun/common/schemas/schedule.py +4 -4
- mlrun/common/schemas/tag.py +1 -2
- mlrun/common/schemas/workflow.py +12 -4
- mlrun/common/types.py +14 -1
- mlrun/config.py +154 -69
- mlrun/data_types/data_types.py +6 -1
- mlrun/data_types/spark.py +2 -2
- mlrun/data_types/to_pandas.py +67 -37
- mlrun/datastore/__init__.py +6 -8
- mlrun/datastore/alibaba_oss.py +131 -0
- mlrun/datastore/azure_blob.py +143 -42
- mlrun/datastore/base.py +102 -58
- mlrun/datastore/datastore.py +34 -13
- mlrun/datastore/datastore_profile.py +146 -20
- mlrun/datastore/dbfs_store.py +3 -7
- mlrun/datastore/filestore.py +1 -4
- mlrun/datastore/google_cloud_storage.py +97 -33
- mlrun/datastore/hdfs.py +56 -0
- mlrun/datastore/inmem.py +6 -3
- mlrun/datastore/redis.py +7 -2
- mlrun/datastore/s3.py +34 -12
- mlrun/datastore/snowflake_utils.py +45 -0
- mlrun/datastore/sources.py +303 -111
- mlrun/datastore/spark_utils.py +31 -2
- mlrun/datastore/store_resources.py +9 -7
- mlrun/datastore/storeytargets.py +151 -0
- mlrun/datastore/targets.py +453 -176
- mlrun/datastore/utils.py +72 -58
- mlrun/datastore/v3io.py +6 -1
- mlrun/db/base.py +274 -41
- mlrun/db/factory.py +1 -1
- mlrun/db/httpdb.py +893 -225
- mlrun/db/nopdb.py +291 -33
- mlrun/errors.py +36 -6
- mlrun/execution.py +115 -42
- mlrun/feature_store/__init__.py +0 -2
- mlrun/feature_store/api.py +65 -73
- mlrun/feature_store/common.py +7 -12
- mlrun/feature_store/feature_set.py +76 -55
- mlrun/feature_store/feature_vector.py +39 -31
- mlrun/feature_store/ingestion.py +7 -6
- mlrun/feature_store/retrieval/base.py +16 -11
- mlrun/feature_store/retrieval/dask_merger.py +2 -0
- mlrun/feature_store/retrieval/job.py +13 -4
- mlrun/feature_store/retrieval/local_merger.py +2 -0
- mlrun/feature_store/retrieval/spark_merger.py +24 -32
- mlrun/feature_store/steps.py +45 -34
- mlrun/features.py +11 -21
- mlrun/frameworks/_common/artifacts_library.py +9 -9
- mlrun/frameworks/_common/mlrun_interface.py +5 -5
- mlrun/frameworks/_common/model_handler.py +48 -48
- mlrun/frameworks/_common/plan.py +5 -6
- mlrun/frameworks/_common/producer.py +3 -4
- mlrun/frameworks/_common/utils.py +5 -5
- mlrun/frameworks/_dl_common/loggers/logger.py +6 -7
- mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +9 -9
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +23 -47
- mlrun/frameworks/_ml_common/artifacts_library.py +1 -2
- mlrun/frameworks/_ml_common/loggers/logger.py +3 -4
- mlrun/frameworks/_ml_common/loggers/mlrun_logger.py +4 -5
- mlrun/frameworks/_ml_common/model_handler.py +24 -24
- mlrun/frameworks/_ml_common/pkl_model_server.py +2 -2
- mlrun/frameworks/_ml_common/plan.py +2 -2
- mlrun/frameworks/_ml_common/plans/calibration_curve_plan.py +2 -3
- mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +2 -3
- mlrun/frameworks/_ml_common/plans/dataset_plan.py +3 -3
- mlrun/frameworks/_ml_common/plans/feature_importance_plan.py +3 -3
- mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +4 -4
- mlrun/frameworks/_ml_common/utils.py +4 -4
- mlrun/frameworks/auto_mlrun/auto_mlrun.py +9 -9
- mlrun/frameworks/huggingface/model_server.py +4 -4
- mlrun/frameworks/lgbm/__init__.py +33 -33
- mlrun/frameworks/lgbm/callbacks/callback.py +2 -4
- mlrun/frameworks/lgbm/callbacks/logging_callback.py +4 -5
- mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +4 -5
- mlrun/frameworks/lgbm/mlrun_interfaces/booster_mlrun_interface.py +1 -3
- mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +6 -6
- mlrun/frameworks/lgbm/model_handler.py +10 -10
- mlrun/frameworks/lgbm/model_server.py +6 -6
- mlrun/frameworks/lgbm/utils.py +5 -5
- mlrun/frameworks/onnx/dataset.py +8 -8
- mlrun/frameworks/onnx/mlrun_interface.py +3 -3
- mlrun/frameworks/onnx/model_handler.py +6 -6
- mlrun/frameworks/onnx/model_server.py +7 -7
- mlrun/frameworks/parallel_coordinates.py +6 -6
- mlrun/frameworks/pytorch/__init__.py +18 -18
- mlrun/frameworks/pytorch/callbacks/callback.py +4 -5
- mlrun/frameworks/pytorch/callbacks/logging_callback.py +17 -17
- mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +11 -11
- mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +23 -29
- mlrun/frameworks/pytorch/callbacks_handler.py +38 -38
- mlrun/frameworks/pytorch/mlrun_interface.py +20 -20
- mlrun/frameworks/pytorch/model_handler.py +17 -17
- mlrun/frameworks/pytorch/model_server.py +7 -7
- mlrun/frameworks/sklearn/__init__.py +13 -13
- mlrun/frameworks/sklearn/estimator.py +4 -4
- mlrun/frameworks/sklearn/metrics_library.py +14 -14
- mlrun/frameworks/sklearn/mlrun_interface.py +16 -9
- mlrun/frameworks/sklearn/model_handler.py +2 -2
- mlrun/frameworks/tf_keras/__init__.py +10 -7
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +15 -15
- mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +11 -11
- mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +19 -23
- mlrun/frameworks/tf_keras/mlrun_interface.py +9 -11
- mlrun/frameworks/tf_keras/model_handler.py +14 -14
- mlrun/frameworks/tf_keras/model_server.py +6 -6
- mlrun/frameworks/xgboost/__init__.py +13 -13
- mlrun/frameworks/xgboost/model_handler.py +6 -6
- mlrun/k8s_utils.py +61 -17
- mlrun/launcher/__init__.py +1 -1
- mlrun/launcher/base.py +16 -15
- mlrun/launcher/client.py +13 -11
- mlrun/launcher/factory.py +1 -1
- mlrun/launcher/local.py +23 -13
- mlrun/launcher/remote.py +17 -10
- mlrun/lists.py +7 -6
- mlrun/model.py +478 -103
- mlrun/model_monitoring/__init__.py +1 -1
- mlrun/model_monitoring/api.py +163 -371
- mlrun/{runtimes/mpijob/v1alpha1.py → model_monitoring/applications/__init__.py} +9 -15
- mlrun/model_monitoring/applications/_application_steps.py +188 -0
- mlrun/model_monitoring/applications/base.py +108 -0
- mlrun/model_monitoring/applications/context.py +341 -0
- mlrun/model_monitoring/{evidently_application.py → applications/evidently_base.py} +27 -22
- mlrun/model_monitoring/applications/histogram_data_drift.py +354 -0
- mlrun/model_monitoring/applications/results.py +99 -0
- mlrun/model_monitoring/controller.py +131 -278
- mlrun/model_monitoring/db/__init__.py +18 -0
- mlrun/model_monitoring/db/stores/__init__.py +136 -0
- mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
- mlrun/model_monitoring/db/stores/base/store.py +213 -0
- mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
- mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +71 -0
- mlrun/model_monitoring/db/stores/sqldb/models/base.py +190 -0
- mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +103 -0
- mlrun/model_monitoring/{stores/models/mysql.py → db/stores/sqldb/models/sqlite.py} +19 -13
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +659 -0
- mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +726 -0
- mlrun/model_monitoring/db/tsdb/__init__.py +105 -0
- mlrun/model_monitoring/db/tsdb/base.py +448 -0
- mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
- mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +279 -0
- mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +42 -0
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +507 -0
- mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +158 -0
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +849 -0
- mlrun/model_monitoring/features_drift_table.py +134 -106
- mlrun/model_monitoring/helpers.py +199 -55
- mlrun/model_monitoring/metrics/__init__.py +13 -0
- mlrun/model_monitoring/metrics/histogram_distance.py +127 -0
- mlrun/model_monitoring/model_endpoint.py +3 -2
- mlrun/model_monitoring/stream_processing.py +131 -398
- mlrun/model_monitoring/tracking_policy.py +9 -2
- mlrun/model_monitoring/writer.py +161 -125
- mlrun/package/__init__.py +6 -6
- mlrun/package/context_handler.py +5 -5
- mlrun/package/packager.py +7 -7
- mlrun/package/packagers/default_packager.py +8 -8
- mlrun/package/packagers/numpy_packagers.py +15 -15
- mlrun/package/packagers/pandas_packagers.py +5 -5
- mlrun/package/packagers/python_standard_library_packagers.py +10 -10
- mlrun/package/packagers_manager.py +19 -23
- mlrun/package/utils/_formatter.py +6 -6
- mlrun/package/utils/_pickler.py +2 -2
- mlrun/package/utils/_supported_format.py +4 -4
- mlrun/package/utils/log_hint_utils.py +2 -2
- mlrun/package/utils/type_hint_utils.py +4 -9
- mlrun/platforms/__init__.py +11 -10
- mlrun/platforms/iguazio.py +24 -203
- mlrun/projects/operations.py +52 -25
- mlrun/projects/pipelines.py +191 -197
- mlrun/projects/project.py +1227 -400
- mlrun/render.py +16 -19
- mlrun/run.py +209 -184
- mlrun/runtimes/__init__.py +83 -15
- mlrun/runtimes/base.py +51 -35
- mlrun/runtimes/daskjob.py +17 -10
- mlrun/runtimes/databricks_job/databricks_cancel_task.py +1 -1
- mlrun/runtimes/databricks_job/databricks_runtime.py +8 -7
- mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
- mlrun/runtimes/funcdoc.py +1 -29
- mlrun/runtimes/function_reference.py +1 -1
- mlrun/runtimes/kubejob.py +34 -128
- mlrun/runtimes/local.py +40 -11
- mlrun/runtimes/mpijob/__init__.py +0 -20
- mlrun/runtimes/mpijob/abstract.py +9 -10
- mlrun/runtimes/mpijob/v1.py +1 -1
- mlrun/{model_monitoring/stores/models/sqlite.py → runtimes/nuclio/__init__.py} +7 -9
- mlrun/runtimes/nuclio/api_gateway.py +769 -0
- mlrun/runtimes/nuclio/application/__init__.py +15 -0
- mlrun/runtimes/nuclio/application/application.py +758 -0
- mlrun/runtimes/nuclio/application/reverse_proxy.go +95 -0
- mlrun/runtimes/{function.py → nuclio/function.py} +200 -83
- mlrun/runtimes/{nuclio.py → nuclio/nuclio.py} +6 -6
- mlrun/runtimes/{serving.py → nuclio/serving.py} +65 -68
- mlrun/runtimes/pod.py +281 -101
- mlrun/runtimes/remotesparkjob.py +12 -9
- mlrun/runtimes/sparkjob/spark3job.py +67 -51
- mlrun/runtimes/utils.py +41 -75
- mlrun/secrets.py +9 -5
- mlrun/serving/__init__.py +8 -1
- mlrun/serving/remote.py +2 -7
- mlrun/serving/routers.py +85 -69
- mlrun/serving/server.py +69 -44
- mlrun/serving/states.py +209 -36
- mlrun/serving/utils.py +22 -14
- mlrun/serving/v1_serving.py +6 -7
- mlrun/serving/v2_serving.py +129 -54
- mlrun/track/tracker.py +2 -1
- mlrun/track/tracker_manager.py +3 -3
- mlrun/track/trackers/mlflow_tracker.py +6 -2
- mlrun/utils/async_http.py +6 -8
- mlrun/utils/azure_vault.py +1 -1
- mlrun/utils/clones.py +1 -2
- mlrun/utils/condition_evaluator.py +3 -3
- mlrun/utils/db.py +21 -3
- mlrun/utils/helpers.py +405 -225
- mlrun/utils/http.py +3 -6
- mlrun/utils/logger.py +112 -16
- mlrun/utils/notifications/notification/__init__.py +17 -13
- mlrun/utils/notifications/notification/base.py +50 -2
- mlrun/utils/notifications/notification/console.py +2 -0
- mlrun/utils/notifications/notification/git.py +24 -1
- mlrun/utils/notifications/notification/ipython.py +3 -1
- mlrun/utils/notifications/notification/slack.py +96 -21
- mlrun/utils/notifications/notification/webhook.py +59 -2
- mlrun/utils/notifications/notification_pusher.py +149 -30
- mlrun/utils/regex.py +9 -0
- mlrun/utils/retryer.py +208 -0
- mlrun/utils/singleton.py +1 -1
- mlrun/utils/v3io_clients.py +4 -6
- mlrun/utils/version/version.json +2 -2
- mlrun/utils/version/version.py +2 -6
- mlrun-1.7.0.dist-info/METADATA +378 -0
- mlrun-1.7.0.dist-info/RECORD +351 -0
- {mlrun-1.6.4rc7.dist-info → mlrun-1.7.0.dist-info}/WHEEL +1 -1
- mlrun/feature_store/retrieval/conversion.py +0 -273
- mlrun/kfpops.py +0 -868
- mlrun/model_monitoring/application.py +0 -310
- mlrun/model_monitoring/batch.py +0 -1095
- mlrun/model_monitoring/prometheus.py +0 -219
- mlrun/model_monitoring/stores/__init__.py +0 -111
- mlrun/model_monitoring/stores/kv_model_endpoint_store.py +0 -576
- mlrun/model_monitoring/stores/model_endpoint_store.py +0 -147
- mlrun/model_monitoring/stores/models/__init__.py +0 -27
- mlrun/model_monitoring/stores/models/base.py +0 -84
- mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -384
- mlrun/platforms/other.py +0 -306
- mlrun-1.6.4rc7.dist-info/METADATA +0 -272
- mlrun-1.6.4rc7.dist-info/RECORD +0 -314
- {mlrun-1.6.4rc7.dist-info → mlrun-1.7.0.dist-info}/LICENSE +0 -0
- {mlrun-1.6.4rc7.dist-info → mlrun-1.7.0.dist-info}/entry_points.txt +0 -0
- {mlrun-1.6.4rc7.dist-info → mlrun-1.7.0.dist-info}/top_level.txt +0 -0
mlrun/model.py
CHANGED
|
@@ -22,18 +22,19 @@ from collections import OrderedDict
|
|
|
22
22
|
from copy import deepcopy
|
|
23
23
|
from datetime import datetime
|
|
24
24
|
from os import environ
|
|
25
|
-
from typing import Any,
|
|
25
|
+
from typing import Any, Optional, Union
|
|
26
26
|
|
|
27
27
|
import pydantic.error_wrappers
|
|
28
28
|
|
|
29
29
|
import mlrun
|
|
30
|
+
import mlrun.common.constants as mlrun_constants
|
|
30
31
|
import mlrun.common.schemas.notification
|
|
32
|
+
import mlrun.utils.regex
|
|
31
33
|
|
|
32
34
|
from .utils import (
|
|
33
35
|
dict_to_json,
|
|
34
36
|
dict_to_yaml,
|
|
35
37
|
get_artifact_target,
|
|
36
|
-
is_legacy_artifact,
|
|
37
38
|
logger,
|
|
38
39
|
template_artifact_path,
|
|
39
40
|
)
|
|
@@ -44,6 +45,15 @@ RUN_ID_PLACE_HOLDER = "{run_id}" # IMPORTANT: shouldn't be changed.
|
|
|
44
45
|
|
|
45
46
|
class ModelObj:
|
|
46
47
|
_dict_fields = []
|
|
48
|
+
# The attributes below are used in the to_dict method
|
|
49
|
+
# Fields to strip from the object by default if strip=True
|
|
50
|
+
_default_fields_to_strip = []
|
|
51
|
+
# Fields that will be serialized by the object's _serialize_field method
|
|
52
|
+
_fields_to_serialize = []
|
|
53
|
+
# Fields that will be enriched by the object's _enrich_field method
|
|
54
|
+
_fields_to_enrich = []
|
|
55
|
+
# Fields that will be ignored by the object's _is_valid_field_value_for_serialization method
|
|
56
|
+
_fields_to_skip_validation = []
|
|
47
57
|
|
|
48
58
|
@staticmethod
|
|
49
59
|
def _verify_list(param, name):
|
|
@@ -63,26 +73,145 @@ class ModelObj:
|
|
|
63
73
|
return param
|
|
64
74
|
|
|
65
75
|
@mlrun.utils.filter_warnings("ignore", FutureWarning)
|
|
66
|
-
def to_dict(
|
|
67
|
-
|
|
76
|
+
def to_dict(
|
|
77
|
+
self, fields: list = None, exclude: list = None, strip: bool = False
|
|
78
|
+
) -> dict:
|
|
79
|
+
"""
|
|
80
|
+
Convert the object to a dict
|
|
81
|
+
|
|
82
|
+
:param fields: A list of fields to include in the dictionary. If not provided, the default value is taken
|
|
83
|
+
from `self._dict_fields` or from the object __init__ params.
|
|
84
|
+
:param exclude: A list of fields to exclude from the dictionary.
|
|
85
|
+
:param strip: If True, the object's `_default_fields_to_strip` attribute is appended to the exclude list.
|
|
86
|
+
The purpose of strip is to remove fields that are context / environment specific and not required to actually
|
|
87
|
+
define the object.
|
|
68
88
|
|
|
69
|
-
:
|
|
70
|
-
:param exclude: list of fields to exclude from the dict
|
|
89
|
+
:return: A dictionary representation of the object.
|
|
71
90
|
"""
|
|
72
91
|
struct = {}
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
92
|
+
|
|
93
|
+
fields = self._resolve_initial_to_dict_fields(fields)
|
|
94
|
+
fields_to_exclude = exclude or []
|
|
95
|
+
if strip:
|
|
96
|
+
fields_to_exclude += self._default_fields_to_strip
|
|
97
|
+
|
|
98
|
+
# fields_to_save is built from the fields list minus the fields to exclude minus the fields that require
|
|
99
|
+
# serialization and enrichment (because they will be added later to the struct)
|
|
100
|
+
fields_to_save = (
|
|
101
|
+
set(fields)
|
|
102
|
+
- set(fields_to_exclude)
|
|
103
|
+
- set(self._fields_to_serialize)
|
|
104
|
+
- set(self._fields_to_enrich)
|
|
105
|
+
)
|
|
106
|
+
|
|
107
|
+
# Iterating over the fields to save and adding them to the struct
|
|
108
|
+
for field_name in fields_to_save:
|
|
109
|
+
field_value = getattr(self, field_name, None)
|
|
110
|
+
if self._is_valid_field_value_for_serialization(
|
|
111
|
+
field_name, field_value, strip
|
|
112
|
+
):
|
|
113
|
+
# If the field value has attribute to_dict, we call it.
|
|
114
|
+
# If one of the attributes is a third party object that has to_dict method (such as k8s objects), then
|
|
115
|
+
# add it to the object's _fields_to_serialize attribute and handle it in the _serialize_field method.
|
|
116
|
+
if hasattr(field_value, "to_dict"):
|
|
117
|
+
field_value = field_value.to_dict(strip=strip)
|
|
118
|
+
if self._is_valid_field_value_for_serialization(
|
|
119
|
+
field_name, field_value, strip
|
|
120
|
+
):
|
|
121
|
+
struct[field_name] = field_value
|
|
122
|
+
else:
|
|
123
|
+
struct[field_name] = field_value
|
|
124
|
+
|
|
125
|
+
# Subtracting the fields_to_exclude from the fields_to_serialize because if we want to exclude a field there
|
|
126
|
+
# is no need to serialize it.
|
|
127
|
+
fields_to_serialize = list(
|
|
128
|
+
set(self._fields_to_serialize) - set(fields_to_exclude)
|
|
129
|
+
)
|
|
130
|
+
self._resolve_field_value_by_method(
|
|
131
|
+
struct, self._serialize_field, fields_to_serialize, strip
|
|
132
|
+
)
|
|
133
|
+
|
|
134
|
+
# Subtracting the fields_to_exclude from the fields_to_enrich because if we want to exclude a field there
|
|
135
|
+
# is no need to enrich it.
|
|
136
|
+
fields_to_enrich = list(set(self._fields_to_enrich) - set(fields_to_exclude))
|
|
137
|
+
self._resolve_field_value_by_method(
|
|
138
|
+
struct, self._enrich_field, fields_to_enrich, strip
|
|
139
|
+
)
|
|
140
|
+
|
|
141
|
+
self._apply_enrichment_before_to_dict_completion(struct, strip=strip)
|
|
142
|
+
return struct
|
|
143
|
+
|
|
144
|
+
def _resolve_initial_to_dict_fields(self, fields: list = None) -> list:
|
|
145
|
+
"""
|
|
146
|
+
Resolve fields to be used in to_dict method.
|
|
147
|
+
If fields is None, use `_dict_fields` attribute of the object.
|
|
148
|
+
If fields is None and `_dict_fields` is empty, use the object's __init__ parameters.
|
|
149
|
+
:param fields: List of fields to iterate over.
|
|
150
|
+
|
|
151
|
+
:return: List of fields to iterate over.
|
|
152
|
+
"""
|
|
153
|
+
return (
|
|
154
|
+
fields
|
|
155
|
+
or self._dict_fields
|
|
156
|
+
or list(inspect.signature(self.__init__).parameters.keys())
|
|
157
|
+
)
|
|
158
|
+
|
|
159
|
+
def _is_valid_field_value_for_serialization(
|
|
160
|
+
self, field_name: str, field_value: str, strip: bool = False
|
|
161
|
+
) -> bool:
|
|
162
|
+
"""
|
|
163
|
+
Check if the field value is valid for serialization.
|
|
164
|
+
If field name is in `_fields_to_skip_validation` attribute, skip validation and return True.
|
|
165
|
+
If strip is False, validation is intended to be skipped (this shortcut is currently disabled, see the TODO in the body).
|
|
166
|
+
If field value is None or empty dict/list, then no need to store it.
|
|
167
|
+
:param field_name: Field name.
|
|
168
|
+
:param field_value: Field value.
|
|
169
|
+
|
|
170
|
+
:return: True if the field value is valid for serialization, False otherwise.
|
|
171
|
+
"""
|
|
172
|
+
if field_name in self._fields_to_skip_validation:
|
|
173
|
+
return True
|
|
174
|
+
# TODO: remove when Runtime initialization will be refactored and enrichment will be moved to BE
|
|
175
|
+
# if not strip:
|
|
176
|
+
# return True
|
|
177
|
+
|
|
178
|
+
return field_value is not None and not (
|
|
179
|
+
(isinstance(field_value, dict) or isinstance(field_value, list))
|
|
180
|
+
and not field_value
|
|
181
|
+
)
|
|
182
|
+
|
|
183
|
+
def _resolve_field_value_by_method(
|
|
184
|
+
self,
|
|
185
|
+
struct: dict,
|
|
186
|
+
method: typing.Callable,
|
|
187
|
+
fields: typing.Union[list, set] = None,
|
|
188
|
+
strip: bool = False,
|
|
189
|
+
) -> dict:
|
|
190
|
+
for field_name in fields:
|
|
191
|
+
field_value = method(struct=struct, field_name=field_name, strip=strip)
|
|
192
|
+
if self._is_valid_field_value_for_serialization(
|
|
193
|
+
field_name, field_value, strip
|
|
194
|
+
):
|
|
195
|
+
struct[field_name] = field_value
|
|
196
|
+
return struct
|
|
197
|
+
|
|
198
|
+
def _serialize_field(
|
|
199
|
+
self, struct: dict, field_name: str = None, strip: bool = False
|
|
200
|
+
) -> typing.Any:
|
|
201
|
+
# We pull the field from self and not from struct because it was excluded from the struct when looping over
|
|
202
|
+
# the fields to save.
|
|
203
|
+
return getattr(self, field_name, None)
|
|
204
|
+
|
|
205
|
+
def _enrich_field(
|
|
206
|
+
self, struct: dict, field_name: str = None, strip: bool = False
|
|
207
|
+
) -> typing.Any:
|
|
208
|
+
# We first try to pull from struct because the field might have been already serialized and if not,
|
|
209
|
+
# we pull from self
|
|
210
|
+
return struct.get(field_name, None) or getattr(self, field_name, None)
|
|
211
|
+
|
|
212
|
+
def _apply_enrichment_before_to_dict_completion(
|
|
213
|
+
self, struct: dict, strip: bool = False
|
|
214
|
+
) -> dict:
|
|
86
215
|
return struct
|
|
87
216
|
|
|
88
217
|
@classmethod
|
|
@@ -111,19 +240,21 @@ class ModelObj:
|
|
|
111
240
|
|
|
112
241
|
return new_obj
|
|
113
242
|
|
|
114
|
-
def to_yaml(self, exclude=None) -> str:
|
|
243
|
+
def to_yaml(self, exclude=None, strip: bool = False) -> str:
|
|
115
244
|
"""convert the object to yaml
|
|
116
245
|
|
|
117
246
|
:param exclude: list of fields to exclude from the yaml
|
|
247
|
+
:param strip: if True, strip fields that are not required to actually define the object
|
|
118
248
|
"""
|
|
119
|
-
return dict_to_yaml(self.to_dict(exclude=exclude))
|
|
249
|
+
return dict_to_yaml(self.to_dict(exclude=exclude, strip=strip))
|
|
120
250
|
|
|
121
|
-
def to_json(self, exclude=None):
|
|
251
|
+
def to_json(self, exclude=None, strip: bool = False):
|
|
122
252
|
"""convert the object to json
|
|
123
253
|
|
|
124
254
|
:param exclude: list of fields to exclude from the json
|
|
255
|
+
:param strip: if True, strip fields that are not required to actually define the object
|
|
125
256
|
"""
|
|
126
|
-
return dict_to_json(self.to_dict(exclude=exclude))
|
|
257
|
+
return dict_to_json(self.to_dict(exclude=exclude, strip=strip))
|
|
127
258
|
|
|
128
259
|
def to_str(self):
|
|
129
260
|
"""convert the object to string (with dict layout)"""
|
|
@@ -175,8 +306,8 @@ class ObjectDict:
|
|
|
175
306
|
self._children[key] = child
|
|
176
307
|
return child
|
|
177
308
|
|
|
178
|
-
def to_dict(self):
|
|
179
|
-
return {k: v.to_dict() for k, v in self._children.items()}
|
|
309
|
+
def to_dict(self, strip: bool = False):
|
|
310
|
+
return {k: v.to_dict(strip=strip) for k, v in self._children.items()}
|
|
180
311
|
|
|
181
312
|
@classmethod
|
|
182
313
|
def from_dict(cls, classes_map: dict, children=None, default_kind=""):
|
|
@@ -258,9 +389,9 @@ class ObjectList:
|
|
|
258
389
|
def __delitem__(self, key):
|
|
259
390
|
del self._children[key]
|
|
260
391
|
|
|
261
|
-
def to_dict(self):
|
|
392
|
+
def to_dict(self, strip: bool = False):
|
|
262
393
|
# method used by ModelObj class to serialize the object to nested dict
|
|
263
|
-
return [t.to_dict() for t in self._children.values()]
|
|
394
|
+
return [t.to_dict(strip=strip) for t in self._children.values()]
|
|
264
395
|
|
|
265
396
|
@classmethod
|
|
266
397
|
def from_list(cls, child_class, children=None):
|
|
@@ -305,6 +436,18 @@ class Credentials(ModelObj):
|
|
|
305
436
|
|
|
306
437
|
|
|
307
438
|
class BaseMetadata(ModelObj):
|
|
439
|
+
_default_fields_to_strip = ModelObj._default_fields_to_strip + [
|
|
440
|
+
"hash",
|
|
441
|
+
# Below are environment specific fields, no need to keep when stripping
|
|
442
|
+
"namespace",
|
|
443
|
+
"project",
|
|
444
|
+
"labels",
|
|
445
|
+
"annotations",
|
|
446
|
+
"credentials",
|
|
447
|
+
# Below are state fields, no need to keep when stripping
|
|
448
|
+
"updated",
|
|
449
|
+
]
|
|
450
|
+
|
|
308
451
|
def __init__(
|
|
309
452
|
self,
|
|
310
453
|
name=None,
|
|
@@ -344,7 +487,7 @@ class ImageBuilder(ModelObj):
|
|
|
344
487
|
|
|
345
488
|
def __init__(
|
|
346
489
|
self,
|
|
347
|
-
functionSourceCode=None,
|
|
490
|
+
functionSourceCode=None, # noqa: N803 - should be "snake_case", kept for BC
|
|
348
491
|
source=None,
|
|
349
492
|
image=None,
|
|
350
493
|
base_image=None,
|
|
@@ -449,7 +592,7 @@ class ImageBuilder(ModelObj):
|
|
|
449
592
|
|
|
450
593
|
def with_commands(
|
|
451
594
|
self,
|
|
452
|
-
commands:
|
|
595
|
+
commands: list[str],
|
|
453
596
|
overwrite: bool = False,
|
|
454
597
|
):
|
|
455
598
|
"""add commands to build spec.
|
|
@@ -476,7 +619,7 @@ class ImageBuilder(ModelObj):
|
|
|
476
619
|
|
|
477
620
|
def with_requirements(
|
|
478
621
|
self,
|
|
479
|
-
requirements: Optional[
|
|
622
|
+
requirements: Optional[list[str]] = None,
|
|
480
623
|
requirements_file: str = "",
|
|
481
624
|
overwrite: bool = False,
|
|
482
625
|
):
|
|
@@ -509,7 +652,7 @@ class ImageBuilder(ModelObj):
|
|
|
509
652
|
|
|
510
653
|
# handle the requirements_file argument
|
|
511
654
|
if requirements_file:
|
|
512
|
-
with open(requirements_file
|
|
655
|
+
with open(requirements_file) as fp:
|
|
513
656
|
requirements_to_resolve.extend(fp.read().splitlines())
|
|
514
657
|
|
|
515
658
|
# handle the requirements argument
|
|
@@ -536,14 +679,36 @@ class ImageBuilder(ModelObj):
|
|
|
536
679
|
|
|
537
680
|
|
|
538
681
|
class Notification(ModelObj):
|
|
539
|
-
"""Notification
|
|
682
|
+
"""Notification object
|
|
683
|
+
|
|
684
|
+
:param kind: notification implementation kind - slack, webhook, etc. See
|
|
685
|
+
:py:class:`mlrun.common.schemas.notification.NotificationKind`
|
|
686
|
+
:param name: for logging and identification
|
|
687
|
+
:param message: message content in the notification
|
|
688
|
+
:param severity: severity to display in the notification
|
|
689
|
+
:param when: list of statuses to trigger the notification: 'running', 'completed', 'error'
|
|
690
|
+
:param condition: optional condition to trigger the notification, a jinja2 expression that can use run data
|
|
691
|
+
to evaluate if the notification should be sent in addition to the 'when' statuses.
|
|
692
|
+
e.g.: '{{ run["status"]["results"]["accuracy"] < 0.9}}'
|
|
693
|
+
:param params: Implementation specific parameters for the notification implementation (e.g. slack webhook url,
|
|
694
|
+
git repository details, etc.)
|
|
695
|
+
:param secret_params: secret parameters for the notification implementation, same as params but will be stored
|
|
696
|
+
in a k8s secret and passed as a secret reference to the implementation.
|
|
697
|
+
:param status: notification status - pending, sent, error
|
|
698
|
+
:param sent_time: time the notification was sent
|
|
699
|
+
:param reason: failure reason if the notification failed to send
|
|
700
|
+
"""
|
|
540
701
|
|
|
541
702
|
def __init__(
|
|
542
703
|
self,
|
|
543
|
-
kind=
|
|
704
|
+
kind: mlrun.common.schemas.notification.NotificationKind = (
|
|
705
|
+
mlrun.common.schemas.notification.NotificationKind.slack
|
|
706
|
+
),
|
|
544
707
|
name=None,
|
|
545
708
|
message=None,
|
|
546
|
-
severity=
|
|
709
|
+
severity: mlrun.common.schemas.notification.NotificationSeverity = (
|
|
710
|
+
mlrun.common.schemas.notification.NotificationSeverity.INFO
|
|
711
|
+
),
|
|
547
712
|
when=None,
|
|
548
713
|
condition=None,
|
|
549
714
|
secret_params=None,
|
|
@@ -552,12 +717,10 @@ class Notification(ModelObj):
|
|
|
552
717
|
sent_time=None,
|
|
553
718
|
reason=None,
|
|
554
719
|
):
|
|
555
|
-
self.kind = kind
|
|
720
|
+
self.kind = kind
|
|
556
721
|
self.name = name or ""
|
|
557
722
|
self.message = message or ""
|
|
558
|
-
self.severity =
|
|
559
|
-
severity or mlrun.common.schemas.notification.NotificationSeverity.INFO
|
|
560
|
-
)
|
|
723
|
+
self.severity = severity
|
|
561
724
|
self.when = when or ["completed"]
|
|
562
725
|
self.condition = condition or ""
|
|
563
726
|
self.secret_params = secret_params or {}
|
|
@@ -587,8 +750,49 @@ class Notification(ModelObj):
|
|
|
587
750
|
"Notification params size exceeds max size of 1 MB"
|
|
588
751
|
)
|
|
589
752
|
|
|
753
|
+
def validate_notification_params(self):
|
|
754
|
+
notification_class = mlrun.utils.notifications.NotificationTypes(
|
|
755
|
+
self.kind
|
|
756
|
+
).get_notification()
|
|
757
|
+
|
|
758
|
+
secret_params = self.secret_params or {}
|
|
759
|
+
params = self.params or {}
|
|
760
|
+
|
|
761
|
+
# if the secret_params are already masked - no need to validate
|
|
762
|
+
params_secret = secret_params.get("secret", "")
|
|
763
|
+
if params_secret:
|
|
764
|
+
if len(secret_params) > 1:
|
|
765
|
+
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
766
|
+
"When the 'secret' key is present, 'secret_params' should not contain any other keys."
|
|
767
|
+
)
|
|
768
|
+
return
|
|
769
|
+
|
|
770
|
+
if not secret_params and not params:
|
|
771
|
+
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
772
|
+
"Both 'secret_params' and 'params' are empty, at least one must be defined."
|
|
773
|
+
)
|
|
774
|
+
|
|
775
|
+
notification_class.validate_params(secret_params | params)
|
|
776
|
+
|
|
777
|
+
def enrich_unmasked_secret_params_from_project_secret(self):
|
|
778
|
+
"""
|
|
779
|
+
Fill the notification secret params from the project secret.
|
|
780
|
+
We are using this function instead of unmask_secret_params_from_project_secret when we run inside the
|
|
781
|
+
workflow runner pod that doesn't have access to the k8s secrets (but have access to the project secret)
|
|
782
|
+
"""
|
|
783
|
+
secret = self.secret_params.get("secret")
|
|
784
|
+
if secret:
|
|
785
|
+
secret_value = mlrun.get_secret_or_env(secret)
|
|
786
|
+
if secret_value:
|
|
787
|
+
try:
|
|
788
|
+
self.secret_params = json.loads(secret_value)
|
|
789
|
+
except ValueError as exc:
|
|
790
|
+
raise mlrun.errors.MLRunValueError(
|
|
791
|
+
"Failed to parse secret value"
|
|
792
|
+
) from exc
|
|
793
|
+
|
|
590
794
|
@staticmethod
|
|
591
|
-
def validate_notification_uniqueness(notifications:
|
|
795
|
+
def validate_notification_uniqueness(notifications: list["Notification"]):
|
|
592
796
|
"""Validate that all notifications in the list are unique by name"""
|
|
593
797
|
names = [notification.name for notification in notifications]
|
|
594
798
|
if len(names) != len(set(names)):
|
|
@@ -627,7 +831,10 @@ class RunMetadata(ModelObj):
|
|
|
627
831
|
def is_workflow_runner(self):
|
|
628
832
|
if not self.labels:
|
|
629
833
|
return False
|
|
630
|
-
return
|
|
834
|
+
return (
|
|
835
|
+
self.labels.get(mlrun_constants.MLRunInternalLabels.job_type, "")
|
|
836
|
+
== "workflow-runner"
|
|
837
|
+
)
|
|
631
838
|
|
|
632
839
|
|
|
633
840
|
class HyperParamStrategies:
|
|
@@ -697,6 +904,10 @@ class HyperParamOptions(ModelObj):
|
|
|
697
904
|
class RunSpec(ModelObj):
|
|
698
905
|
"""Run specification"""
|
|
699
906
|
|
|
907
|
+
_fields_to_serialize = ModelObj._fields_to_serialize + [
|
|
908
|
+
"handler",
|
|
909
|
+
]
|
|
910
|
+
|
|
700
911
|
def __init__(
|
|
701
912
|
self,
|
|
702
913
|
parameters=None,
|
|
@@ -720,6 +931,8 @@ class RunSpec(ModelObj):
|
|
|
720
931
|
returns=None,
|
|
721
932
|
notifications=None,
|
|
722
933
|
state_thresholds=None,
|
|
934
|
+
reset_on_run=None,
|
|
935
|
+
node_selector=None,
|
|
723
936
|
):
|
|
724
937
|
# A dictionary of parsing configurations that will be read from the inputs the user set. The keys are the inputs
|
|
725
938
|
# keys (parameter names) and the values are the type hint given in the input keys after the colon.
|
|
@@ -756,19 +969,25 @@ class RunSpec(ModelObj):
|
|
|
756
969
|
self.allow_empty_resources = allow_empty_resources
|
|
757
970
|
self._notifications = notifications or []
|
|
758
971
|
self.state_thresholds = state_thresholds or {}
|
|
759
|
-
|
|
760
|
-
|
|
761
|
-
|
|
762
|
-
|
|
763
|
-
|
|
764
|
-
|
|
972
|
+
self.reset_on_run = reset_on_run
|
|
973
|
+
self.node_selector = node_selector or {}
|
|
974
|
+
|
|
975
|
+
def _serialize_field(
|
|
976
|
+
self, struct: dict, field_name: str = None, strip: bool = False
|
|
977
|
+
) -> Optional[str]:
|
|
978
|
+
# We pull the field from self and not from struct because it was excluded from the struct
|
|
979
|
+
if field_name == "handler":
|
|
980
|
+
if self.handler and isinstance(self.handler, str):
|
|
981
|
+
return self.handler
|
|
982
|
+
return None
|
|
983
|
+
return super()._serialize_field(struct, field_name, strip)
|
|
765
984
|
|
|
766
985
|
def is_hyper_job(self):
|
|
767
986
|
param_file = self.param_file or self.hyper_param_options.param_file
|
|
768
987
|
return param_file or self.hyperparams
|
|
769
988
|
|
|
770
989
|
@property
|
|
771
|
-
def inputs(self) ->
|
|
990
|
+
def inputs(self) -> dict[str, str]:
|
|
772
991
|
"""
|
|
773
992
|
Get the inputs dictionary. A dictionary of parameter names as keys and paths as values.
|
|
774
993
|
|
|
@@ -777,7 +996,7 @@ class RunSpec(ModelObj):
|
|
|
777
996
|
return self._inputs
|
|
778
997
|
|
|
779
998
|
@inputs.setter
|
|
780
|
-
def inputs(self, inputs:
|
|
999
|
+
def inputs(self, inputs: dict[str, str]):
|
|
781
1000
|
"""
|
|
782
1001
|
Set the given inputs in the spec. Inputs can include a type hint string in their keys following a colon, meaning
|
|
783
1002
|
following this structure: "<input key : type hint>".
|
|
@@ -786,7 +1005,7 @@ class RunSpec(ModelObj):
|
|
|
786
1005
|
|
|
787
1006
|
>>> run_spec.inputs = {
|
|
788
1007
|
... "my_input": "...",
|
|
789
|
-
... "my_hinted_input : pandas.DataFrame": "..."
|
|
1008
|
+
... "my_hinted_input : pandas.DataFrame": "...",
|
|
790
1009
|
... }
|
|
791
1010
|
|
|
792
1011
|
:param inputs: The inputs to set.
|
|
@@ -800,7 +1019,7 @@ class RunSpec(ModelObj):
|
|
|
800
1019
|
self._inputs = self._verify_dict(inputs, "inputs")
|
|
801
1020
|
|
|
802
1021
|
@property
|
|
803
|
-
def inputs_type_hints(self) ->
|
|
1022
|
+
def inputs_type_hints(self) -> dict[str, str]:
|
|
804
1023
|
"""
|
|
805
1024
|
Get the input type hints. A dictionary of parameter names as keys and their type hints as values.
|
|
806
1025
|
|
|
@@ -809,7 +1028,7 @@ class RunSpec(ModelObj):
|
|
|
809
1028
|
return self._inputs_type_hints
|
|
810
1029
|
|
|
811
1030
|
@inputs_type_hints.setter
|
|
812
|
-
def inputs_type_hints(self, inputs_type_hints:
|
|
1031
|
+
def inputs_type_hints(self, inputs_type_hints: dict[str, str]):
|
|
813
1032
|
"""
|
|
814
1033
|
Set the inputs type hints to parse during a run.
|
|
815
1034
|
|
|
@@ -830,7 +1049,7 @@ class RunSpec(ModelObj):
|
|
|
830
1049
|
return self._returns
|
|
831
1050
|
|
|
832
1051
|
@returns.setter
|
|
833
|
-
def returns(self, returns:
|
|
1052
|
+
def returns(self, returns: list[Union[str, dict[str, str]]]):
|
|
834
1053
|
"""
|
|
835
1054
|
Set the returns list to log the returning values at the end of a run.
|
|
836
1055
|
|
|
@@ -864,7 +1083,7 @@ class RunSpec(ModelObj):
|
|
|
864
1083
|
)
|
|
865
1084
|
|
|
866
1085
|
@property
|
|
867
|
-
def outputs(self) ->
|
|
1086
|
+
def outputs(self) -> list[str]:
|
|
868
1087
|
"""
|
|
869
1088
|
Get the expected outputs. The list is constructed from keys of both the `outputs` and `returns` properties.
|
|
870
1089
|
|
|
@@ -929,7 +1148,7 @@ class RunSpec(ModelObj):
|
|
|
929
1148
|
return self._state_thresholds
|
|
930
1149
|
|
|
931
1150
|
@state_thresholds.setter
|
|
932
|
-
def state_thresholds(self, state_thresholds:
|
|
1151
|
+
def state_thresholds(self, state_thresholds: dict[str, str]):
|
|
933
1152
|
"""
|
|
934
1153
|
Set the dictionary of k8s resource states to thresholds time strings.
|
|
935
1154
|
The state will be matched against the pod's status. The threshold should be a time string that conforms
|
|
@@ -981,8 +1200,8 @@ class RunSpec(ModelObj):
|
|
|
981
1200
|
|
|
982
1201
|
@staticmethod
|
|
983
1202
|
def join_outputs_and_returns(
|
|
984
|
-
outputs:
|
|
985
|
-
) ->
|
|
1203
|
+
outputs: list[str], returns: list[Union[str, dict[str, str]]]
|
|
1204
|
+
) -> list[str]:
|
|
986
1205
|
"""
|
|
987
1206
|
Get the outputs set in the spec. The outputs are constructed from both the 'outputs' and 'returns' properties
|
|
988
1207
|
that were set by the user.
|
|
@@ -1013,7 +1232,7 @@ class RunSpec(ModelObj):
|
|
|
1013
1232
|
return outputs
|
|
1014
1233
|
|
|
1015
1234
|
@staticmethod
|
|
1016
|
-
def _separate_type_hint_from_input_key(input_key: str) ->
|
|
1235
|
+
def _separate_type_hint_from_input_key(input_key: str) -> tuple[str, str]:
|
|
1017
1236
|
"""
|
|
1018
1237
|
An input key in the `inputs` dictionary parameter of a task (or `Runtime.run` method) or the docs setting of a
|
|
1019
1238
|
`Runtime` handler can be provided with a colon to specify its type hint in the following structure:
|
|
@@ -1057,7 +1276,7 @@ class RunStatus(ModelObj):
|
|
|
1057
1276
|
iterations=None,
|
|
1058
1277
|
ui_url=None,
|
|
1059
1278
|
reason: str = None,
|
|
1060
|
-
notifications:
|
|
1279
|
+
notifications: dict[str, Notification] = None,
|
|
1061
1280
|
artifact_uris: dict[str, str] = None,
|
|
1062
1281
|
):
|
|
1063
1282
|
self.state = state or "created"
|
|
@@ -1127,7 +1346,7 @@ class RunTemplate(ModelObj):
|
|
|
1127
1346
|
|
|
1128
1347
|
task.with_input("data", "/file-dir/path/to/file")
|
|
1129
1348
|
task.with_input("data", "s3://<bucket>/path/to/file")
|
|
1130
|
-
task.with_input("data", "v3io
|
|
1349
|
+
task.with_input("data", "v3io://<data-container>/path/to/file")
|
|
1131
1350
|
"""
|
|
1132
1351
|
if not self.spec.inputs:
|
|
1133
1352
|
self.spec.inputs = {}
|
|
@@ -1146,7 +1365,7 @@ class RunTemplate(ModelObj):
|
|
|
1146
1365
|
|
|
1147
1366
|
example::
|
|
1148
1367
|
|
|
1149
|
-
grid_params = {"p1": [2,4,1], "p2": [10,20]}
|
|
1368
|
+
grid_params = {"p1": [2, 4, 1], "p2": [10, 20]}
|
|
1150
1369
|
task = mlrun.new_task("grid-search")
|
|
1151
1370
|
task.with_hyper_params(grid_params, selector="max.accuracy")
|
|
1152
1371
|
"""
|
|
@@ -1284,33 +1503,63 @@ class RunObject(RunTemplate):
|
|
|
1284
1503
|
@property
|
|
1285
1504
|
def error(self) -> str:
|
|
1286
1505
|
"""error string if failed"""
|
|
1287
|
-
if
|
|
1506
|
+
if (
|
|
1507
|
+
self.status
|
|
1508
|
+
and self.status.state
|
|
1509
|
+
in mlrun.common.runtimes.constants.RunStates.error_and_abortion_states()
|
|
1510
|
+
):
|
|
1288
1511
|
unknown_error = ""
|
|
1289
1512
|
if (
|
|
1290
1513
|
self.status.state
|
|
1291
|
-
in mlrun.runtimes.constants.RunStates.abortion_states()
|
|
1514
|
+
in mlrun.common.runtimes.constants.RunStates.abortion_states()
|
|
1292
1515
|
):
|
|
1293
1516
|
unknown_error = "Run was aborted"
|
|
1294
1517
|
|
|
1295
|
-
elif
|
|
1518
|
+
elif (
|
|
1519
|
+
self.status.state
|
|
1520
|
+
in mlrun.common.runtimes.constants.RunStates.error_states()
|
|
1521
|
+
):
|
|
1296
1522
|
unknown_error = "Unknown error"
|
|
1297
1523
|
|
|
1298
1524
|
return (
|
|
1299
1525
|
self.status.error
|
|
1300
|
-
or self.status.reason
|
|
1301
1526
|
or self.status.status_text
|
|
1527
|
+
or self.status.reason
|
|
1302
1528
|
or unknown_error
|
|
1303
1529
|
)
|
|
1304
1530
|
return ""
|
|
1305
1531
|
|
|
1306
|
-
def output(self, key):
|
|
1307
|
-
"""
|
|
1532
|
+
def output(self, key: str):
|
|
1533
|
+
"""
|
|
1534
|
+
Return the value of a specific result or artifact by key.
|
|
1535
|
+
|
|
1536
|
+
This method waits for the outputs to complete and retrieves the value corresponding to the provided key.
|
|
1537
|
+
If the key exists in the results, it returns the corresponding result value.
|
|
1538
|
+
If not found in results, it attempts to fetch the artifact by key (cached in the run status).
|
|
1539
|
+
If the artifact is not found, it tries to fetch the artifact URI by key.
|
|
1540
|
+
If no artifact or result is found for the key, returns None.
|
|
1541
|
+
|
|
1542
|
+
:param key: The key of the result or artifact to retrieve.
|
|
1543
|
+
:return: The value of the result or the artifact URI corresponding to the key, or None if not found.
|
|
1544
|
+
"""
|
|
1308
1545
|
self._outputs_wait_for_completion()
|
|
1546
|
+
|
|
1547
|
+
# Check if the key exists in results and return the result value
|
|
1309
1548
|
if self.status.results and key in self.status.results:
|
|
1310
|
-
return self.status.results
|
|
1549
|
+
return self.status.results[key]
|
|
1550
|
+
|
|
1551
|
+
# Artifacts are usually cached in the run object under `status.artifacts`. However, the artifacts are not
|
|
1552
|
+
# stored in the DB as part of the run. The server may enrich the run with the artifacts or provide
|
|
1553
|
+
# `status.artifact_uris` instead. See mlrun.common.formatters.run.RunFormat.
|
|
1554
|
+
# When running locally - `status.artifact_uri` does not exist in the run.
|
|
1555
|
+
# When listing runs - `status.artifacts` does not exist in the run.
|
|
1311
1556
|
artifact = self._artifact(key)
|
|
1312
1557
|
if artifact:
|
|
1313
1558
|
return get_artifact_target(artifact, self.metadata.project)
|
|
1559
|
+
|
|
1560
|
+
if self.status.artifact_uris and key in self.status.artifact_uris:
|
|
1561
|
+
return self.status.artifact_uris[key]
|
|
1562
|
+
|
|
1314
1563
|
return None
|
|
1315
1564
|
|
|
1316
1565
|
@property
|
|
@@ -1318,31 +1567,55 @@ class RunObject(RunTemplate):
|
|
|
1318
1567
|
"""UI URL (for relevant runtimes)"""
|
|
1319
1568
|
self.refresh()
|
|
1320
1569
|
if not self._status.ui_url:
|
|
1321
|
-
print("UI currently not available (status={
|
|
1570
|
+
print(f"UI currently not available (status={self._status.state})")
|
|
1322
1571
|
return self._status.ui_url
|
|
1323
1572
|
|
|
1324
1573
|
@property
|
|
1325
1574
|
def outputs(self):
|
|
1326
|
-
"""
|
|
1327
|
-
outputs
|
|
1575
|
+
"""
|
|
1576
|
+
Return a dictionary of outputs, including result values and artifact URIs.
|
|
1577
|
+
|
|
1578
|
+
This method waits for the outputs to complete and combines result values
|
|
1579
|
+
and artifact URIs into a single dictionary. If there are multiple artifacts
|
|
1580
|
+
for the same key, only include the artifact that does not have the "latest" tag.
|
|
1581
|
+
If there is no other tag, include the "latest" tag as a fallback.
|
|
1582
|
+
|
|
1583
|
+
:return: Dictionary containing result values and artifact URIs.
|
|
1584
|
+
"""
|
|
1328
1585
|
self._outputs_wait_for_completion()
|
|
1586
|
+
outputs = {}
|
|
1587
|
+
|
|
1588
|
+
# Add results if available
|
|
1329
1589
|
if self.status.results:
|
|
1330
|
-
outputs
|
|
1590
|
+
outputs.update(self.status.results)
|
|
1591
|
+
|
|
1592
|
+
# Artifacts are usually cached in the run object under `status.artifacts`. However, the artifacts are not
|
|
1593
|
+
# stored in the DB as part of the run. The server may enrich the run with the artifacts or provide
|
|
1594
|
+
# `status.artifact_uris` instead. See mlrun.common.formatters.run.RunFormat.
|
|
1595
|
+
# When running locally - `status.artifact_uri` does not exist in the run.
|
|
1596
|
+
# When listing runs - `status.artifacts` does not exist in the run.
|
|
1331
1597
|
if self.status.artifacts:
|
|
1332
|
-
|
|
1333
|
-
|
|
1334
|
-
|
|
1598
|
+
outputs.update(self._process_artifacts(self.status.artifacts))
|
|
1599
|
+
elif self.status.artifact_uris:
|
|
1600
|
+
outputs.update(self.status.artifact_uris)
|
|
1601
|
+
|
|
1335
1602
|
return outputs
|
|
1336
1603
|
|
|
1337
|
-
def artifact(self, key) -> "mlrun.DataItem":
|
|
1338
|
-
"""
|
|
1604
|
+
def artifact(self, key: str) -> "mlrun.DataItem":
|
|
1605
|
+
"""Return artifact DataItem by key.
|
|
1606
|
+
|
|
1607
|
+
This method waits for the outputs to complete, searches for the artifact matching the given key,
|
|
1608
|
+
and returns a DataItem if the artifact is found.
|
|
1609
|
+
|
|
1610
|
+
:param key: The key of the artifact to find.
|
|
1611
|
+
:return: A DataItem corresponding to the artifact with the given key, or None if no such artifact is found.
|
|
1612
|
+
"""
|
|
1339
1613
|
self._outputs_wait_for_completion()
|
|
1340
1614
|
artifact = self._artifact(key)
|
|
1341
|
-
if artifact:
|
|
1342
|
-
|
|
1343
|
-
|
|
1344
|
-
|
|
1345
|
-
return None
|
|
1615
|
+
if not artifact:
|
|
1616
|
+
return None
|
|
1617
|
+
uri = get_artifact_target(artifact, self.metadata.project)
|
|
1618
|
+
return mlrun.get_dataitem(uri) if uri else None
|
|
1346
1619
|
|
|
1347
1620
|
def _outputs_wait_for_completion(
|
|
1348
1621
|
self,
|
|
@@ -1360,12 +1633,85 @@ class RunObject(RunTemplate):
|
|
|
1360
1633
|
)
|
|
1361
1634
|
|
|
1362
1635
|
def _artifact(self, key):
|
|
1363
|
-
"""
|
|
1364
|
-
|
|
1365
|
-
|
|
1366
|
-
|
|
1367
|
-
|
|
1368
|
-
|
|
1636
|
+
"""
|
|
1637
|
+
Return the last artifact DataItem that matches the given key.
|
|
1638
|
+
|
|
1639
|
+
If multiple artifacts with the same key exist, return the last one in the list.
|
|
1640
|
+
If there are artifacts with different tags, the method will return the one with a tag other than 'latest'
|
|
1641
|
+
if available.
|
|
1642
|
+
If no artifact with the given key is found, return None.
|
|
1643
|
+
|
|
1644
|
+
:param key: The key of the artifact to retrieve.
|
|
1645
|
+
:return: The last artifact DataItem with the given key, or None if no such artifact is found.
|
|
1646
|
+
"""
|
|
1647
|
+
if not self.status.artifacts:
|
|
1648
|
+
return None
|
|
1649
|
+
|
|
1650
|
+
# Collect artifacts that match the key
|
|
1651
|
+
matching_artifacts = [
|
|
1652
|
+
artifact
|
|
1653
|
+
for artifact in self.status.artifacts
|
|
1654
|
+
if artifact["metadata"].get("key") == key
|
|
1655
|
+
]
|
|
1656
|
+
|
|
1657
|
+
if not matching_artifacts:
|
|
1658
|
+
return None
|
|
1659
|
+
|
|
1660
|
+
# Sort matching artifacts by creation date in ascending order.
|
|
1661
|
+
# The last element in the list will be the one created most recently.
|
|
1662
|
+
# In case the `created` field does not exist in the artifact, that artifact will appear first in the sorted list
|
|
1663
|
+
matching_artifacts.sort(
|
|
1664
|
+
key=lambda artifact: artifact["metadata"].get("created", datetime.min)
|
|
1665
|
+
)
|
|
1666
|
+
|
|
1667
|
+
# Filter out artifacts with 'latest' tag
|
|
1668
|
+
non_latest_artifacts = [
|
|
1669
|
+
artifact
|
|
1670
|
+
for artifact in matching_artifacts
|
|
1671
|
+
if artifact["metadata"].get("tag") != "latest"
|
|
1672
|
+
]
|
|
1673
|
+
|
|
1674
|
+
# Return the last non-'latest' artifact if available, otherwise return the last artifact
|
|
1675
|
+
# In the case of only one tag, `status.artifacts` includes [v1, latest]. In that case, we want to return v1.
|
|
1676
|
+
# In the case of multiple tags, `status.artifacts` includes [v1, latest, v2, v3].
|
|
1677
|
+
# In that case, we need to return the last one (v3).
|
|
1678
|
+
return (non_latest_artifacts or matching_artifacts)[-1]
|
|
1679
|
+
|
|
1680
|
+
def _process_artifacts(self, artifacts):
|
|
1681
|
+
artifacts_by_key = {}
|
|
1682
|
+
|
|
1683
|
+
# Organize artifacts by key
|
|
1684
|
+
for artifact in artifacts:
|
|
1685
|
+
key = artifact["metadata"]["key"]
|
|
1686
|
+
if key not in artifacts_by_key:
|
|
1687
|
+
artifacts_by_key[key] = []
|
|
1688
|
+
artifacts_by_key[key].append(artifact)
|
|
1689
|
+
|
|
1690
|
+
outputs = {}
|
|
1691
|
+
for key, artifacts in artifacts_by_key.items():
|
|
1692
|
+
# Sort matching artifacts by creation date in ascending order.
|
|
1693
|
+
# The last element in the list will be the one created most recently.
|
|
1694
|
+
# In case the `created` field does not exist in the artifactthat artifact will appear
|
|
1695
|
+
# first in the sorted list
|
|
1696
|
+
artifacts.sort(
|
|
1697
|
+
key=lambda artifact: artifact["metadata"].get("created", datetime.min)
|
|
1698
|
+
)
|
|
1699
|
+
|
|
1700
|
+
# Filter out artifacts with 'latest' tag
|
|
1701
|
+
non_latest_artifacts = [
|
|
1702
|
+
artifact
|
|
1703
|
+
for artifact in artifacts
|
|
1704
|
+
if artifact["metadata"].get("tag") != "latest"
|
|
1705
|
+
]
|
|
1706
|
+
|
|
1707
|
+
# Save the last non-'latest' artifact if available, otherwise save the last artifact
|
|
1708
|
+
# In the case of only one tag, `artifacts` includes [v1, latest], in that case, we want to save v1.
|
|
1709
|
+
# In the case of multiple tags, `artifacts` includes [v1, latest, v2, v3].
|
|
1710
|
+
# In that case, we need to save the last one (v3).
|
|
1711
|
+
artifact_to_save = (non_latest_artifacts or artifacts)[-1]
|
|
1712
|
+
outputs[key] = get_artifact_target(artifact_to_save, self.metadata.project)
|
|
1713
|
+
|
|
1714
|
+
return outputs
|
|
1369
1715
|
|
|
1370
1716
|
def uid(self):
|
|
1371
1717
|
"""run unique id"""
|
|
@@ -1373,7 +1719,10 @@ class RunObject(RunTemplate):
|
|
|
1373
1719
|
|
|
1374
1720
|
def state(self):
|
|
1375
1721
|
"""current run state"""
|
|
1376
|
-
if
|
|
1722
|
+
if (
|
|
1723
|
+
self.status.state
|
|
1724
|
+
in mlrun.common.runtimes.constants.RunStates.terminal_states()
|
|
1725
|
+
):
|
|
1377
1726
|
return self.status.state
|
|
1378
1727
|
self.refresh()
|
|
1379
1728
|
return self.status.state or "unknown"
|
|
@@ -1437,7 +1786,7 @@ class RunObject(RunTemplate):
|
|
|
1437
1786
|
last_pull_log_time = None
|
|
1438
1787
|
logs_enabled = show_logs is not False
|
|
1439
1788
|
state = self.state()
|
|
1440
|
-
if state not in mlrun.runtimes.constants.RunStates.terminal_states():
|
|
1789
|
+
if state not in mlrun.common.runtimes.constants.RunStates.terminal_states():
|
|
1441
1790
|
logger.info(
|
|
1442
1791
|
f"run {self.metadata.name} is not completed yet, waiting for it to complete",
|
|
1443
1792
|
current_state=state,
|
|
@@ -1447,7 +1796,8 @@ class RunObject(RunTemplate):
|
|
|
1447
1796
|
if (
|
|
1448
1797
|
logs_enabled
|
|
1449
1798
|
and logs_interval
|
|
1450
|
-
and state
|
|
1799
|
+
and state
|
|
1800
|
+
not in mlrun.common.runtimes.constants.RunStates.terminal_states()
|
|
1451
1801
|
and (
|
|
1452
1802
|
last_pull_log_time is None
|
|
1453
1803
|
or (datetime.now() - last_pull_log_time).seconds > logs_interval
|
|
@@ -1456,7 +1806,7 @@ class RunObject(RunTemplate):
|
|
|
1456
1806
|
last_pull_log_time = datetime.now()
|
|
1457
1807
|
state, offset = self.logs(watch=False, offset=offset)
|
|
1458
1808
|
|
|
1459
|
-
if state in mlrun.runtimes.constants.RunStates.terminal_states():
|
|
1809
|
+
if state in mlrun.common.runtimes.constants.RunStates.terminal_states():
|
|
1460
1810
|
if logs_enabled and logs_interval:
|
|
1461
1811
|
self.logs(watch=False, offset=offset)
|
|
1462
1812
|
break
|
|
@@ -1468,13 +1818,21 @@ class RunObject(RunTemplate):
|
|
|
1468
1818
|
)
|
|
1469
1819
|
if logs_enabled and not logs_interval:
|
|
1470
1820
|
self.logs(watch=False)
|
|
1471
|
-
if
|
|
1821
|
+
if (
|
|
1822
|
+
raise_on_failure
|
|
1823
|
+
and state != mlrun.common.runtimes.constants.RunStates.completed
|
|
1824
|
+
):
|
|
1472
1825
|
raise mlrun.errors.MLRunRuntimeError(
|
|
1473
1826
|
f"Task {self.metadata.name} did not complete (state={state})"
|
|
1474
1827
|
)
|
|
1475
1828
|
|
|
1476
1829
|
return state
|
|
1477
1830
|
|
|
1831
|
+
def abort(self):
|
|
1832
|
+
"""abort the run"""
|
|
1833
|
+
db = mlrun.get_run_db()
|
|
1834
|
+
db.abort_run(self.metadata.uid, self.metadata.project)
|
|
1835
|
+
|
|
1478
1836
|
@staticmethod
|
|
1479
1837
|
def create_uri(project: str, uid: str, iteration: Union[int, str], tag: str = ""):
|
|
1480
1838
|
if tag:
|
|
@@ -1483,10 +1841,13 @@ class RunObject(RunTemplate):
|
|
|
1483
1841
|
return f"{project}@{uid}#{iteration}{tag}"
|
|
1484
1842
|
|
|
1485
1843
|
@staticmethod
|
|
1486
|
-
def parse_uri(uri: str) ->
|
|
1487
|
-
|
|
1488
|
-
|
|
1489
|
-
|
|
1844
|
+
def parse_uri(uri: str) -> tuple[str, str, str, str]:
|
|
1845
|
+
"""Parse the run's uri
|
|
1846
|
+
|
|
1847
|
+
:param uri: run uri in the format of <project>@<uid>#<iteration>[:tag]
|
|
1848
|
+
:return: project, uid, iteration, tag
|
|
1849
|
+
"""
|
|
1850
|
+
uri_pattern = mlrun.utils.regex.run_uri_pattern
|
|
1490
1851
|
match = re.match(uri_pattern, uri)
|
|
1491
1852
|
if not match:
|
|
1492
1853
|
raise ValueError(
|
|
@@ -1700,11 +2061,13 @@ class DataSource(ModelObj):
|
|
|
1700
2061
|
]
|
|
1701
2062
|
kind = None
|
|
1702
2063
|
|
|
2064
|
+
_fields_to_serialize = ["start_time", "end_time"]
|
|
2065
|
+
|
|
1703
2066
|
def __init__(
|
|
1704
2067
|
self,
|
|
1705
2068
|
name: str = None,
|
|
1706
2069
|
path: str = None,
|
|
1707
|
-
attributes:
|
|
2070
|
+
attributes: dict[str, object] = None,
|
|
1708
2071
|
key_field: str = None,
|
|
1709
2072
|
time_field: str = None,
|
|
1710
2073
|
schedule: str = None,
|
|
@@ -1728,6 +2091,16 @@ class DataSource(ModelObj):
|
|
|
1728
2091
|
def set_secrets(self, secrets):
|
|
1729
2092
|
self._secrets = secrets
|
|
1730
2093
|
|
|
2094
|
+
def _serialize_field(
|
|
2095
|
+
self, struct: dict, field_name: str = None, strip: bool = False
|
|
2096
|
+
) -> typing.Any:
|
|
2097
|
+
value = super()._serialize_field(struct, field_name, strip)
|
|
2098
|
+
# We pull the field from self and not from struct because it was excluded from the struct when looping over
|
|
2099
|
+
# the fields to save.
|
|
2100
|
+
if field_name in ("start_time", "end_time") and isinstance(value, datetime):
|
|
2101
|
+
return value.isoformat()
|
|
2102
|
+
return value
|
|
2103
|
+
|
|
1731
2104
|
|
|
1732
2105
|
class DataTargetBase(ModelObj):
|
|
1733
2106
|
"""data target spec, specify a destination for the feature set data"""
|
|
@@ -1770,16 +2143,16 @@ class DataTargetBase(ModelObj):
|
|
|
1770
2143
|
kind: str = None,
|
|
1771
2144
|
name: str = "",
|
|
1772
2145
|
path=None,
|
|
1773
|
-
attributes:
|
|
2146
|
+
attributes: dict[str, str] = None,
|
|
1774
2147
|
after_step=None,
|
|
1775
2148
|
partitioned: bool = False,
|
|
1776
2149
|
key_bucketing_number: Optional[int] = None,
|
|
1777
|
-
partition_cols: Optional[
|
|
2150
|
+
partition_cols: Optional[list[str]] = None,
|
|
1778
2151
|
time_partitioning_granularity: Optional[str] = None,
|
|
1779
2152
|
max_events: Optional[int] = None,
|
|
1780
2153
|
flush_after_seconds: Optional[int] = None,
|
|
1781
|
-
storage_options:
|
|
1782
|
-
schema:
|
|
2154
|
+
storage_options: dict[str, str] = None,
|
|
2155
|
+
schema: dict[str, Any] = None,
|
|
1783
2156
|
credentials_prefix=None,
|
|
1784
2157
|
):
|
|
1785
2158
|
self.name = name
|
|
@@ -1818,6 +2191,7 @@ class DataTarget(DataTargetBase):
|
|
|
1818
2191
|
"name",
|
|
1819
2192
|
"kind",
|
|
1820
2193
|
"path",
|
|
2194
|
+
"attributes",
|
|
1821
2195
|
"start_time",
|
|
1822
2196
|
"online",
|
|
1823
2197
|
"status",
|
|
@@ -1849,6 +2223,7 @@ class DataTarget(DataTargetBase):
|
|
|
1849
2223
|
self.last_written = None
|
|
1850
2224
|
self._producer = None
|
|
1851
2225
|
self.producer = {}
|
|
2226
|
+
self.attributes = {}
|
|
1852
2227
|
|
|
1853
2228
|
@property
|
|
1854
2229
|
def producer(self) -> FeatureSetProducer:
|
|
@@ -1866,8 +2241,8 @@ class VersionedObjMetadata(ModelObj):
|
|
|
1866
2241
|
tag: str = None,
|
|
1867
2242
|
uid: str = None,
|
|
1868
2243
|
project: str = None,
|
|
1869
|
-
labels:
|
|
1870
|
-
annotations:
|
|
2244
|
+
labels: dict[str, str] = None,
|
|
2245
|
+
annotations: dict[str, str] = None,
|
|
1871
2246
|
updated=None,
|
|
1872
2247
|
):
|
|
1873
2248
|
self.name = name
|