mlrun 1.7.0rc33__py3-none-any.whl → 1.7.0rc35__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mlrun might be problematic.
- mlrun/artifacts/base.py +1 -0
- mlrun/common/schemas/__init__.py +1 -1
- mlrun/common/schemas/common.py +3 -0
- mlrun/common/schemas/function.py +7 -0
- mlrun/common/schemas/model_monitoring/__init__.py +1 -2
- mlrun/common/schemas/model_monitoring/constants.py +3 -16
- mlrun/common/schemas/notification.py +1 -1
- mlrun/common/schemas/project.py +35 -3
- mlrun/common/types.py +1 -0
- mlrun/config.py +6 -7
- mlrun/datastore/sources.py +8 -4
- mlrun/db/base.py +7 -5
- mlrun/db/httpdb.py +10 -8
- mlrun/execution.py +1 -3
- mlrun/model.py +143 -23
- mlrun/model_monitoring/applications/context.py +13 -15
- mlrun/model_monitoring/applications/evidently_base.py +4 -5
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +5 -0
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +2 -2
- mlrun/model_monitoring/db/tsdb/base.py +6 -3
- mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +0 -3
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +22 -3
- mlrun/model_monitoring/stream_processing.py +5 -153
- mlrun/projects/pipelines.py +76 -73
- mlrun/projects/project.py +7 -1
- mlrun/run.py +26 -9
- mlrun/runtimes/nuclio/api_gateway.py +22 -6
- mlrun/runtimes/nuclio/application/application.py +62 -11
- mlrun/runtimes/nuclio/function.py +8 -0
- mlrun/runtimes/nuclio/serving.py +6 -6
- mlrun/runtimes/pod.py +2 -4
- mlrun/serving/server.py +12 -7
- mlrun/serving/states.py +16 -2
- mlrun/utils/db.py +3 -0
- mlrun/utils/helpers.py +30 -19
- mlrun/utils/notifications/notification/webhook.py +8 -1
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.0rc33.dist-info → mlrun-1.7.0rc35.dist-info}/METADATA +4 -2
- {mlrun-1.7.0rc33.dist-info → mlrun-1.7.0rc35.dist-info}/RECORD +43 -44
- {mlrun-1.7.0rc33.dist-info → mlrun-1.7.0rc35.dist-info}/WHEEL +1 -1
- mlrun/model_monitoring/prometheus.py +0 -216
- {mlrun-1.7.0rc33.dist-info → mlrun-1.7.0rc35.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc33.dist-info → mlrun-1.7.0rc35.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc33.dist-info → mlrun-1.7.0rc35.dist-info}/top_level.txt +0 -0
mlrun/artifacts/base.py
CHANGED
mlrun/common/schemas/__init__.py
CHANGED
@@ -151,7 +151,6 @@ from .model_monitoring import (
     ModelMonitoringMode,
     ModelMonitoringStoreKinds,
     MonitoringFunctionNames,
-    PrometheusEndpoints,
     TSDBTarget,
     V3IOTSDBTables,
 )
@@ -170,6 +169,7 @@ from .project import (
     Project,
     ProjectDesiredState,
     ProjectMetadata,
+    ProjectOut,
     ProjectOutput,
     ProjectOwner,
     ProjectsOutput,
mlrun/common/schemas/common.py
CHANGED
mlrun/common/schemas/function.py
CHANGED
@@ -119,6 +119,13 @@ class FunctionSpec(pydantic.BaseModel):
     service_account: typing.Optional[ServiceAccount]
     state_thresholds: typing.Optional[StateThresholds]

+    class Config:
+        extra = pydantic.Extra.allow
+

 class Function(pydantic.BaseModel):
     spec: typing.Optional[FunctionSpec]
+    application: typing.Optional[dict[str, typing.Any]]
+
+    class Config:
+        extra = pydantic.Extra.allow
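The new `Config` blocks change how these schemas treat undeclared fields: anything extra is now kept rather than rejected. A minimal sketch of the pydantic v1 `Extra.allow` behavior (a toy model, not the actual mlrun schema; `gpu_type` is a made-up extra field):

```python
import typing

import pydantic  # pydantic v1 API, matching `pydantic.Extra.allow` above


class ToySpec(pydantic.BaseModel):
    image: typing.Optional[str] = None

    class Config:
        extra = pydantic.Extra.allow  # keep fields the schema does not declare


spec = ToySpec(image="mlrun/mlrun", gpu_type="nvidia")  # undeclared field survives
print(spec.dict())  # {'image': 'mlrun/mlrun', 'gpu_type': 'nvidia'}
```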
mlrun/common/schemas/model_monitoring/__init__.py
CHANGED

@@ -13,6 +13,7 @@
 # limitations under the License.

 from .constants import (
+    V3IO_MODEL_MONITORING_DB,
     ControllerPolicy,
     DriftStatus,
     EndpointType,
@@ -31,8 +32,6 @@ from .constants import (
     MonitoringFunctionNames,
     PredictionsQueryConstants,
     ProjectSecretKeys,
-    PrometheusEndpoints,
-    PrometheusMetric,
     ResultData,
     ResultKindApp,
     SchedulingKeys,
mlrun/common/schemas/model_monitoring/constants.py
CHANGED

@@ -170,7 +170,6 @@ class StreamKind(MonitoringStrEnum):
 class TSDBTarget(MonitoringStrEnum):
     V3IO_TSDB = "v3io-tsdb"
     TDEngine = "tdengine"
-    PROMETHEUS = "prometheus"


 class ProjectSecretKeys:
@@ -231,21 +230,6 @@ class EndpointType(IntEnum):
     LEAF_EP = 3  # end point that is a child of a router


-class PrometheusMetric:
-    PREDICTIONS_TOTAL = "predictions_total"
-    MODEL_LATENCY_SECONDS = "model_latency_seconds"
-    INCOME_FEATURES = "income_features"
-    ERRORS_TOTAL = "errors_total"
-    DRIFT_METRICS = "drift_metrics"
-    DRIFT_STATUS = "drift_status"
-
-
-class PrometheusEndpoints(MonitoringStrEnum):
-    MODEL_MONITORING_METRICS = "/model-monitoring-metrics"
-    MONITORING_BATCH_METRICS = "/monitoring-batch-metrics"
-    MONITORING_DRIFT_STATUS = "/monitoring-drift-status"
-
-
 class MonitoringFunctionNames(MonitoringStrEnum):
     STREAM = "model-monitoring-stream"
     APPLICATION_CONTROLLER = "model-monitoring-controller"
@@ -381,3 +365,6 @@ class SpecialApps:


 _RESERVED_FUNCTION_NAMES = MonitoringFunctionNames.list() + [SpecialApps.MLRUN_INFRA]
+
+
+V3IO_MODEL_MONITORING_DB = "v3io"
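The last hunk's `MonitoringFunctionNames.list()` call relies on the str-enum base these constants share. A hedged sketch of how such a `MonitoringStrEnum` typically works; the `list()` implementation below is an assumption for illustration, not mlrun's verbatim code:

```python
import enum


class MonitoringStrEnum(str, enum.Enum):
    @classmethod
    def list(cls) -> list[str]:
        # collect every member's string value, e.g. for building name lists
        return [member.value for member in cls]


class TSDBTarget(MonitoringStrEnum):
    V3IO_TSDB = "v3io-tsdb"
    TDEngine = "tdengine"
    # PROMETHEUS = "prometheus"  # removed in rc35, per the hunk above


print(TSDBTarget.list())  # ['v3io-tsdb', 'tdengine']
```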
mlrun/common/schemas/notification.py
CHANGED

@@ -55,7 +55,7 @@ class Notification(pydantic.BaseModel):
     message: str
     severity: NotificationSeverity
     when: list[str]
-    condition: str
+    condition: str = None
     params: dict[str, typing.Any] = None
     status: NotificationStatus = None
     sent_time: typing.Union[str, datetime.datetime] = None
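The `condition: str = None` change makes the field optional without touching callers, since pydantic v1 implicitly treats a field with a `None` default as `Optional`. A minimal sketch with a cut-down model (not the full schema):

```python
import pydantic  # pydantic v1 behavior


class ToyNotification(pydantic.BaseModel):
    message: str
    condition: str = None  # implicitly Optional[str] in pydantic v1


n = ToyNotification(message="run finished")  # now valid without `condition`
print(n.condition)  # None
```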
mlrun/common/schemas/project.py
CHANGED
@@ -100,6 +100,29 @@ class ProjectSpec(pydantic.BaseModel):
         extra = pydantic.Extra.allow


+class ProjectSpecOut(pydantic.BaseModel):
+    description: typing.Optional[str] = None
+    owner: typing.Optional[str] = None
+    goals: typing.Optional[str] = None
+    params: typing.Optional[dict] = {}
+    functions: typing.Optional[list] = []
+    workflows: typing.Optional[list] = []
+    artifacts: typing.Optional[list] = []
+    artifact_path: typing.Optional[str] = None
+    conda: typing.Optional[str] = None
+    source: typing.Optional[str] = None
+    subpath: typing.Optional[str] = None
+    origin_url: typing.Optional[str] = None
+    desired_state: typing.Optional[ProjectDesiredState] = ProjectDesiredState.online
+    custom_packagers: typing.Optional[list[tuple[str, bool]]] = None
+    default_image: typing.Optional[str] = None
+    build: typing.Any = None
+    default_function_node_selector: typing.Optional[dict] = {}
+
+    class Config:
+        extra = pydantic.Extra.allow
+
+
 class Project(pydantic.BaseModel):
     kind: ObjectKind = pydantic.Field(ObjectKind.project, const=True)
     metadata: ProjectMetadata
@@ -107,6 +130,15 @@ class Project(pydantic.BaseModel):
     status: ObjectStatus = ObjectStatus()


+# The reason we have a different schema for the response model is that we don't want to validate project.spec.build in
+# the response as the validation was added late and there may be corrupted values in the DB.
+class ProjectOut(pydantic.BaseModel):
+    kind: ObjectKind = pydantic.Field(ObjectKind.project, const=True)
+    metadata: ProjectMetadata
+    spec: ProjectSpecOut = ProjectSpecOut()
+    status: ObjectStatus = ObjectStatus()
+
+
 class ProjectOwner(pydantic.BaseModel):
     username: str
     access_key: str
@@ -134,16 +166,16 @@ class IguazioProject(pydantic.BaseModel):


 # The format query param controls the project type used:
-# full -
+# full - ProjectOut
 # name_only - str
 # summary - ProjectSummary
 # leader - currently only IguazioProject supported
 # The way pydantic handles typing.Union is that it takes the object and tries to coerce it to be the types of the
-# union by the definition order. Therefore we can't currently add generic dict for all leader formats, but we need
+# union by the definition order. Therefore, we can't currently add generic dict for all leader formats, but we need
 # to add a specific classes for them. it's frustrating but couldn't find other workaround, see:
 # https://github.com/samuelcolvin/pydantic/issues/1423, https://github.com/samuelcolvin/pydantic/issues/619
 ProjectOutput = typing.TypeVar(
-    "ProjectOutput",
+    "ProjectOutput", ProjectOut, str, ProjectSummary, IguazioProject
 )
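The comment in the second hunk explains the motivation: responses must not re-validate `spec.build`, because older DB rows may hold values the newer validation would reject. A hedged sketch of the difference, using simplified stand-in models rather than the real `ProjectSpec`/`ProjectSpecOut`:

```python
import typing

import pydantic  # pydantic v1


class StrictSpec(pydantic.BaseModel):
    build: typing.Optional[dict] = None  # hypothetical stricter input-side field


class LenientSpecOut(pydantic.BaseModel):
    build: typing.Any = None  # response side: pass stored values through as-is

    class Config:
        extra = pydantic.Extra.allow


corrupted = "not-a-dict"  # e.g. a legacy value written before validation existed
try:
    StrictSpec(build=corrupted)
except pydantic.ValidationError as err:
    print("strict model rejects it:", err.errors()[0]["msg"])

print(LenientSpecOut(build=corrupted).build)  # 'not-a-dict' survives the response
```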
mlrun/common/types.py
CHANGED
mlrun/config.py
CHANGED
@@ -485,10 +485,10 @@ default_config = {
     # pip install <requirement_specifier>, e.g. mlrun==0.5.4, mlrun~=0.5,
     # git+https://github.com/mlrun/mlrun@development. by default uses the version
     "mlrun_version_specifier": "",
-    "kaniko_image": "gcr.io/kaniko-project/executor:v1.
+    "kaniko_image": "gcr.io/kaniko-project/executor:v1.23.2",  # kaniko builder image
     "kaniko_init_container_image": "alpine:3.18",
     # image for kaniko init container when docker registry is ECR
-    "kaniko_aws_cli_image": "amazon/aws-cli:2.
+    "kaniko_aws_cli_image": "amazon/aws-cli:2.17.16",
     # kaniko sometimes fails to get filesystem from image, this is a workaround to retry the process
     # a known issue in Kaniko - https://github.com/GoogleContainerTools/kaniko/issues/1717
     "kaniko_image_fs_extraction_retries": "3",
@@ -1237,12 +1237,11 @@ class Config:

         return storage_options

-    def
-        if not version:
-            version = self.nuclio_version
+    def is_explicit_ack_enabled(self) -> bool:
         return self.httpdb.nuclio.explicit_ack == "enabled" and (
-            not
-            or semver.VersionInfo.parse(
+            not self.nuclio_version
+            or semver.VersionInfo.parse(self.nuclio_version)
+            >= semver.VersionInfo.parse("1.12.10")
         )
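The reconstructed `is_explicit_ack_enabled` gate combines a feature flag with a minimum nuclio version. A standalone sketch of the same check using the real `semver` package (the config plumbing is stubbed into plain arguments):

```python
import semver


def is_explicit_ack_enabled(explicit_ack: str, nuclio_version: str) -> bool:
    # enabled when the flag is on and nuclio is unknown (empty) or >= 1.12.10
    return explicit_ack == "enabled" and (
        not nuclio_version
        or semver.VersionInfo.parse(nuclio_version)
        >= semver.VersionInfo.parse("1.12.10")
    )


print(is_explicit_ack_enabled("enabled", "1.12.9"))  # False: nuclio too old
print(is_explicit_ack_enabled("enabled", "1.13.0"))  # True
print(is_explicit_ack_enabled("enabled", ""))        # True: version not yet known
```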
mlrun/datastore/sources.py
CHANGED
@@ -85,7 +85,8 @@ class BaseSourceDriver(DataSource):
         )

         explicit_ack = (
-            is_explicit_ack_supported(context)
+            is_explicit_ack_supported(context)
+            and mlrun.mlconf.is_explicit_ack_enabled()
         )
         return storey.SyncEmitSource(
             context=context,
@@ -944,7 +945,8 @@ class OnlineSource(BaseSourceDriver):

         source_args = self.attributes.get("source_args", {})
         explicit_ack = (
-            is_explicit_ack_supported(context)
+            is_explicit_ack_supported(context)
+            and mlrun.mlconf.is_explicit_ack_enabled()
         )
         # TODO: Change to AsyncEmitSource once we can drop support for nuclio<1.12.10
         src_class = storey.SyncEmitSource(
@@ -1029,7 +1031,8 @@ class StreamSource(OnlineSource):
         engine = "async"
         if hasattr(function.spec, "graph") and function.spec.graph.engine:
             engine = function.spec.graph.engine
-
+
+        if mlrun.mlconf.is_explicit_ack_enabled() and engine == "async":
             kwargs["explicit_ack_mode"] = "explicitOnly"
             kwargs["worker_allocation_mode"] = "static"

@@ -1116,7 +1119,8 @@ class KafkaSource(OnlineSource):
         engine = "async"
         if hasattr(function.spec, "graph") and function.spec.graph.engine:
             engine = function.spec.graph.engine
-
+
+        if mlrun.mlconf.is_explicit_ack_enabled() and engine == "async":
             explicit_ack_mode = "explicitOnly"
             extra_attributes["workerAllocationMode"] = extra_attributes.get(
                 "worker_allocation_mode", "static"
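All four hunks apply the same pattern: explicit ack must now be both supported by the runtime and enabled in config. A hedged sketch of that double gate with stubbed-out checks (the real ones are `is_explicit_ack_supported` and `mlrun.mlconf.is_explicit_ack_enabled`):

```python
class _Conf:
    """Stand-in for mlrun.mlconf, reduced to the one flag we need."""

    explicit_ack = "enabled"

    def is_explicit_ack_enabled(self) -> bool:
        return self.explicit_ack == "enabled"


def is_explicit_ack_supported(context) -> bool:
    # stub: the real check inspects the nuclio trigger/runtime capabilities
    return getattr(context, "supports_explicit_ack", False)


class _Context:
    supports_explicit_ack = True


mlconf = _Conf()
explicit_ack = is_explicit_ack_supported(_Context()) and mlconf.is_explicit_ack_enabled()
print(explicit_ack)  # True only when runtime support and config agree
```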
mlrun/db/base.py
CHANGED
@@ -242,9 +242,8 @@ class RunDBInterface(ABC):
             )
             artifact_identifiers.append(
                 mlrun.common.schemas.ArtifactIdentifier(
-                    key
-
-                    # pass the tree as uid
+                    # we pass the db_key and not the key so the API will be able to find the artifact in the db
+                    key=mlrun.utils.get_in_artifact(artifact_obj, "db_key"),
                     uid=mlrun.utils.get_in_artifact(artifact_obj, "uid"),
                     producer_id=mlrun.utils.get_in_artifact(artifact_obj, "tree"),
                     kind=mlrun.utils.get_in_artifact(artifact_obj, "kind"),
@@ -690,8 +689,11 @@ class RunDBInterface(ABC):
     @abstractmethod
     def store_api_gateway(
         self,
-        api_gateway:
-
+        api_gateway: Union[
+            mlrun.common.schemas.APIGateway,
+            "mlrun.runtimes.nuclio.api_gateway.APIGateway",
+        ],
+        project: Optional[str] = None,
     ):
         pass
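The second hunk types `api_gateway` as a `Union` whose second member is a string forward reference, which lets the abstract DB layer accept the client-side class without importing `mlrun.runtimes` (and risking a circular import). A minimal sketch of the pattern with illustrative stand-in names:

```python
from typing import Optional, Union


class SchemaAPIGateway:  # stands in for mlrun.common.schemas.APIGateway
    pass


def store_api_gateway(
    api_gateway: Union[SchemaAPIGateway, "RuntimeAPIGateway"],  # forward ref as string
    project: Optional[str] = None,
):
    """The string is only resolved if/when the annotation is evaluated."""


class RuntimeAPIGateway:  # may be defined (or imported) after the signature
    pass


store_api_gateway(RuntimeAPIGateway())  # both union members are accepted
```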
mlrun/db/httpdb.py
CHANGED
@@ -1015,7 +1015,7 @@ class HTTPRunDB(RunDBInterface):
             "format": format_,
             "tag": tag,
             "tree": tree,
-            "uid": uid,
+            "object-uid": uid,
         }
         if iter is not None:
             params["iter"] = str(iter)
@@ -1051,7 +1051,7 @@ class HTTPRunDB(RunDBInterface):
             "key": key,
             "tag": tag,
             "tree": tree,
-            "uid": uid,
+            "object-uid": uid,
             "iter": iter,
             "deletion_strategy": deletion_strategy,
         }
@@ -1071,8 +1071,8 @@ class HTTPRunDB(RunDBInterface):
         project=None,
         tag=None,
         labels: Optional[Union[dict[str, str], list[str]]] = None,
-        since=None,
-        until=None,
+        since: Optional[datetime] = None,
+        until: Optional[datetime] = None,
         iter: int = None,
         best_iteration: bool = False,
         kind: str = None,
@@ -1102,8 +1102,8 @@ class HTTPRunDB(RunDBInterface):
         :param tag: Return artifacts assigned this tag.
         :param labels: Return artifacts that have these labels. Labels can either be a dictionary {"label": "value"} or
             a list of "label=value" (match label key and value) or "label" (match just label key) strings.
-        :param since:
-        :param until:
+        :param since: Return artifacts updated after this date (as datetime object).
+        :param until: Return artifacts updated before this date (as datetime object).
         :param iter: Return artifacts from a specific iteration (where ``iter=0`` means the root iteration). If
             ``None`` (default) return artifacts from all iterations.
         :param best_iteration: Returns the artifact which belongs to the best iteration of a given run, in the case of
@@ -1137,6 +1137,8 @@ class HTTPRunDB(RunDBInterface):
             "format": format_,
             "producer_uri": producer_uri,
             "limit": limit,
+            "since": datetime_to_iso(since),
+            "until": datetime_to_iso(until),
         }
         error = "list artifacts"
         endpoint_path = f"projects/{project}/artifacts"
@@ -1684,7 +1686,7 @@ class HTTPRunDB(RunDBInterface):
         last_log_timestamp = float(
             resp.headers.get("x-mlrun-last-timestamp", "0.0")
         )
-        if func.kind in mlrun.runtimes.RuntimeKinds.
+        if func.kind in mlrun.runtimes.RuntimeKinds.pure_nuclio_deployed_runtimes():
             mlrun.runtimes.nuclio.function.enrich_nuclio_function_from_headers(
                 func, resp.headers
             )
@@ -3378,7 +3380,7 @@ class HTTPRunDB(RunDBInterface):
             By default, the image is mlrun/mlrun.
         """
         self.api_call(
-            method=mlrun.common.types.HTTPMethod.
+            method=mlrun.common.types.HTTPMethod.PATCH,
             path=f"projects/{project}/model-monitoring/model-monitoring-controller",
             params={
                 "base_period": base_period,
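The `list_artifacts` hunks thread the new `since`/`until` filters through as ISO strings. A hedged sketch of how such params are built; `datetime_to_iso` below is a stand-in with the behavior the call sites imply (ISO-8601 string, `None` passes through):

```python
from datetime import datetime, timezone
from typing import Optional


def datetime_to_iso(dt: Optional[datetime]) -> Optional[str]:
    # assumed behavior of mlrun's helper: None stays None, else ISO-8601
    return dt.isoformat() if dt else None


params = {
    "since": datetime_to_iso(datetime(2024, 7, 1, tzinfo=timezone.utc)),
    "until": datetime_to_iso(None),  # unset filters simply remain None
}
print(params)  # {'since': '2024-07-01T00:00:00+00:00', 'until': None}
```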
mlrun/execution.py
CHANGED
@@ -78,7 +78,6 @@ class MLClientCtx:
         self._tmpfile = tmp
         self._logger = log_stream or logger
         self._log_level = "info"
-        self._matrics_db = None
         self._autocommit = autocommit
         self._notifications = []
         self._state_thresholds = {}
@@ -103,8 +102,7 @@ class MLClientCtx:
         self._error = None
         self._commit = ""
         self._host = None
-        self._start_time = now_date()
-        self._last_update = now_date()
+        self._start_time = self._last_update = now_date()
         self._iteration_results = None
         self._children = []
         self._parent = None
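The second hunk folds two clock reads into one chained assignment, so the two timestamps can no longer differ by a few microseconds at construction time:

```python
from datetime import datetime, timezone

# one clock read; both names bind the same datetime object
start_time = last_update = datetime.now(timezone.utc)
print(start_time is last_update)  # True
```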
mlrun/model.py
CHANGED
@@ -754,7 +754,7 @@ class Notification(ModelObj):
                 "Both 'secret_params' and 'params' are empty, at least one must be defined."
             )

-        notification_class.validate_params(secret_params
+        notification_class.validate_params(secret_params | params)

     @staticmethod
     def validate_notification_uniqueness(notifications: list["Notification"]):
@@ -1490,14 +1490,37 @@ class RunObject(RunTemplate):
         )
         return ""

-    def output(self, key):
-        """
+    def output(self, key: str):
+        """
+        Return the value of a specific result or artifact by key.
+
+        This method waits for the outputs to complete and retrieves the value corresponding to the provided key.
+        If the key exists in the results, it returns the corresponding result value.
+        If not found in results, it attempts to fetch the artifact by key (cached in the run status).
+        If the artifact is not found, it tries to fetch the artifact URI by key.
+        If no artifact or result is found for the key, returns None.
+
+        :param key: The key of the result or artifact to retrieve.
+        :return: The value of the result or the artifact URI corresponding to the key, or None if not found.
+        """
         self._outputs_wait_for_completion()
+
+        # Check if the key exists in results and return the result value
         if self.status.results and key in self.status.results:
-            return self.status.results
+            return self.status.results[key]
+
+        # Artifacts are usually cached in the run object under `status.artifacts`. However, the artifacts are not
+        # stored in the DB as part of the run. The server may enrich the run with the artifacts or provide
+        # `status.artifact_uris` instead. See mlrun.common.formatters.run.RunFormat.
+        # When running locally - `status.artifact_uri` does not exist in the run.
+        # When listing runs - `status.artifacts` does not exist in the run.
         artifact = self._artifact(key)
         if artifact:
             return get_artifact_target(artifact, self.metadata.project)
+
+        if self.status.artifact_uris and key in self.status.artifact_uris:
+            return self.status.artifact_uris[key]
+
         return None

     @property
@@ -1510,26 +1533,50 @@ class RunObject(RunTemplate):

     @property
     def outputs(self):
-        """
-        outputs
+        """
+        Return a dictionary of outputs, including result values and artifact URIs.
+
+        This method waits for the outputs to complete and combines result values
+        and artifact URIs into a single dictionary. If there are multiple artifacts
+        for the same key, only include the artifact that does not have the "latest" tag.
+        If there is no other tag, include the "latest" tag as a fallback.
+
+        :return: Dictionary containing result values and artifact URIs.
+        """
         self._outputs_wait_for_completion()
+        outputs = {}
+
+        # Add results if available
         if self.status.results:
-            outputs
+            outputs.update(self.status.results)
+
+        # Artifacts are usually cached in the run object under `status.artifacts`. However, the artifacts are not
+        # stored in the DB as part of the run. The server may enrich the run with the artifacts or provide
+        # `status.artifact_uris` instead. See mlrun.common.formatters.run.RunFormat.
+        # When running locally - `status.artifact_uri` does not exist in the run.
+        # When listing runs - `status.artifacts` does not exist in the run.
         if self.status.artifacts:
-
-
-
+            outputs.update(self._process_artifacts(self.status.artifacts))
+        elif self.status.artifact_uris:
+            outputs.update(self.status.artifact_uris)
+
         return outputs

-    def artifact(self, key) -> "mlrun.DataItem":
-        """
+    def artifact(self, key: str) -> "mlrun.DataItem":
+        """Return artifact DataItem by key.
+
+        This method waits for the outputs to complete, searches for the artifact matching the given key,
+        and returns a DataItem if the artifact is found.
+
+        :param key: The key of the artifact to find.
+        :return: A DataItem corresponding to the artifact with the given key, or None if no such artifact is found.
+        """
         self._outputs_wait_for_completion()
         artifact = self._artifact(key)
-        if artifact:
-
-
-
-        return None
+        if not artifact:
+            return None
+        uri = get_artifact_target(artifact, self.metadata.project)
+        return mlrun.get_dataitem(uri) if uri else None

     def _outputs_wait_for_completion(
         self,
@@ -1547,12 +1594,85 @@ class RunObject(RunTemplate):
         )

     def _artifact(self, key):
-        """
-
-
-
-
-
+        """
+        Return the last artifact DataItem that matches the given key.
+
+        If multiple artifacts with the same key exist, return the last one in the list.
+        If there are artifacts with different tags, the method will return the one with a tag other than 'latest'
+        if available.
+        If no artifact with the given key is found, return None.
+
+        :param key: The key of the artifact to retrieve.
+        :return: The last artifact DataItem with the given key, or None if no such artifact is found.
+        """
+        if not self.status.artifacts:
+            return None
+
+        # Collect artifacts that match the key
+        matching_artifacts = [
+            artifact
+            for artifact in self.status.artifacts
+            if artifact["metadata"].get("key") == key
+        ]
+
+        if not matching_artifacts:
+            return None
+
+        # Sort matching artifacts by creation date in ascending order.
+        # The last element in the list will be the one created most recently.
+        # In case the `created` field does not exist in the artifact, that artifact will appear first in the sorted list
+        matching_artifacts.sort(
+            key=lambda artifact: artifact["metadata"].get("created", datetime.min)
+        )
+
+        # Filter out artifacts with 'latest' tag
+        non_latest_artifacts = [
+            artifact
+            for artifact in matching_artifacts
+            if artifact["metadata"].get("tag") != "latest"
+        ]
+
+        # Return the last non-'latest' artifact if available, otherwise return the last artifact
+        # In the case of only one tag, `status.artifacts` includes [v1, latest]. In that case, we want to return v1.
+        # In the case of multiple tags, `status.artifacts` includes [v1, latest, v2, v3].
+        # In that case, we need to return the last one (v3).
+        return (non_latest_artifacts or matching_artifacts)[-1]
+
+    def _process_artifacts(self, artifacts):
+        artifacts_by_key = {}
+
+        # Organize artifacts by key
+        for artifact in artifacts:
+            key = artifact["metadata"]["key"]
+            if key not in artifacts_by_key:
+                artifacts_by_key[key] = []
+            artifacts_by_key[key].append(artifact)
+
+        outputs = {}
+        for key, artifacts in artifacts_by_key.items():
+            # Sort matching artifacts by creation date in ascending order.
+            # The last element in the list will be the one created most recently.
+            # In case the `created` field does not exist in the artifact, that artifact will appear
+            # first in the sorted list
+            artifacts.sort(
+                key=lambda artifact: artifact["metadata"].get("created", datetime.min)
+            )
+
+            # Filter out artifacts with 'latest' tag
+            non_latest_artifacts = [
+                artifact
+                for artifact in artifacts
+                if artifact["metadata"].get("tag") != "latest"
+            ]
+
+            # Save the last non-'latest' artifact if available, otherwise save the last artifact
+            # In the case of only one tag, `artifacts` includes [v1, latest], in that case, we want to save v1.
+            # In the case of multiple tags, `artifacts` includes [v1, latest, v2, v3].
+            # In that case, we need to save the last one (v3).
+            artifact_to_save = (non_latest_artifacts or artifacts)[-1]
+            outputs[key] = get_artifact_target(artifact_to_save, self.metadata.project)
+
+        return outputs

     def uid(self):
         """run unique id"""
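The tag-selection rule shared by `_artifact` and `_process_artifacts` is easiest to see in isolation. A self-contained sketch on toy artifact dicts (same shape as the diff assumes: `metadata.key`, `metadata.tag`, `metadata.created`):

```python
from datetime import datetime

artifacts = [
    {"metadata": {"key": "model", "tag": "v1", "created": datetime(2024, 1, 1)}},
    {"metadata": {"key": "model", "tag": "latest", "created": datetime(2024, 1, 1)}},
    {"metadata": {"key": "model", "tag": "v2", "created": datetime(2024, 2, 1)}},
]

# sort ascending by creation time; artifacts missing `created` sort first
matching = sorted(artifacts, key=lambda a: a["metadata"].get("created", datetime.min))

# prefer concrete tags; "latest" is only a fallback when it is the sole tag
non_latest = [a for a in matching if a["metadata"].get("tag") != "latest"]
chosen = (non_latest or matching)[-1]
print(chosen["metadata"]["tag"])  # v2 - the newest non-"latest" artifact wins
```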
mlrun/model_monitoring/applications/context.py
CHANGED

@@ -56,7 +56,7 @@ class MonitoringApplicationContext(MLClientCtx):
     def __init__(self, **kwargs):
         super().__init__(**kwargs)

-    def
+    def _enrich_data(self):
        self.application_name: typing.Optional[str] = None
        self.start_infer_time: typing.Optional[pd.Timestamp] = None
        self.end_infer_time: typing.Optional[pd.Timestamp] = None
@@ -87,39 +87,37 @@ class MonitoringApplicationContext(MLClientCtx):
        """

        if not context:
-
+            ctx = (
            super().from_dict(
                attrs=attrs.get(mm_constants.ApplicationEvent.MLRUN_CONTEXT, {}),
                **kwargs,
            ),
        )
        else:
-
-
+            ctx = context
+        cls._enrich_data(ctx)

-
+        ctx.start_infer_time = pd.Timestamp(
            attrs.get(mm_constants.ApplicationEvent.START_INFER_TIME)
        )
-
+        ctx.end_infer_time = pd.Timestamp(
            attrs.get(mm_constants.ApplicationEvent.END_INFER_TIME)
        )
-
+        ctx.latest_request = pd.Timestamp(
            attrs.get(mm_constants.ApplicationEvent.LAST_REQUEST)
        )
-
-
-        )
-        self._feature_stats = json.loads(
+        ctx.application_name = attrs.get(mm_constants.ApplicationEvent.APPLICATION_NAME)
+        ctx._feature_stats = json.loads(
            attrs.get(mm_constants.ApplicationEvent.FEATURE_STATS, "{}")
        )
-
+        ctx._sample_df_stats = json.loads(
            attrs.get(mm_constants.ApplicationEvent.CURRENT_STATS, "{}")
        )

-
-
+        ctx.endpoint_id = attrs.get(mm_constants.ApplicationEvent.ENDPOINT_ID)
+        ctx._model_endpoint = model_endpoint_dict.get(ctx.endpoint_id)

-        return
+        return ctx

     @property
     def sample_df(self) -> pd.DataFrame: