mlrun 1.7.0rc15__py3-none-any.whl → 1.7.0rc17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/__init__.py +10 -1
- mlrun/__main__.py +18 -4
- mlrun/alerts/__init__.py +15 -0
- mlrun/alerts/alert.py +144 -0
- mlrun/artifacts/__init__.py +7 -1
- mlrun/artifacts/base.py +28 -3
- mlrun/artifacts/dataset.py +8 -0
- mlrun/artifacts/manager.py +18 -0
- mlrun/artifacts/model.py +8 -1
- mlrun/artifacts/plots.py +13 -0
- mlrun/common/schemas/__init__.py +10 -2
- mlrun/common/schemas/alert.py +64 -5
- mlrun/common/schemas/api_gateway.py +4 -0
- mlrun/common/schemas/artifact.py +15 -0
- mlrun/common/schemas/auth.py +2 -0
- mlrun/common/schemas/model_monitoring/__init__.py +4 -1
- mlrun/common/schemas/model_monitoring/constants.py +17 -1
- mlrun/common/schemas/model_monitoring/model_endpoints.py +60 -1
- mlrun/common/schemas/project.py +5 -1
- mlrun/config.py +11 -4
- mlrun/datastore/datastore_profile.py +10 -7
- mlrun/db/base.py +24 -4
- mlrun/db/httpdb.py +97 -43
- mlrun/db/nopdb.py +25 -4
- mlrun/errors.py +5 -0
- mlrun/launcher/base.py +3 -2
- mlrun/lists.py +4 -0
- mlrun/model.py +15 -8
- mlrun/model_monitoring/__init__.py +1 -1
- mlrun/model_monitoring/applications/_application_steps.py +1 -2
- mlrun/model_monitoring/applications/context.py +1 -1
- mlrun/model_monitoring/applications/histogram_data_drift.py +64 -38
- mlrun/model_monitoring/db/__init__.py +2 -0
- mlrun/model_monitoring/db/stores/base/store.py +9 -36
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +63 -110
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +56 -202
- mlrun/model_monitoring/db/tsdb/__init__.py +71 -0
- mlrun/model_monitoring/db/tsdb/base.py +135 -0
- mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +117 -0
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +442 -0
- mlrun/model_monitoring/db/v3io_tsdb_reader.py +134 -0
- mlrun/model_monitoring/stream_processing.py +46 -210
- mlrun/model_monitoring/writer.py +50 -100
- mlrun/platforms/__init__.py +10 -9
- mlrun/platforms/iguazio.py +19 -200
- mlrun/projects/operations.py +11 -7
- mlrun/projects/pipelines.py +13 -76
- mlrun/projects/project.py +62 -17
- mlrun/render.py +9 -3
- mlrun/run.py +5 -38
- mlrun/runtimes/__init__.py +1 -0
- mlrun/runtimes/base.py +3 -3
- mlrun/runtimes/kubejob.py +2 -1
- mlrun/runtimes/nuclio/api_gateway.py +163 -77
- mlrun/runtimes/nuclio/application/application.py +160 -7
- mlrun/runtimes/nuclio/function.py +25 -45
- mlrun/runtimes/pod.py +16 -36
- mlrun/runtimes/remotesparkjob.py +1 -1
- mlrun/runtimes/sparkjob/spark3job.py +1 -1
- mlrun/runtimes/utils.py +0 -38
- mlrun/track/tracker.py +2 -1
- mlrun/utils/helpers.py +51 -31
- mlrun/utils/logger.py +11 -6
- mlrun/utils/notifications/notification/base.py +1 -1
- mlrun/utils/notifications/notification/slack.py +9 -4
- mlrun/utils/notifications/notification/webhook.py +1 -1
- mlrun/utils/notifications/notification_pusher.py +21 -14
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.0rc15.dist-info → mlrun-1.7.0rc17.dist-info}/METADATA +4 -3
- {mlrun-1.7.0rc15.dist-info → mlrun-1.7.0rc17.dist-info}/RECORD +75 -69
- mlrun/kfpops.py +0 -860
- mlrun/platforms/other.py +0 -305
- {mlrun-1.7.0rc15.dist-info → mlrun-1.7.0rc17.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc15.dist-info → mlrun-1.7.0rc17.dist-info}/WHEEL +0 -0
- {mlrun-1.7.0rc15.dist-info → mlrun-1.7.0rc17.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc15.dist-info → mlrun-1.7.0rc17.dist-info}/top_level.txt +0 -0
|
@@ -156,7 +156,7 @@ class EventKeyMetrics:
|
|
|
156
156
|
REAL_TIME = "real_time"
|
|
157
157
|
|
|
158
158
|
|
|
159
|
-
class
|
|
159
|
+
class TimeSeriesConnector:
|
|
160
160
|
TSDB = "tsdb"
|
|
161
161
|
|
|
162
162
|
|
|
@@ -188,12 +188,14 @@ class SchedulingKeys:
|
|
|
188
188
|
class FileTargetKind:
|
|
189
189
|
ENDPOINTS = "endpoints"
|
|
190
190
|
EVENTS = "events"
|
|
191
|
+
PREDICTIONS = "predictions"
|
|
191
192
|
STREAM = "stream"
|
|
192
193
|
PARQUET = "parquet"
|
|
193
194
|
APPS_PARQUET = "apps_parquet"
|
|
194
195
|
LOG_STREAM = "log_stream"
|
|
195
196
|
APP_RESULTS = "app_results"
|
|
196
197
|
MONITORING_SCHEDULES = "monitoring_schedules"
|
|
198
|
+
MONITORING_APPLICATION = "monitoring_application"
|
|
197
199
|
|
|
198
200
|
|
|
199
201
|
class ModelMonitoringMode(str, Enum):
|
|
@@ -228,6 +230,12 @@ class MonitoringFunctionNames(MonitoringStrEnum):
|
|
|
228
230
|
WRITER = "model-monitoring-writer"
|
|
229
231
|
|
|
230
232
|
|
|
233
|
+
class MonitoringTSDBTables(MonitoringStrEnum):
|
|
234
|
+
APP_RESULTS = "app-results"
|
|
235
|
+
METRICS = "metrics"
|
|
236
|
+
EVENTS = "events"
|
|
237
|
+
|
|
238
|
+
|
|
231
239
|
@dataclass
|
|
232
240
|
class FunctionURI:
|
|
233
241
|
project: str
|
|
@@ -329,6 +337,14 @@ class ControllerPolicy:
|
|
|
329
337
|
BASE_PERIOD = "base_period"
|
|
330
338
|
|
|
331
339
|
|
|
340
|
+
class TSDBTarget:
|
|
341
|
+
V3IO_TSDB = "v3io-tsdb"
|
|
342
|
+
PROMETHEUS = "prometheus"
|
|
343
|
+
APP_RESULTS_TABLE = "app-results"
|
|
344
|
+
V3IO_BE = "tsdb"
|
|
345
|
+
V3IO_RATE = "1/s"
|
|
346
|
+
|
|
347
|
+
|
|
332
348
|
class HistogramDataDriftApplicationConstants:
|
|
333
349
|
NAME = "histogram-data-drift"
|
|
334
350
|
GENERAL_RESULT_NAME = "general_drift"
|
|
@@ -14,7 +14,9 @@
|
|
|
14
14
|
|
|
15
15
|
import enum
|
|
16
16
|
import json
|
|
17
|
-
|
|
17
|
+
import re
|
|
18
|
+
from datetime import datetime
|
|
19
|
+
from typing import Any, NamedTuple, Optional
|
|
18
20
|
|
|
19
21
|
from pydantic import BaseModel, Field, validator
|
|
20
22
|
from pydantic.main import Extra
|
|
@@ -29,6 +31,8 @@ from .constants import (
|
|
|
29
31
|
EventKeyMetrics,
|
|
30
32
|
EventLiveStats,
|
|
31
33
|
ModelMonitoringMode,
|
|
34
|
+
ResultKindApp,
|
|
35
|
+
ResultStatusApp,
|
|
32
36
|
)
|
|
33
37
|
|
|
34
38
|
|
|
@@ -304,6 +308,61 @@ class ModelEndpointMonitoringMetric(BaseModel):
|
|
|
304
308
|
full_name: str
|
|
305
309
|
|
|
306
310
|
|
|
311
|
+
def _compose_full_name(
|
|
312
|
+
*,
|
|
313
|
+
project: str,
|
|
314
|
+
app: str,
|
|
315
|
+
name: str,
|
|
316
|
+
type: ModelEndpointMonitoringMetricType = ModelEndpointMonitoringMetricType.RESULT,
|
|
317
|
+
) -> str:
|
|
318
|
+
return ".".join([project, app, type, name])
|
|
319
|
+
|
|
320
|
+
|
|
321
|
+
_FQN_PART_PATTERN = r"[a-zA-Z0-9_-]+"
|
|
322
|
+
_FQN_PATTERN = (
|
|
323
|
+
rf"^(?P<project>{_FQN_PART_PATTERN})\."
|
|
324
|
+
rf"(?P<app>{_FQN_PART_PATTERN})\."
|
|
325
|
+
rf"(?P<type>{_FQN_PART_PATTERN})\."
|
|
326
|
+
rf"(?P<name>{_FQN_PART_PATTERN})$"
|
|
327
|
+
)
|
|
328
|
+
_FQN_REGEX = re.compile(_FQN_PATTERN)
|
|
329
|
+
|
|
330
|
+
|
|
331
|
+
def _parse_metric_fqn_to_monitoring_metric(fqn: str) -> ModelEndpointMonitoringMetric:
|
|
332
|
+
match = _FQN_REGEX.fullmatch(fqn)
|
|
333
|
+
if match is None:
|
|
334
|
+
raise ValueError("The fully qualified name is not in the expected format")
|
|
335
|
+
return ModelEndpointMonitoringMetric.parse_obj(
|
|
336
|
+
match.groupdict() | {"full_name": fqn}
|
|
337
|
+
)
|
|
338
|
+
|
|
339
|
+
|
|
340
|
+
class _ResultPoint(NamedTuple):
|
|
341
|
+
timestamp: datetime
|
|
342
|
+
value: float
|
|
343
|
+
status: ResultStatusApp
|
|
344
|
+
|
|
345
|
+
|
|
346
|
+
class _ModelEndpointMonitoringResultValuesBase(BaseModel):
|
|
347
|
+
full_name: str
|
|
348
|
+
type: ModelEndpointMonitoringMetricType
|
|
349
|
+
data: bool
|
|
350
|
+
|
|
351
|
+
|
|
352
|
+
class ModelEndpointMonitoringResultValues(_ModelEndpointMonitoringResultValuesBase):
|
|
353
|
+
full_name: str
|
|
354
|
+
type: ModelEndpointMonitoringMetricType
|
|
355
|
+
result_kind: ResultKindApp
|
|
356
|
+
values: list[_ResultPoint]
|
|
357
|
+
data: bool = True
|
|
358
|
+
|
|
359
|
+
|
|
360
|
+
class ModelEndpointMonitoringResultNoData(_ModelEndpointMonitoringResultValuesBase):
|
|
361
|
+
full_name: str
|
|
362
|
+
type: ModelEndpointMonitoringMetricType
|
|
363
|
+
data: bool = False
|
|
364
|
+
|
|
365
|
+
|
|
307
366
|
def _mapping_attributes(
|
|
308
367
|
base_model: BaseModel,
|
|
309
368
|
flattened_dictionary: dict,
|
mlrun/common/schemas/project.py
CHANGED
|
@@ -113,7 +113,11 @@ class ProjectSummary(pydantic.BaseModel):
|
|
|
113
113
|
runs_completed_recent_count: int
|
|
114
114
|
runs_failed_recent_count: int
|
|
115
115
|
runs_running_count: int
|
|
116
|
-
|
|
116
|
+
distinct_schedules_count: int
|
|
117
|
+
distinct_scheduled_jobs_pending_count: int
|
|
118
|
+
distinct_scheduled_pipelines_pending_count: int
|
|
119
|
+
pipelines_completed_recent_count: typing.Optional[int] = None
|
|
120
|
+
pipelines_failed_recent_count: typing.Optional[int] = None
|
|
117
121
|
pipelines_running_count: typing.Optional[int] = None
|
|
118
122
|
|
|
119
123
|
|
mlrun/config.py
CHANGED
|
@@ -232,6 +232,10 @@ default_config = {
|
|
|
232
232
|
"databricks": {
|
|
233
233
|
"artifact_directory_path": "/mlrun_databricks_runtime/artifacts_dictionaries"
|
|
234
234
|
},
|
|
235
|
+
"application": {
|
|
236
|
+
"default_sidecar_internal_port": 8050,
|
|
237
|
+
"default_authentication_mode": "accessKey",
|
|
238
|
+
},
|
|
235
239
|
},
|
|
236
240
|
# TODO: function defaults should be moved to the function spec config above
|
|
237
241
|
"function_defaults": {
|
|
@@ -503,6 +507,7 @@ default_config = {
|
|
|
503
507
|
"default": "v3io:///users/pipelines/{project}/model-endpoints/{kind}",
|
|
504
508
|
"user_space": "v3io:///projects/{project}/model-endpoints/{kind}",
|
|
505
509
|
"stream": "",
|
|
510
|
+
"monitoring_application": "v3io:///users/pipelines/{project}/monitoring-apps/",
|
|
506
511
|
},
|
|
507
512
|
# Offline storage path can be either relative or a full path. This path is used for general offline data
|
|
508
513
|
# storage such as the parquet file which is generated from the monitoring stream function for the drift analysis
|
|
@@ -516,6 +521,7 @@ default_config = {
|
|
|
516
521
|
# See mlrun.model_monitoring.db.stores.ObjectStoreFactory for available options
|
|
517
522
|
"store_type": "v3io-nosql",
|
|
518
523
|
"endpoint_store_connection": "",
|
|
524
|
+
"tsdb_connector_type": "v3io-tsdb",
|
|
519
525
|
},
|
|
520
526
|
"secret_stores": {
|
|
521
527
|
# Use only in testing scenarios (such as integration tests) to avoid using k8s for secrets (will use in-memory
|
|
@@ -1088,6 +1094,7 @@ class Config:
|
|
|
1088
1094
|
target: str = "online",
|
|
1089
1095
|
artifact_path: str = None,
|
|
1090
1096
|
function_name: str = None,
|
|
1097
|
+
**kwargs,
|
|
1091
1098
|
) -> typing.Union[str, list[str]]:
|
|
1092
1099
|
"""Get the full path from the configuration based on the provided project and kind.
|
|
1093
1100
|
|
|
@@ -1114,7 +1121,7 @@ class Config:
|
|
|
1114
1121
|
)
|
|
1115
1122
|
if store_prefix_dict.get(kind):
|
|
1116
1123
|
# Target exist in store prefix and has a valid string value
|
|
1117
|
-
return store_prefix_dict[kind].format(project=project)
|
|
1124
|
+
return store_prefix_dict[kind].format(project=project, **kwargs)
|
|
1118
1125
|
|
|
1119
1126
|
if (
|
|
1120
1127
|
function_name
|
|
@@ -1399,14 +1406,14 @@ def read_env(env=None, prefix=env_prefix):
|
|
|
1399
1406
|
if log_formatter_name := config.get("log_formatter"):
|
|
1400
1407
|
import mlrun.utils.logger
|
|
1401
1408
|
|
|
1402
|
-
log_formatter = mlrun.utils.
|
|
1409
|
+
log_formatter = mlrun.utils.resolve_formatter_by_kind(
|
|
1403
1410
|
mlrun.utils.FormatterKinds(log_formatter_name)
|
|
1404
1411
|
)
|
|
1405
1412
|
current_handler = mlrun.utils.logger.get_handler("default")
|
|
1406
1413
|
current_formatter_name = current_handler.formatter.__class__.__name__
|
|
1407
|
-
desired_formatter_name = log_formatter.
|
|
1414
|
+
desired_formatter_name = log_formatter.__name__
|
|
1408
1415
|
if current_formatter_name != desired_formatter_name:
|
|
1409
|
-
current_handler.setFormatter(log_formatter)
|
|
1416
|
+
current_handler.setFormatter(log_formatter())
|
|
1410
1417
|
|
|
1411
1418
|
# The default function pod resource values are of type str; however, when reading from environment variable numbers,
|
|
1412
1419
|
# it converts them to type int if contains only number, so we want to convert them to str.
|
|
@@ -188,6 +188,7 @@ class DatastoreProfileS3(DatastoreProfile):
|
|
|
188
188
|
bucket: typing.Optional[str] = None
|
|
189
189
|
|
|
190
190
|
@pydantic.validator("bucket")
|
|
191
|
+
@classmethod
|
|
191
192
|
def check_bucket(cls, v):
|
|
192
193
|
if not v:
|
|
193
194
|
warnings.warn(
|
|
@@ -292,6 +293,7 @@ class DatastoreProfileGCS(DatastoreProfile):
|
|
|
292
293
|
bucket: typing.Optional[str] = None
|
|
293
294
|
|
|
294
295
|
@pydantic.validator("bucket")
|
|
296
|
+
@classmethod
|
|
295
297
|
def check_bucket(cls, v):
|
|
296
298
|
if not v:
|
|
297
299
|
warnings.warn(
|
|
@@ -344,13 +346,14 @@ class DatastoreProfileAzureBlob(DatastoreProfile):
|
|
|
344
346
|
client_secret: typing.Optional[str] = None
|
|
345
347
|
sas_token: typing.Optional[str] = None
|
|
346
348
|
credential: typing.Optional[str] = None
|
|
347
|
-
|
|
349
|
+
container: typing.Optional[str] = None
|
|
348
350
|
|
|
349
|
-
@pydantic.validator("
|
|
350
|
-
|
|
351
|
+
@pydantic.validator("container")
|
|
352
|
+
@classmethod
|
|
353
|
+
def check_container(cls, v):
|
|
351
354
|
if not v:
|
|
352
355
|
warnings.warn(
|
|
353
|
-
"The '
|
|
356
|
+
"The 'container' attribute will be mandatory starting from version 1.9",
|
|
354
357
|
FutureWarning,
|
|
355
358
|
stacklevel=2,
|
|
356
359
|
)
|
|
@@ -358,10 +361,10 @@ class DatastoreProfileAzureBlob(DatastoreProfile):
|
|
|
358
361
|
|
|
359
362
|
def url(self, subpath) -> str:
|
|
360
363
|
if subpath.startswith("/"):
|
|
361
|
-
# in azure the path after schema is starts with
|
|
364
|
+
# in azure the path after the scheme starts with the container, therefore it should not start with "/".
|
|
362
365
|
subpath = subpath[1:]
|
|
363
|
-
if self.
|
|
364
|
-
return f"az://{self.
|
|
366
|
+
if self.container:
|
|
367
|
+
return f"az://{self.container}/{subpath}"
|
|
365
368
|
else:
|
|
366
369
|
return f"az://{subpath}"
|
|
367
370
|
|
mlrun/db/base.py
CHANGED
|
@@ -16,6 +16,7 @@ import datetime
|
|
|
16
16
|
from abc import ABC, abstractmethod
|
|
17
17
|
from typing import Optional, Union
|
|
18
18
|
|
|
19
|
+
import mlrun.alerts
|
|
19
20
|
import mlrun.common.schemas
|
|
20
21
|
import mlrun.model_monitoring
|
|
21
22
|
|
|
@@ -117,7 +118,18 @@ class RunDBInterface(ABC):
|
|
|
117
118
|
pass
|
|
118
119
|
|
|
119
120
|
@abstractmethod
|
|
120
|
-
def del_artifact(
|
|
121
|
+
def del_artifact(
|
|
122
|
+
self,
|
|
123
|
+
key,
|
|
124
|
+
tag="",
|
|
125
|
+
project="",
|
|
126
|
+
tree=None,
|
|
127
|
+
uid=None,
|
|
128
|
+
deletion_strategy: mlrun.common.schemas.artifact.ArtifactsDeletionStrategies = (
|
|
129
|
+
mlrun.common.schemas.artifact.ArtifactsDeletionStrategies.metadata_only
|
|
130
|
+
),
|
|
131
|
+
secrets: dict = None,
|
|
132
|
+
):
|
|
121
133
|
pass
|
|
122
134
|
|
|
123
135
|
@abstractmethod
|
|
@@ -543,7 +555,7 @@ class RunDBInterface(ABC):
|
|
|
543
555
|
end: Optional[str] = None,
|
|
544
556
|
metrics: Optional[list[str]] = None,
|
|
545
557
|
features: bool = False,
|
|
546
|
-
):
|
|
558
|
+
) -> mlrun.model_monitoring.ModelEndpoint:
|
|
547
559
|
pass
|
|
548
560
|
|
|
549
561
|
@abstractmethod
|
|
@@ -617,8 +629,8 @@ class RunDBInterface(ABC):
|
|
|
617
629
|
@abstractmethod
|
|
618
630
|
def store_api_gateway(
|
|
619
631
|
self,
|
|
620
|
-
project: str,
|
|
621
632
|
api_gateway: mlrun.common.schemas.APIGateway,
|
|
633
|
+
project: str = None,
|
|
622
634
|
):
|
|
623
635
|
pass
|
|
624
636
|
|
|
@@ -664,7 +676,7 @@ class RunDBInterface(ABC):
|
|
|
664
676
|
def store_alert_config(
|
|
665
677
|
self,
|
|
666
678
|
alert_name: str,
|
|
667
|
-
alert_data: Union[dict, mlrun.
|
|
679
|
+
alert_data: Union[dict, mlrun.alerts.alert.AlertConfig],
|
|
668
680
|
project="",
|
|
669
681
|
):
|
|
670
682
|
pass
|
|
@@ -685,6 +697,14 @@ class RunDBInterface(ABC):
|
|
|
685
697
|
def reset_alert_config(self, alert_name: str, project=""):
|
|
686
698
|
pass
|
|
687
699
|
|
|
700
|
+
@abstractmethod
|
|
701
|
+
def get_alert_template(self, template_name: str):
|
|
702
|
+
pass
|
|
703
|
+
|
|
704
|
+
@abstractmethod
|
|
705
|
+
def list_alert_templates(self):
|
|
706
|
+
pass
|
|
707
|
+
|
|
688
708
|
@abstractmethod
|
|
689
709
|
def get_builder_status(
|
|
690
710
|
self,
|
mlrun/db/httpdb.py
CHANGED
|
@@ -15,7 +15,6 @@
|
|
|
15
15
|
import enum
|
|
16
16
|
import http
|
|
17
17
|
import re
|
|
18
|
-
import tempfile
|
|
19
18
|
import time
|
|
20
19
|
import traceback
|
|
21
20
|
import typing
|
|
@@ -26,9 +25,9 @@ from os import path, remove
|
|
|
26
25
|
from typing import Optional, Union
|
|
27
26
|
from urllib.parse import urlparse
|
|
28
27
|
|
|
29
|
-
import kfp
|
|
30
28
|
import requests
|
|
31
29
|
import semver
|
|
30
|
+
from mlrun_pipelines.utils import compile_pipeline
|
|
32
31
|
|
|
33
32
|
import mlrun
|
|
34
33
|
import mlrun.common.schemas
|
|
@@ -38,6 +37,7 @@ import mlrun.platforms
|
|
|
38
37
|
import mlrun.projects
|
|
39
38
|
import mlrun.runtimes.nuclio.api_gateway
|
|
40
39
|
import mlrun.utils
|
|
40
|
+
from mlrun.alerts.alert import AlertConfig
|
|
41
41
|
from mlrun.db.auth_utils import OAuthClientIDTokenProvider, StaticTokenProvider
|
|
42
42
|
from mlrun.errors import MLRunInvalidArgumentError, err_to_str
|
|
43
43
|
|
|
@@ -51,7 +51,6 @@ from ..utils import (
|
|
|
51
51
|
datetime_to_iso,
|
|
52
52
|
dict_to_json,
|
|
53
53
|
logger,
|
|
54
|
-
new_pipe_metadata,
|
|
55
54
|
normalize_name,
|
|
56
55
|
version,
|
|
57
56
|
)
|
|
@@ -590,7 +589,7 @@ class HTTPRunDB(RunDBInterface):
|
|
|
590
589
|
if offset < 0:
|
|
591
590
|
raise MLRunInvalidArgumentError("Offset cannot be negative")
|
|
592
591
|
if size is None:
|
|
593
|
-
size = int(
|
|
592
|
+
size = int(mlrun.mlconf.httpdb.logs.pull_logs_default_size_limit)
|
|
594
593
|
elif size == -1:
|
|
595
594
|
logger.warning(
|
|
596
595
|
"Retrieving all logs. This may be inefficient and can result in a large log."
|
|
@@ -636,23 +635,25 @@ class HTTPRunDB(RunDBInterface):
|
|
|
636
635
|
|
|
637
636
|
state, text = self.get_log(uid, project, offset=offset)
|
|
638
637
|
if text:
|
|
639
|
-
print(text.decode(errors=
|
|
638
|
+
print(text.decode(errors=mlrun.mlconf.httpdb.logs.decode.errors))
|
|
640
639
|
nil_resp = 0
|
|
641
640
|
while True:
|
|
642
641
|
offset += len(text)
|
|
643
642
|
# if we get 3 nil responses in a row, increase the sleep time to 10 seconds
|
|
644
643
|
# TODO: refactor this to use a conditional backoff mechanism
|
|
645
644
|
if nil_resp < 3:
|
|
646
|
-
time.sleep(int(
|
|
645
|
+
time.sleep(int(mlrun.mlconf.httpdb.logs.pull_logs_default_interval))
|
|
647
646
|
else:
|
|
648
647
|
time.sleep(
|
|
649
|
-
int(
|
|
648
|
+
int(
|
|
649
|
+
mlrun.mlconf.httpdb.logs.pull_logs_backoff_no_logs_default_interval
|
|
650
|
+
)
|
|
650
651
|
)
|
|
651
652
|
state, text = self.get_log(uid, project, offset=offset)
|
|
652
653
|
if text:
|
|
653
654
|
nil_resp = 0
|
|
654
655
|
print(
|
|
655
|
-
text.decode(errors=
|
|
656
|
+
text.decode(errors=mlrun.mlconf.httpdb.logs.decode.errors),
|
|
656
657
|
end="",
|
|
657
658
|
)
|
|
658
659
|
else:
|
|
@@ -985,7 +986,18 @@ class HTTPRunDB(RunDBInterface):
|
|
|
985
986
|
resp = self.api_call("GET", endpoint_path, error, params=params, version="v2")
|
|
986
987
|
return resp.json()
|
|
987
988
|
|
|
988
|
-
def del_artifact(
|
|
989
|
+
def del_artifact(
|
|
990
|
+
self,
|
|
991
|
+
key,
|
|
992
|
+
tag=None,
|
|
993
|
+
project="",
|
|
994
|
+
tree=None,
|
|
995
|
+
uid=None,
|
|
996
|
+
deletion_strategy: mlrun.common.schemas.artifact.ArtifactsDeletionStrategies = (
|
|
997
|
+
mlrun.common.schemas.artifact.ArtifactsDeletionStrategies.metadata_only
|
|
998
|
+
),
|
|
999
|
+
secrets: dict = None,
|
|
1000
|
+
):
|
|
989
1001
|
"""Delete an artifact.
|
|
990
1002
|
|
|
991
1003
|
:param key: Identifying key of the artifact.
|
|
@@ -993,6 +1005,8 @@ class HTTPRunDB(RunDBInterface):
|
|
|
993
1005
|
:param project: Project that the artifact belongs to.
|
|
994
1006
|
:param tree: The tree which generated this artifact.
|
|
995
1007
|
:param uid: A unique ID for this specific version of the artifact (the uid that was generated in the backend)
|
|
1008
|
+
:param deletion_strategy: The artifact deletion strategy types.
|
|
1009
|
+
:param secrets: Credentials needed to access the artifact data.
|
|
996
1010
|
"""
|
|
997
1011
|
|
|
998
1012
|
endpoint_path = f"projects/{project}/artifacts/{key}"
|
|
@@ -1001,9 +1015,17 @@ class HTTPRunDB(RunDBInterface):
|
|
|
1001
1015
|
"tag": tag,
|
|
1002
1016
|
"tree": tree,
|
|
1003
1017
|
"uid": uid,
|
|
1018
|
+
"deletion_strategy": deletion_strategy,
|
|
1004
1019
|
}
|
|
1005
1020
|
error = f"del artifact {project}/{key}"
|
|
1006
|
-
self.api_call(
|
|
1021
|
+
self.api_call(
|
|
1022
|
+
"DELETE",
|
|
1023
|
+
endpoint_path,
|
|
1024
|
+
error,
|
|
1025
|
+
params=params,
|
|
1026
|
+
version="v2",
|
|
1027
|
+
body=dict_to_json(secrets),
|
|
1028
|
+
)
|
|
1007
1029
|
|
|
1008
1030
|
def list_artifacts(
|
|
1009
1031
|
self,
|
|
@@ -1018,6 +1040,7 @@ class HTTPRunDB(RunDBInterface):
|
|
|
1018
1040
|
kind: str = None,
|
|
1019
1041
|
category: Union[str, mlrun.common.schemas.ArtifactCategories] = None,
|
|
1020
1042
|
tree: str = None,
|
|
1043
|
+
producer_uri: str = None,
|
|
1021
1044
|
) -> ArtifactList:
|
|
1022
1045
|
"""List artifacts filtered by various parameters.
|
|
1023
1046
|
|
|
@@ -1046,9 +1069,12 @@ class HTTPRunDB(RunDBInterface):
|
|
|
1046
1069
|
:param best_iteration: Returns the artifact which belongs to the best iteration of a given run, in the case of
|
|
1047
1070
|
artifacts generated from a hyper-param run. If only a single iteration exists, will return the artifact
|
|
1048
1071
|
from that iteration. If using ``best_iter``, the ``iter`` parameter must not be used.
|
|
1049
|
-
:param kind:
|
|
1050
|
-
:param category:
|
|
1051
|
-
:param tree:
|
|
1072
|
+
:param kind: Return artifacts of the requested kind.
|
|
1073
|
+
:param category: Return artifacts of the requested category.
|
|
1074
|
+
:param tree: Return artifacts of the requested tree.
|
|
1075
|
+
:param producer_uri: Return artifacts produced by the requested producer URI. Producer URI usually
|
|
1076
|
+
points to a run and is used to filter artifacts by the run that produced them when the artifact producer id
|
|
1077
|
+
is a workflow id (artifact was created as part of a workflow).
|
|
1052
1078
|
"""
|
|
1053
1079
|
|
|
1054
1080
|
project = project or config.default_project
|
|
@@ -1067,6 +1093,7 @@ class HTTPRunDB(RunDBInterface):
|
|
|
1067
1093
|
"category": category,
|
|
1068
1094
|
"tree": tree,
|
|
1069
1095
|
"format": mlrun.common.schemas.ArtifactsFormat.full.value,
|
|
1096
|
+
"producer_uri": producer_uri,
|
|
1070
1097
|
}
|
|
1071
1098
|
error = "list artifacts"
|
|
1072
1099
|
endpoint_path = f"projects/{project}/artifacts"
|
|
@@ -1828,14 +1855,11 @@ class HTTPRunDB(RunDBInterface):
|
|
|
1828
1855
|
if isinstance(pipeline, str):
|
|
1829
1856
|
pipe_file = pipeline
|
|
1830
1857
|
else:
|
|
1831
|
-
pipe_file =
|
|
1832
|
-
conf = new_pipe_metadata(
|
|
1858
|
+
pipe_file = compile_pipeline(
|
|
1833
1859
|
artifact_path=artifact_path,
|
|
1834
1860
|
cleanup_ttl=cleanup_ttl,
|
|
1835
|
-
|
|
1836
|
-
|
|
1837
|
-
kfp.compiler.Compiler().compile(
|
|
1838
|
-
pipeline, pipe_file, type_check=False, pipeline_conf=conf
|
|
1861
|
+
ops=ops,
|
|
1862
|
+
pipeline=pipeline,
|
|
1839
1863
|
)
|
|
1840
1864
|
|
|
1841
1865
|
if pipe_file.endswith(".yaml"):
|
|
@@ -3112,14 +3136,12 @@ class HTTPRunDB(RunDBInterface):
|
|
|
3112
3136
|
:param labels: A list of labels to filter by. Label filters work by either filtering a specific value of a
|
|
3113
3137
|
label (i.e. list("key=value")) or by looking for the existence of a given key (i.e. "key")
|
|
3114
3138
|
:param metrics: A list of metrics to return for each endpoint, read more in 'TimeMetric'
|
|
3115
|
-
:param start: The start time of the metrics. Can be represented by a string containing an RFC 3339
|
|
3116
|
-
|
|
3117
|
-
|
|
3118
|
-
|
|
3119
|
-
|
|
3120
|
-
|
|
3121
|
-
`'now-[0-9]+[mhd]'`, where `m` = minutes, `h` = hours, and `'d'` =
|
|
3122
|
-
days), or 0 for the earliest time.
|
|
3139
|
+
:param start: The start time of the metrics. Can be represented by a string containing an RFC 3339 time, a
|
|
3140
|
+
Unix timestamp in milliseconds, a relative time (`'now'` or `'now-[0-9]+[mhd]'`, where
|
|
3141
|
+
`m` = minutes, `h` = hours, `'d'` = days, and `'s'` = seconds), or 0 for the earliest time.
|
|
3142
|
+
:param end: The end time of the metrics. Can be represented by a string containing an RFC 3339 time, a
|
|
3143
|
+
Unix timestamp in milliseconds, a relative time (`'now'` or `'now-[0-9]+[mhd]'`, where
|
|
3144
|
+
`m` = minutes, `h` = hours, `'d'` = days, and `'s'` = seconds), or 0 for the earliest time.
|
|
3123
3145
|
:param top_level: if true will return only routers and endpoint that are NOT children of any router
|
|
3124
3146
|
:param uids: if passed will return a list `ModelEndpoint` object with uid in uids
|
|
3125
3147
|
"""
|
|
@@ -3168,13 +3190,13 @@ class HTTPRunDB(RunDBInterface):
|
|
|
3168
3190
|
:param project: The name of the project
|
|
3169
3191
|
:param endpoint_id: The unique id of the model endpoint.
|
|
3170
3192
|
:param start: The start time of the metrics. Can be represented by a string containing an
|
|
3171
|
-
RFC 3339 time, a
|
|
3172
|
-
`'now-[0-9]+[mhd]'`, where `m` = minutes, `h` = hours,
|
|
3173
|
-
0 for the earliest time.
|
|
3193
|
+
RFC 3339 time, a Unix timestamp in milliseconds, a relative time
|
|
3194
|
+
(`'now'` or `'now-[0-9]+[mhd]'`, where `m` = minutes, `h` = hours,
|
|
3195
|
+
`'d'` = days, and `'s'` = seconds), or 0 for the earliest time.
|
|
3174
3196
|
:param end: The end time of the metrics. Can be represented by a string containing an
|
|
3175
|
-
RFC 3339 time, a
|
|
3176
|
-
`'now-[0-9]+[mhd]'`, where `m` = minutes, `h` = hours,
|
|
3177
|
-
0 for the earliest time.
|
|
3197
|
+
RFC 3339 time, a Unix timestamp in milliseconds, a relative time
|
|
3198
|
+
(`'now'` or `'now-[0-9]+[mhd]'`, where `m` = minutes, `h` = hours,
|
|
3199
|
+
`'d'` = days, and `'s'` = seconds), or 0 for the earliest time.
|
|
3178
3200
|
:param metrics: A list of metrics to return for the model endpoint. There are pre-defined
|
|
3179
3201
|
metrics for model endpoints such as predictions_per_second and
|
|
3180
3202
|
latency_avg_5m but also custom metrics defined by the user. Please note that
|
|
@@ -3915,7 +3937,7 @@ class HTTPRunDB(RunDBInterface):
|
|
|
3915
3937
|
logger.warning(
|
|
3916
3938
|
"Building a function image to ECR and loading an S3 source to the image may require conflicting access "
|
|
3917
3939
|
"keys. Only the permissions granted to the platform's configured secret will take affect "
|
|
3918
|
-
"(see mlrun.
|
|
3940
|
+
"(see mlrun.mlconf.httpdb.builder.docker_registry_secret). "
|
|
3919
3941
|
"In case the permissions are limited to ECR scope, you may use pull_at_runtime=True instead",
|
|
3920
3942
|
source=func.spec.build.source,
|
|
3921
3943
|
load_source_on_run=func.spec.build.load_source_on_run,
|
|
@@ -3943,9 +3965,9 @@ class HTTPRunDB(RunDBInterface):
|
|
|
3943
3965
|
def store_alert_config(
|
|
3944
3966
|
self,
|
|
3945
3967
|
alert_name: str,
|
|
3946
|
-
alert_data: Union[dict,
|
|
3968
|
+
alert_data: Union[dict, AlertConfig],
|
|
3947
3969
|
project="",
|
|
3948
|
-
):
|
|
3970
|
+
) -> AlertConfig:
|
|
3949
3971
|
"""
|
|
3950
3972
|
Create/modify an alert.
|
|
3951
3973
|
:param alert_name: The name of the alert.
|
|
@@ -3956,13 +3978,19 @@ class HTTPRunDB(RunDBInterface):
|
|
|
3956
3978
|
project = project or config.default_project
|
|
3957
3979
|
endpoint_path = f"projects/{project}/alerts/{alert_name}"
|
|
3958
3980
|
error_message = f"put alert {project}/alerts/{alert_name}"
|
|
3959
|
-
|
|
3960
|
-
alert_data
|
|
3981
|
+
alert_instance = (
|
|
3982
|
+
alert_data
|
|
3983
|
+
if isinstance(alert_data, AlertConfig)
|
|
3984
|
+
else AlertConfig.from_dict(alert_data)
|
|
3985
|
+
)
|
|
3986
|
+
alert_instance.validate_required_fields()
|
|
3987
|
+
|
|
3988
|
+
alert_data = alert_instance.to_dict()
|
|
3961
3989
|
body = _as_json(alert_data)
|
|
3962
3990
|
response = self.api_call("PUT", endpoint_path, error_message, body=body)
|
|
3963
|
-
return
|
|
3991
|
+
return AlertConfig.from_dict(response.json())
|
|
3964
3992
|
|
|
3965
|
-
def get_alert_config(self, alert_name: str, project=""):
|
|
3993
|
+
def get_alert_config(self, alert_name: str, project="") -> AlertConfig:
|
|
3966
3994
|
"""
|
|
3967
3995
|
Retrieve an alert.
|
|
3968
3996
|
:param alert_name: The name of the alert to retrieve.
|
|
@@ -3973,9 +4001,9 @@ class HTTPRunDB(RunDBInterface):
|
|
|
3973
4001
|
endpoint_path = f"projects/{project}/alerts/{alert_name}"
|
|
3974
4002
|
error_message = f"get alert {project}/alerts/{alert_name}"
|
|
3975
4003
|
response = self.api_call("GET", endpoint_path, error_message)
|
|
3976
|
-
return
|
|
4004
|
+
return AlertConfig.from_dict(response.json())
|
|
3977
4005
|
|
|
3978
|
-
def list_alerts_configs(self, project=""):
|
|
4006
|
+
def list_alerts_configs(self, project="") -> list[AlertConfig]:
|
|
3979
4007
|
"""
|
|
3980
4008
|
Retrieve list of alerts of a project.
|
|
3981
4009
|
:param project: The project name.
|
|
@@ -3987,7 +4015,7 @@ class HTTPRunDB(RunDBInterface):
|
|
|
3987
4015
|
response = self.api_call("GET", endpoint_path, error_message).json()
|
|
3988
4016
|
results = []
|
|
3989
4017
|
for item in response:
|
|
3990
|
-
results.append(
|
|
4018
|
+
results.append(AlertConfig(**item))
|
|
3991
4019
|
return results
|
|
3992
4020
|
|
|
3993
4021
|
def delete_alert_config(self, alert_name: str, project=""):
|
|
@@ -4012,6 +4040,32 @@ class HTTPRunDB(RunDBInterface):
|
|
|
4012
4040
|
error_message = f"post alert {project}/alerts/{alert_name}/reset"
|
|
4013
4041
|
self.api_call("POST", endpoint_path, error_message)
|
|
4014
4042
|
|
|
4043
|
+
def get_alert_template(
|
|
4044
|
+
self, template_name: str
|
|
4045
|
+
) -> mlrun.common.schemas.AlertTemplate:
|
|
4046
|
+
"""
|
|
4047
|
+
Retrieve a specific alert template.
|
|
4048
|
+
:param template_name: The name of the template to retrieve.
|
|
4049
|
+
:return: The template object.
|
|
4050
|
+
"""
|
|
4051
|
+
endpoint_path = f"alert-templates/{template_name}"
|
|
4052
|
+
error_message = f"get template alert-templates/{template_name}"
|
|
4053
|
+
response = self.api_call("GET", endpoint_path, error_message)
|
|
4054
|
+
return mlrun.common.schemas.AlertTemplate(**response.json())
|
|
4055
|
+
|
|
4056
|
+
def list_alert_templates(self) -> list[mlrun.common.schemas.AlertTemplate]:
|
|
4057
|
+
"""
|
|
4058
|
+
Retrieve list of all alert templates.
|
|
4059
|
+
:return: All the alert template objects in the database.
|
|
4060
|
+
"""
|
|
4061
|
+
endpoint_path = "alert-templates"
|
|
4062
|
+
error_message = "get templates /alert-templates"
|
|
4063
|
+
response = self.api_call("GET", endpoint_path, error_message).json()
|
|
4064
|
+
results = []
|
|
4065
|
+
for item in response:
|
|
4066
|
+
results.append(mlrun.common.schemas.AlertTemplate(**item))
|
|
4067
|
+
return results
|
|
4068
|
+
|
|
4015
4069
|
|
|
4016
4070
|
def _as_json(obj):
|
|
4017
4071
|
fn = getattr(obj, "to_json", None)
|