mlrun 1.7.0rc17__py3-none-any.whl → 1.7.0rc19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/__main__.py +5 -2
- mlrun/alerts/alert.py +1 -1
- mlrun/artifacts/manager.py +5 -1
- mlrun/common/constants.py +64 -3
- mlrun/common/formatters/__init__.py +16 -0
- mlrun/common/formatters/base.py +59 -0
- mlrun/common/formatters/function.py +41 -0
- mlrun/common/runtimes/constants.py +32 -4
- mlrun/common/schemas/__init__.py +1 -2
- mlrun/common/schemas/alert.py +31 -9
- mlrun/common/schemas/api_gateway.py +52 -0
- mlrun/common/schemas/client_spec.py +1 -0
- mlrun/common/schemas/frontend_spec.py +1 -0
- mlrun/common/schemas/function.py +4 -0
- mlrun/common/schemas/model_monitoring/__init__.py +9 -4
- mlrun/common/schemas/model_monitoring/constants.py +22 -8
- mlrun/common/schemas/model_monitoring/grafana.py +9 -5
- mlrun/common/schemas/model_monitoring/model_endpoints.py +17 -6
- mlrun/config.py +9 -2
- mlrun/data_types/to_pandas.py +5 -5
- mlrun/datastore/datastore.py +6 -2
- mlrun/datastore/redis.py +2 -2
- mlrun/datastore/s3.py +5 -0
- mlrun/datastore/sources.py +106 -7
- mlrun/datastore/store_resources.py +5 -1
- mlrun/datastore/targets.py +5 -4
- mlrun/datastore/utils.py +42 -0
- mlrun/db/base.py +5 -1
- mlrun/db/httpdb.py +22 -3
- mlrun/db/nopdb.py +5 -1
- mlrun/errors.py +6 -0
- mlrun/execution.py +16 -6
- mlrun/feature_store/ingestion.py +7 -6
- mlrun/feature_store/retrieval/conversion.py +5 -5
- mlrun/feature_store/retrieval/job.py +7 -3
- mlrun/feature_store/retrieval/spark_merger.py +2 -1
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +2 -2
- mlrun/frameworks/parallel_coordinates.py +2 -1
- mlrun/frameworks/tf_keras/__init__.py +4 -1
- mlrun/launcher/client.py +4 -2
- mlrun/launcher/local.py +8 -2
- mlrun/launcher/remote.py +8 -2
- mlrun/model.py +5 -1
- mlrun/model_monitoring/db/stores/__init__.py +0 -2
- mlrun/model_monitoring/db/stores/base/store.py +16 -4
- mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +43 -21
- mlrun/model_monitoring/db/stores/sqldb/models/base.py +32 -2
- mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +25 -5
- mlrun/model_monitoring/db/stores/sqldb/models/sqlite.py +5 -0
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +235 -166
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +190 -91
- mlrun/model_monitoring/db/tsdb/__init__.py +35 -6
- mlrun/model_monitoring/db/tsdb/base.py +232 -38
- mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
- mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +240 -0
- mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +45 -0
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +397 -0
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +292 -104
- mlrun/model_monitoring/helpers.py +45 -0
- mlrun/model_monitoring/stream_processing.py +7 -4
- mlrun/model_monitoring/writer.py +50 -20
- mlrun/package/utils/_formatter.py +2 -2
- mlrun/projects/operations.py +8 -5
- mlrun/projects/pipelines.py +42 -15
- mlrun/projects/project.py +55 -14
- mlrun/render.py +8 -5
- mlrun/runtimes/base.py +2 -1
- mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
- mlrun/runtimes/local.py +4 -1
- mlrun/runtimes/nuclio/api_gateway.py +32 -8
- mlrun/runtimes/nuclio/application/application.py +3 -3
- mlrun/runtimes/nuclio/function.py +1 -4
- mlrun/runtimes/utils.py +5 -6
- mlrun/serving/server.py +2 -1
- mlrun/utils/async_http.py +25 -5
- mlrun/utils/helpers.py +28 -7
- mlrun/utils/logger.py +28 -1
- mlrun/utils/notifications/notification/__init__.py +14 -9
- mlrun/utils/notifications/notification/slack.py +27 -7
- mlrun/utils/notifications/notification_pusher.py +47 -42
- mlrun/utils/v3io_clients.py +0 -1
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.0rc17.dist-info → mlrun-1.7.0rc19.dist-info}/METADATA +9 -4
- {mlrun-1.7.0rc17.dist-info → mlrun-1.7.0rc19.dist-info}/RECORD +89 -82
- mlrun/model_monitoring/db/v3io_tsdb_reader.py +0 -134
- {mlrun-1.7.0rc17.dist-info → mlrun-1.7.0rc19.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc17.dist-info → mlrun-1.7.0rc19.dist-info}/WHEEL +0 -0
- {mlrun-1.7.0rc17.dist-info → mlrun-1.7.0rc19.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc17.dist-info → mlrun-1.7.0rc19.dist-info}/top_level.txt +0 -0
mlrun/runtimes/nuclio/application/application.py
CHANGED
|
@@ -172,7 +172,7 @@ class ApplicationStatus(NuclioStatus):
|
|
|
172
172
|
class ApplicationRuntime(RemoteRuntime):
|
|
173
173
|
kind = "application"
|
|
174
174
|
|
|
175
|
-
@min_nuclio_versions("1.
|
|
175
|
+
@min_nuclio_versions("1.13.1")
|
|
176
176
|
def __init__(self, spec=None, metadata=None):
|
|
177
177
|
super().__init__(spec=spec, metadata=metadata)
|
|
178
178
|
|
|
@@ -387,7 +387,7 @@ class ApplicationRuntime(RemoteRuntime):
|
|
|
387
387
|
elif authentication_mode == schemas.APIGatewayAuthenticationMode.basic:
|
|
388
388
|
api_gateway.with_basic_auth(*authentication_creds)
|
|
389
389
|
|
|
390
|
-
db =
|
|
390
|
+
db = self._get_db()
|
|
391
391
|
api_gateway_scheme = db.store_api_gateway(
|
|
392
392
|
api_gateway=api_gateway.to_scheme(), project=self.metadata.project
|
|
393
393
|
)
|
|
@@ -505,7 +505,7 @@ class ApplicationRuntime(RemoteRuntime):
|
|
|
505
505
|
if not self.status.api_gateway_name:
|
|
506
506
|
return
|
|
507
507
|
|
|
508
|
-
db =
|
|
508
|
+
db = self._get_db()
|
|
509
509
|
api_gateway_scheme = db.get_api_gateway(
|
|
510
510
|
name=self.status.api_gateway_name, project=self.metadata.project
|
|
511
511
|
)
|
mlrun/runtimes/nuclio/function.py
CHANGED
|
@@ -65,10 +65,7 @@ def min_nuclio_versions(*versions):
|
|
|
65
65
|
if validate_nuclio_version_compatibility(*versions):
|
|
66
66
|
return function(*args, **kwargs)
|
|
67
67
|
|
|
68
|
-
message = (
|
|
69
|
-
f"{function.__name__} is supported since nuclio {' or '.join(versions)}, currently using "
|
|
70
|
-
f"nuclio {mlconf.nuclio_version}, please upgrade."
|
|
71
|
-
)
|
|
68
|
+
message = f"'{function.__qualname__}' function requires Nuclio v{' or v'.join(versions)} or higher"
|
|
72
69
|
raise mlrun.errors.MLRunIncompatibleVersionError(message)
|
|
73
70
|
|
|
74
71
|
return wrapper
|
mlrun/runtimes/utils.py
CHANGED
|
@@ -23,6 +23,7 @@ import pandas as pd
|
|
|
23
23
|
|
|
24
24
|
import mlrun
|
|
25
25
|
import mlrun.common.constants
|
|
26
|
+
import mlrun.common.constants as mlrun_constants
|
|
26
27
|
import mlrun.common.schemas
|
|
27
28
|
import mlrun.utils.regex
|
|
28
29
|
from mlrun.artifacts import TableArtifact
|
|
@@ -38,9 +39,6 @@ class RunError(Exception):
|
|
|
38
39
|
pass
|
|
39
40
|
|
|
40
41
|
|
|
41
|
-
mlrun_key = "mlrun/"
|
|
42
|
-
|
|
43
|
-
|
|
44
42
|
class _ContextStore:
|
|
45
43
|
def __init__(self):
|
|
46
44
|
self._context = None
|
|
@@ -372,10 +370,10 @@ def generate_resources(mem=None, cpu=None, gpus=None, gpu_type="nvidia.com/gpu")
|
|
|
372
370
|
|
|
373
371
|
|
|
374
372
|
def get_func_selector(project, name=None, tag=None):
|
|
375
|
-
s = [f"{
|
|
373
|
+
s = [f"{mlrun_constants.MLRunInternalLabels.project}={project}"]
|
|
376
374
|
if name:
|
|
377
|
-
s.append(f"{
|
|
378
|
-
s.append(f"{
|
|
375
|
+
s.append(f"{mlrun_constants.MLRunInternalLabels.function}={name}")
|
|
376
|
+
s.append(f"{mlrun_constants.MLRunInternalLabels.tag}={tag or 'latest'}")
|
|
379
377
|
return s
|
|
380
378
|
|
|
381
379
|
|
|
@@ -438,6 +436,7 @@ def enrich_run_labels(
|
|
|
438
436
|
):
|
|
439
437
|
labels_enrichment = {
|
|
440
438
|
RunLabels.owner: os.environ.get("V3IO_USERNAME") or getpass.getuser(),
|
|
439
|
+
# TODO: remove this in 1.9.0
|
|
441
440
|
RunLabels.v3io_user: os.environ.get("V3IO_USERNAME"),
|
|
442
441
|
}
|
|
443
442
|
labels_to_enrich = labels_to_enrich or RunLabels.all()
|
mlrun/serving/server.py
CHANGED
|
@@ -387,7 +387,7 @@ def v2_serving_handler(context, event, get_body=False):
|
|
|
387
387
|
|
|
388
388
|
|
|
389
389
|
def create_graph_server(
|
|
390
|
-
parameters=
|
|
390
|
+
parameters=None,
|
|
391
391
|
load_mode=None,
|
|
392
392
|
graph=None,
|
|
393
393
|
verbose=False,
|
|
@@ -403,6 +403,7 @@ def create_graph_server(
|
|
|
403
403
|
server.graph.add_route("my", class_name=MyModelClass, model_path="{path}", z=100)
|
|
404
404
|
print(server.test("/v2/models/my/infer", testdata))
|
|
405
405
|
"""
|
|
406
|
+
parameters = parameters or {}
|
|
406
407
|
server = GraphServer(graph, parameters, load_mode, verbose=verbose, **kwargs)
|
|
407
408
|
server.set_current_function(
|
|
408
409
|
current_function or os.environ.get("SERVING_CURRENT_FUNCTION", "")
|
mlrun/utils/async_http.py
CHANGED
|
@@ -24,7 +24,7 @@ from aiohttp_retry import ExponentialRetry, RequestParams, RetryClient, RetryOpt
|
|
|
24
24
|
from aiohttp_retry.client import _RequestContext
|
|
25
25
|
|
|
26
26
|
from mlrun.config import config
|
|
27
|
-
from mlrun.errors import err_to_str
|
|
27
|
+
from mlrun.errors import err_to_str, raise_for_status
|
|
28
28
|
|
|
29
29
|
from .helpers import logger as mlrun_logger
|
|
30
30
|
|
|
@@ -46,12 +46,21 @@ class AsyncClientWithRetry(RetryClient):
|
|
|
46
46
|
*args,
|
|
47
47
|
**kwargs,
|
|
48
48
|
):
|
|
49
|
+
# do not retry on PUT / PATCH as they might have side effects (not truly idempotent)
|
|
50
|
+
blacklisted_methods = (
|
|
51
|
+
blacklisted_methods
|
|
52
|
+
if blacklisted_methods is not None
|
|
53
|
+
else [
|
|
54
|
+
"POST",
|
|
55
|
+
"PUT",
|
|
56
|
+
"PATCH",
|
|
57
|
+
]
|
|
58
|
+
)
|
|
49
59
|
super().__init__(
|
|
50
60
|
*args,
|
|
51
61
|
retry_options=ExponentialRetryOverride(
|
|
52
62
|
retry_on_exception=retry_on_exception,
|
|
53
|
-
|
|
54
|
-
blacklisted_methods=blacklisted_methods or ["POST", "PUT", "PATCH"],
|
|
63
|
+
blacklisted_methods=blacklisted_methods,
|
|
55
64
|
attempts=max_retries,
|
|
56
65
|
statuses=retry_on_status_codes,
|
|
57
66
|
factor=retry_backoff_factor,
|
|
@@ -63,6 +72,12 @@ class AsyncClientWithRetry(RetryClient):
|
|
|
63
72
|
**kwargs,
|
|
64
73
|
)
|
|
65
74
|
|
|
75
|
+
def methods_blacklist_update_required(self, new_blacklist: str):
|
|
76
|
+
self._retry_options: ExponentialRetryOverride
|
|
77
|
+
return set(self._retry_options.blacklisted_methods).difference(
|
|
78
|
+
set(new_blacklist)
|
|
79
|
+
)
|
|
80
|
+
|
|
66
81
|
def _make_requests(
|
|
67
82
|
self,
|
|
68
83
|
params_list: list[RequestParams],
|
|
@@ -173,7 +188,7 @@ class _CustomRequestContext(_RequestContext):
|
|
|
173
188
|
last_attempt = current_attempt == self._retry_options.attempts
|
|
174
189
|
if self._is_status_code_ok(response.status) or last_attempt:
|
|
175
190
|
if self._raise_for_status:
|
|
176
|
-
|
|
191
|
+
raise_for_status(response)
|
|
177
192
|
|
|
178
193
|
self._response = response
|
|
179
194
|
return response
|
|
@@ -275,6 +290,11 @@ class _CustomRequestContext(_RequestContext):
|
|
|
275
290
|
if isinstance(exc.os_error, exc_type):
|
|
276
291
|
return
|
|
277
292
|
if exc.__cause__:
|
|
278
|
-
return
|
|
293
|
+
# If the cause exception is retriable, return, otherwise, raise the original exception
|
|
294
|
+
try:
|
|
295
|
+
self.verify_exception_type(exc.__cause__)
|
|
296
|
+
except Exception:
|
|
297
|
+
raise exc
|
|
298
|
+
return
|
|
279
299
|
else:
|
|
280
300
|
raise exc
|
mlrun/utils/helpers.py
CHANGED
|
@@ -973,6 +973,15 @@ def get_ui_url(project, uid=None):
|
|
|
973
973
|
return url
|
|
974
974
|
|
|
975
975
|
|
|
976
|
+
def get_model_endpoint_url(project, model_name, model_endpoint_id):
|
|
977
|
+
url = ""
|
|
978
|
+
if mlrun.mlconf.resolve_ui_url():
|
|
979
|
+
url = f"{mlrun.mlconf.resolve_ui_url()}/{mlrun.mlconf.ui.projects_prefix}/{project}/models"
|
|
980
|
+
if model_name:
|
|
981
|
+
url += f"/model-endpoints/{model_name}/{model_endpoint_id}/overview"
|
|
982
|
+
return url
|
|
983
|
+
|
|
984
|
+
|
|
976
985
|
def get_workflow_url(project, id=None):
|
|
977
986
|
url = ""
|
|
978
987
|
if mlrun.mlconf.resolve_ui_url():
|
|
@@ -1097,7 +1106,7 @@ def get_function(function, namespace):
|
|
|
1097
1106
|
|
|
1098
1107
|
|
|
1099
1108
|
def get_handler_extended(
|
|
1100
|
-
handler_path: str, context=None, class_args: dict =
|
|
1109
|
+
handler_path: str, context=None, class_args: dict = None, namespaces=None
|
|
1101
1110
|
):
|
|
1102
1111
|
"""get function handler from [class_name::]handler string
|
|
1103
1112
|
|
|
@@ -1107,6 +1116,7 @@ def get_handler_extended(
|
|
|
1107
1116
|
:param namespaces: one or list of namespaces/modules to search the handler in
|
|
1108
1117
|
:return: function handler (callable)
|
|
1109
1118
|
"""
|
|
1119
|
+
class_args = class_args or {}
|
|
1110
1120
|
if "::" not in handler_path:
|
|
1111
1121
|
return get_function(handler_path, namespaces)
|
|
1112
1122
|
|
|
@@ -1183,7 +1193,7 @@ def calculate_dataframe_hash(dataframe: pandas.DataFrame):
|
|
|
1183
1193
|
return hashlib.sha1(pandas.util.hash_pandas_object(dataframe).values).hexdigest()
|
|
1184
1194
|
|
|
1185
1195
|
|
|
1186
|
-
def template_artifact_path(artifact_path, project, run_uid="project"):
|
|
1196
|
+
def template_artifact_path(artifact_path, project, run_uid=None):
|
|
1187
1197
|
"""
|
|
1188
1198
|
Replace {{run.uid}} with the run uid and {{project}} with the project name in the artifact path.
|
|
1189
1199
|
If no run uid is provided, the word `project` will be used instead as it is assumed to be a project
|
|
@@ -1191,6 +1201,7 @@ def template_artifact_path(artifact_path, project, run_uid="project"):
|
|
|
1191
1201
|
"""
|
|
1192
1202
|
if not artifact_path:
|
|
1193
1203
|
return artifact_path
|
|
1204
|
+
run_uid = run_uid or "project"
|
|
1194
1205
|
artifact_path = artifact_path.replace("{{run.uid}}", run_uid)
|
|
1195
1206
|
artifact_path = _fill_project_path_template(artifact_path, project)
|
|
1196
1207
|
return artifact_path
|
|
@@ -1575,11 +1586,12 @@ def validate_component_version_compatibility(
|
|
|
1575
1586
|
component_current_version = mlrun.mlconf.igz_version
|
|
1576
1587
|
parsed_current_version = mlrun.mlconf.get_parsed_igz_version()
|
|
1577
1588
|
|
|
1578
|
-
|
|
1579
|
-
|
|
1580
|
-
|
|
1581
|
-
|
|
1582
|
-
|
|
1589
|
+
if parsed_current_version:
|
|
1590
|
+
# ignore pre-release and build metadata, as iguazio version always has them, and we only care about the
|
|
1591
|
+
# major, minor, and patch versions
|
|
1592
|
+
parsed_current_version = semver.VersionInfo.parse(
|
|
1593
|
+
f"{parsed_current_version.major}.{parsed_current_version.minor}.{parsed_current_version.patch}"
|
|
1594
|
+
)
|
|
1583
1595
|
if component_name == "nuclio":
|
|
1584
1596
|
component_current_version = mlrun.mlconf.nuclio_version
|
|
1585
1597
|
parsed_current_version = semver.VersionInfo.parse(
|
|
@@ -1603,3 +1615,12 @@ def validate_component_version_compatibility(
|
|
|
1603
1615
|
if parsed_current_version < parsed_min_version:
|
|
1604
1616
|
return False
|
|
1605
1617
|
return True
|
|
1618
|
+
|
|
1619
|
+
|
|
1620
|
+
def format_alert_summary(
|
|
1621
|
+
alert: mlrun.common.schemas.AlertConfig, event_data: mlrun.common.schemas.Event
|
|
1622
|
+
) -> str:
|
|
1623
|
+
result = alert.summary.replace("{{project}}", alert.project)
|
|
1624
|
+
result = result.replace("{{name}}", alert.name)
|
|
1625
|
+
result = result.replace("{{entity}}", event_data.entity.ids[0])
|
|
1626
|
+
return result
|
mlrun/utils/logger.py
CHANGED
|
@@ -93,7 +93,25 @@ class HumanReadableFormatter(_BaseFormatter):
|
|
|
93
93
|
|
|
94
94
|
class HumanReadableExtendedFormatter(HumanReadableFormatter):
|
|
95
95
|
def format(self, record) -> str:
|
|
96
|
-
more =
|
|
96
|
+
more = ""
|
|
97
|
+
record_with = self._record_with(record)
|
|
98
|
+
if record_with:
|
|
99
|
+
|
|
100
|
+
def _format_value(val):
|
|
101
|
+
formatted_val = (
|
|
102
|
+
val
|
|
103
|
+
if isinstance(val, str)
|
|
104
|
+
else str(orjson.loads(self._json_dump(val)))
|
|
105
|
+
)
|
|
106
|
+
return (
|
|
107
|
+
formatted_val.replace("\n", "\n\t\t")
|
|
108
|
+
if len(formatted_val) < 4096
|
|
109
|
+
else repr(formatted_val)
|
|
110
|
+
)
|
|
111
|
+
|
|
112
|
+
more = "\n\t" + "\n\t".join(
|
|
113
|
+
[f"{key}: {_format_value(val)}" for key, val in record_with.items()]
|
|
114
|
+
)
|
|
97
115
|
return (
|
|
98
116
|
"> "
|
|
99
117
|
f"{self.formatTime(record, self.datefmt)} "
|
|
@@ -234,6 +252,15 @@ def resolve_formatter_by_kind(
|
|
|
234
252
|
}[formatter_kind]
|
|
235
253
|
|
|
236
254
|
|
|
255
|
+
def create_test_logger(name: str = "mlrun", stream: IO[str] = stdout) -> Logger:
|
|
256
|
+
return create_logger(
|
|
257
|
+
level="debug",
|
|
258
|
+
formatter_kind=FormatterKinds.HUMAN_EXTENDED.name,
|
|
259
|
+
name=name,
|
|
260
|
+
stream=stream,
|
|
261
|
+
)
|
|
262
|
+
|
|
263
|
+
|
|
237
264
|
def create_logger(
|
|
238
265
|
level: Optional[str] = None,
|
|
239
266
|
formatter_kind: str = FormatterKinds.HUMAN.name,
|
mlrun/utils/notifications/notification/__init__.py
CHANGED
|
@@ -51,14 +51,19 @@ class NotificationTypes(str, enum.Enum):
|
|
|
51
51
|
self.console: [self.ipython],
|
|
52
52
|
}.get(self, [])
|
|
53
53
|
|
|
54
|
+
@classmethod
|
|
55
|
+
def local(cls) -> list[str]:
|
|
56
|
+
return [
|
|
57
|
+
cls.console,
|
|
58
|
+
cls.ipython,
|
|
59
|
+
]
|
|
60
|
+
|
|
54
61
|
@classmethod
|
|
55
62
|
def all(cls) -> list[str]:
|
|
56
|
-
return
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
]
|
|
64
|
-
)
|
|
63
|
+
return [
|
|
64
|
+
cls.console,
|
|
65
|
+
cls.git,
|
|
66
|
+
cls.ipython,
|
|
67
|
+
cls.slack,
|
|
68
|
+
cls.webhook,
|
|
69
|
+
]
|
mlrun/utils/notifications/notification/slack.py
CHANGED
|
@@ -32,6 +32,7 @@ class SlackNotification(NotificationBase):
|
|
|
32
32
|
"completed": ":smiley:",
|
|
33
33
|
"running": ":man-running:",
|
|
34
34
|
"error": ":x:",
|
|
35
|
+
"skipped": ":zzz:",
|
|
35
36
|
}
|
|
36
37
|
|
|
37
38
|
async def push(
|
|
@@ -135,8 +136,16 @@ class SlackNotification(NotificationBase):
|
|
|
135
136
|
line = [
|
|
136
137
|
self._get_slack_row(f":bell: {alert.name} alert has occurred"),
|
|
137
138
|
self._get_slack_row(f"*Project:*\n{alert.project}"),
|
|
138
|
-
self._get_slack_row(f"*
|
|
139
|
+
self._get_slack_row(f"*ID:*\n{event_data.entity.ids[0]}"),
|
|
139
140
|
]
|
|
141
|
+
|
|
142
|
+
if alert.summary:
|
|
143
|
+
line.append(
|
|
144
|
+
self._get_slack_row(
|
|
145
|
+
f"*Summary:*\n{mlrun.utils.helpers.format_alert_summary(alert, event_data)}"
|
|
146
|
+
)
|
|
147
|
+
)
|
|
148
|
+
|
|
140
149
|
if event_data.value_dict:
|
|
141
150
|
data_lines = []
|
|
142
151
|
for key, value in event_data.value_dict.items():
|
|
@@ -144,10 +153,21 @@ class SlackNotification(NotificationBase):
|
|
|
144
153
|
data_text = "\n".join(data_lines)
|
|
145
154
|
line.append(self._get_slack_row(f"*Event data:*\n{data_text}"))
|
|
146
155
|
|
|
147
|
-
if
|
|
148
|
-
|
|
149
|
-
):
|
|
150
|
-
|
|
156
|
+
if (
|
|
157
|
+
event_data.entity.kind == mlrun.common.schemas.alert.EventEntityKind.JOB
|
|
158
|
+
): # JOB entity
|
|
159
|
+
uid = event_data.value_dict.get("uid")
|
|
160
|
+
url = mlrun.utils.helpers.get_ui_url(alert.project, uid)
|
|
161
|
+
overview_type = "Job overview"
|
|
162
|
+
else: # MODEL entity
|
|
163
|
+
model_name = event_data.value_dict.get("model")
|
|
164
|
+
model_endpoint_id = event_data.value_dict.get("model_endpoint_id")
|
|
165
|
+
url = mlrun.utils.helpers.get_model_endpoint_url(
|
|
166
|
+
alert.project, model_name, model_endpoint_id
|
|
167
|
+
)
|
|
168
|
+
overview_type = "Model endpoint"
|
|
169
|
+
|
|
170
|
+
line.append(self._get_slack_row(f"*Overview:*\n<{url}|*{overview_type}*>"))
|
|
151
171
|
|
|
152
172
|
return line
|
|
153
173
|
|
|
@@ -157,11 +177,11 @@ class SlackNotification(NotificationBase):
|
|
|
157
177
|
|
|
158
178
|
# Only show the URL if the run is not a function (serving or mlrun function)
|
|
159
179
|
kind = run.get("step_kind")
|
|
160
|
-
|
|
180
|
+
state = run["status"].get("state", "")
|
|
181
|
+
if state != "skipped" and (url and not kind or kind == "run"):
|
|
161
182
|
line = f'<{url}|*{meta.get("name")}*>'
|
|
162
183
|
else:
|
|
163
184
|
line = meta.get("name")
|
|
164
|
-
state = run["status"].get("state", "")
|
|
165
185
|
if kind:
|
|
166
186
|
line = f'{line} *({run.get("step_kind", run.get("kind", ""))})*'
|
|
167
187
|
line = f'{self.emojis.get(state, ":question:")} {line}'
|
mlrun/utils/notifications/notification_pusher.py
CHANGED
|
@@ -14,7 +14,6 @@
|
|
|
14
14
|
|
|
15
15
|
import asyncio
|
|
16
16
|
import datetime
|
|
17
|
-
import json
|
|
18
17
|
import os
|
|
19
18
|
import re
|
|
20
19
|
import traceback
|
|
@@ -23,7 +22,9 @@ from concurrent.futures import ThreadPoolExecutor
|
|
|
23
22
|
|
|
24
23
|
import kfp
|
|
25
24
|
import mlrun_pipelines.common.ops
|
|
25
|
+
import mlrun_pipelines.models
|
|
26
26
|
|
|
27
|
+
import mlrun.common.constants as mlrun_constants
|
|
27
28
|
import mlrun.common.runtimes.constants
|
|
28
29
|
import mlrun.common.schemas
|
|
29
30
|
import mlrun.config
|
|
@@ -239,8 +240,8 @@ class NotificationPusher(_NotificationPusherBase):
|
|
|
239
240
|
resource = "Run"
|
|
240
241
|
runs = [run.to_dict()]
|
|
241
242
|
|
|
242
|
-
if
|
|
243
|
-
resource =
|
|
243
|
+
if mlrun_constants.MLRunInternalLabels.workflow in run.metadata.labels:
|
|
244
|
+
resource = mlrun_constants.MLRunInternalLabels.workflow
|
|
244
245
|
custom_message = (
|
|
245
246
|
f" (workflow: {run.metadata.labels['workflow']}){custom_message}"
|
|
246
247
|
)
|
|
@@ -392,17 +393,29 @@ class NotificationPusher(_NotificationPusherBase):
|
|
|
392
393
|
steps = []
|
|
393
394
|
db = mlrun.get_run_db()
|
|
394
395
|
|
|
395
|
-
def _add_run_step(
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
396
|
+
def _add_run_step(_step: mlrun_pipelines.models.PipelineStep):
|
|
397
|
+
try:
|
|
398
|
+
_run = db.list_runs(
|
|
399
|
+
project=run.metadata.project,
|
|
400
|
+
labels=f"mlrun_constants.MLRunInternalLabels.runner_pod={_step.node_name}",
|
|
401
|
+
)[0]
|
|
402
|
+
except IndexError:
|
|
403
|
+
_run = {
|
|
404
|
+
"metadata": {
|
|
405
|
+
"name": _step.display_name,
|
|
406
|
+
"project": run.metadata.project,
|
|
407
|
+
},
|
|
408
|
+
}
|
|
409
|
+
_run["step_kind"] = _step.step_type
|
|
410
|
+
if _step.skipped:
|
|
411
|
+
_run.setdefault("status", {})["state"] = (
|
|
412
|
+
mlrun.common.runtimes.constants.RunStates.skipped
|
|
413
|
+
)
|
|
401
414
|
steps.append(_run)
|
|
402
415
|
|
|
403
|
-
def _add_deploy_function_step(
|
|
416
|
+
def _add_deploy_function_step(_step: mlrun_pipelines.models.PipelineStep):
|
|
404
417
|
project, name, hash_key = self._extract_function_uri(
|
|
405
|
-
|
|
418
|
+
_step.get_annotation("mlrun/function-uri")
|
|
406
419
|
)
|
|
407
420
|
if name:
|
|
408
421
|
try:
|
|
@@ -419,16 +432,19 @@ class NotificationPusher(_NotificationPusherBase):
|
|
|
419
432
|
"hash_key": hash_key,
|
|
420
433
|
},
|
|
421
434
|
}
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
435
|
+
pod_phase = _step.phase
|
|
436
|
+
if _step.skipped:
|
|
437
|
+
state = mlrun.common.schemas.FunctionState.skipped
|
|
438
|
+
else:
|
|
439
|
+
state = mlrun.common.runtimes.constants.PodPhases.pod_phase_to_run_state(
|
|
440
|
+
pod_phase
|
|
441
|
+
)
|
|
442
|
+
function["status"] = {"state": state}
|
|
427
443
|
if isinstance(function["metadata"].get("updated"), datetime.datetime):
|
|
428
444
|
function["metadata"]["updated"] = function["metadata"][
|
|
429
445
|
"updated"
|
|
430
446
|
].isoformat()
|
|
431
|
-
function["step_kind"] =
|
|
447
|
+
function["step_kind"] = _step.step_type
|
|
432
448
|
steps.append(function)
|
|
433
449
|
|
|
434
450
|
step_methods = {
|
|
@@ -446,26 +462,10 @@ class NotificationPusher(_NotificationPusherBase):
|
|
|
446
462
|
return steps
|
|
447
463
|
|
|
448
464
|
try:
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
key=lambda _node: _node[1]["finishedAt"],
|
|
452
|
-
)
|
|
453
|
-
for node_name, node in workflow_nodes:
|
|
454
|
-
if node["type"] != "Pod":
|
|
455
|
-
# Skip the parent DAG node
|
|
456
|
-
continue
|
|
457
|
-
|
|
458
|
-
node_template = next(
|
|
459
|
-
template
|
|
460
|
-
for template in workflow_manifest["spec"]["templates"]
|
|
461
|
-
if template["name"] == node["templateName"]
|
|
462
|
-
)
|
|
463
|
-
step_type = node_template["metadata"]["annotations"].get(
|
|
464
|
-
"mlrun/pipeline-step-type"
|
|
465
|
-
)
|
|
466
|
-
step_method = step_methods.get(step_type)
|
|
465
|
+
for step in workflow_manifest.get_steps():
|
|
466
|
+
step_method = step_methods.get(step.step_type)
|
|
467
467
|
if step_method:
|
|
468
|
-
step_method(
|
|
468
|
+
step_method(step)
|
|
469
469
|
return steps
|
|
470
470
|
except Exception:
|
|
471
471
|
# If we fail to read the pipeline steps, we will return the list of runs that have the same workflow id
|
|
@@ -481,7 +481,9 @@ class NotificationPusher(_NotificationPusherBase):
|
|
|
481
481
|
)
|
|
482
482
|
|
|
483
483
|
@staticmethod
|
|
484
|
-
def _get_workflow_manifest(
|
|
484
|
+
def _get_workflow_manifest(
|
|
485
|
+
workflow_id: str,
|
|
486
|
+
) -> typing.Optional[mlrun_pipelines.models.PipelineManifest]:
|
|
485
487
|
kfp_url = mlrun.mlconf.resolve_kfp_url(mlrun.mlconf.namespace)
|
|
486
488
|
if not kfp_url:
|
|
487
489
|
raise mlrun.errors.MLRunNotFoundError(
|
|
@@ -495,11 +497,8 @@ class NotificationPusher(_NotificationPusherBase):
|
|
|
495
497
|
if not kfp_run:
|
|
496
498
|
return None
|
|
497
499
|
|
|
498
|
-
kfp_run =
|
|
499
|
-
|
|
500
|
-
return json.loads(kfp_run["pipeline_runtime"]["workflow_manifest"])
|
|
501
|
-
except Exception:
|
|
502
|
-
return None
|
|
500
|
+
kfp_run = mlrun_pipelines.models.PipelineRun(kfp_run)
|
|
501
|
+
return kfp_run.workflow_manifest()
|
|
503
502
|
|
|
504
503
|
def _extract_function_uri(self, function_uri: str) -> tuple[str, str, str]:
|
|
505
504
|
"""
|
|
@@ -539,6 +538,12 @@ class CustomNotificationPusher(_NotificationPusherBase):
|
|
|
539
538
|
if notification.is_async
|
|
540
539
|
}
|
|
541
540
|
|
|
541
|
+
@property
|
|
542
|
+
def notifications(self):
|
|
543
|
+
notifications = self._sync_notifications.copy()
|
|
544
|
+
notifications.update(self._async_notifications)
|
|
545
|
+
return notifications
|
|
546
|
+
|
|
542
547
|
def push(
|
|
543
548
|
self,
|
|
544
549
|
message: str,
|
mlrun/utils/v3io_clients.py
CHANGED
|
@@ -11,7 +11,6 @@
|
|
|
11
11
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
|
-
#
|
|
15
14
|
|
|
16
15
|
from v3io.dataplane import Client as V3IOClient
|
|
17
16
|
from v3io_frames import Client as get_client
|
mlrun/utils/version/version.json
CHANGED
|
{mlrun-1.7.0rc17.dist-info → mlrun-1.7.0rc19.dist-info}/METADATA
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: mlrun
|
|
3
|
-
Version: 1.7.0rc17
|
|
3
|
+
Version: 1.7.0rc19
|
|
4
4
|
Summary: Tracking and config of machine learning runs
|
|
5
5
|
Home-page: https://github.com/mlrun/mlrun
|
|
6
6
|
Author: Yaron Haviv
|
|
@@ -43,15 +43,15 @@ Requires-Dist: semver ~=3.0
|
|
|
43
43
|
Requires-Dist: dependency-injector ~=4.41
|
|
44
44
|
Requires-Dist: fsspec <2024.4,>=2023.9.2
|
|
45
45
|
Requires-Dist: v3iofs ~=0.1.17
|
|
46
|
-
Requires-Dist: storey ~=1.7.
|
|
46
|
+
Requires-Dist: storey ~=1.7.17
|
|
47
47
|
Requires-Dist: inflection ~=0.5.0
|
|
48
48
|
Requires-Dist: python-dotenv ~=0.17.0
|
|
49
49
|
Requires-Dist: setuptools ~=69.1
|
|
50
50
|
Requires-Dist: deprecated ~=1.2
|
|
51
51
|
Requires-Dist: jinja2 >=3.1.3,~=3.1
|
|
52
52
|
Requires-Dist: orjson <4,>=3.9.15
|
|
53
|
-
Requires-Dist: mlrun-pipelines-kfp-common
|
|
54
|
-
Requires-Dist: mlrun-pipelines-kfp-v1-8
|
|
53
|
+
Requires-Dist: mlrun-pipelines-kfp-common ~=0.1.0
|
|
54
|
+
Requires-Dist: mlrun-pipelines-kfp-v1-8 ~=0.1.0
|
|
55
55
|
Provides-Extra: alibaba-oss
|
|
56
56
|
Requires-Dist: ossfs ==2023.12.0 ; extra == 'alibaba-oss'
|
|
57
57
|
Requires-Dist: oss2 ==2.18.1 ; extra == 'alibaba-oss'
|
|
@@ -82,6 +82,7 @@ Requires-Dist: pyopenssl >=23 ; extra == 'all'
|
|
|
82
82
|
Requires-Dist: redis ~=4.3 ; extra == 'all'
|
|
83
83
|
Requires-Dist: s3fs <2024.4,>=2023.9.2 ; extra == 'all'
|
|
84
84
|
Requires-Dist: sqlalchemy ~=1.4 ; extra == 'all'
|
|
85
|
+
Requires-Dist: taos-ws-py ~=0.3.2 ; extra == 'all'
|
|
85
86
|
Provides-Extra: api
|
|
86
87
|
Requires-Dist: uvicorn ~=0.27.1 ; extra == 'api'
|
|
87
88
|
Requires-Dist: dask-kubernetes ~=0.11.0 ; extra == 'api'
|
|
@@ -129,6 +130,7 @@ Requires-Dist: pyopenssl >=23 ; extra == 'complete'
|
|
|
129
130
|
Requires-Dist: redis ~=4.3 ; extra == 'complete'
|
|
130
131
|
Requires-Dist: s3fs <2024.4,>=2023.9.2 ; extra == 'complete'
|
|
131
132
|
Requires-Dist: sqlalchemy ~=1.4 ; extra == 'complete'
|
|
133
|
+
Requires-Dist: taos-ws-py ~=0.3.2 ; extra == 'complete'
|
|
132
134
|
Provides-Extra: complete-api
|
|
133
135
|
Requires-Dist: adlfs ==2023.9.0 ; extra == 'complete-api'
|
|
134
136
|
Requires-Dist: aiobotocore <2.8,>=2.5.0 ; extra == 'complete-api'
|
|
@@ -161,6 +163,7 @@ Requires-Dist: pyopenssl >=23 ; extra == 'complete-api'
|
|
|
161
163
|
Requires-Dist: redis ~=4.3 ; extra == 'complete-api'
|
|
162
164
|
Requires-Dist: s3fs <2024.4,>=2023.9.2 ; extra == 'complete-api'
|
|
163
165
|
Requires-Dist: sqlalchemy ~=1.4 ; extra == 'complete-api'
|
|
166
|
+
Requires-Dist: taos-ws-py ~=0.3.2 ; extra == 'complete-api'
|
|
164
167
|
Requires-Dist: timelength ~=1.1 ; extra == 'complete-api'
|
|
165
168
|
Requires-Dist: uvicorn ~=0.27.1 ; extra == 'complete-api'
|
|
166
169
|
Provides-Extra: dask
|
|
@@ -193,6 +196,8 @@ Requires-Dist: aiobotocore <2.8,>=2.5.0 ; extra == 's3'
|
|
|
193
196
|
Requires-Dist: s3fs <2024.4,>=2023.9.2 ; extra == 's3'
|
|
194
197
|
Provides-Extra: sqlalchemy
|
|
195
198
|
Requires-Dist: sqlalchemy ~=1.4 ; extra == 'sqlalchemy'
|
|
199
|
+
Provides-Extra: tdengine
|
|
200
|
+
Requires-Dist: taos-ws-py ~=0.3.2 ; extra == 'tdengine'
|
|
196
201
|
|
|
197
202
|
<a id="top"></a>
|
|
198
203
|
[](https://github.com/mlrun/mlrun/actions/workflows/build.yaml?query=branch%3Adevelopment)
|