mlrun 1.7.0rc28__py3-none-any.whl → 1.7.0rc55__py3-none-any.whl
- mlrun/__main__.py +4 -2
- mlrun/alerts/alert.py +75 -8
- mlrun/artifacts/base.py +1 -0
- mlrun/artifacts/manager.py +9 -2
- mlrun/common/constants.py +4 -1
- mlrun/common/db/sql_session.py +3 -2
- mlrun/common/formatters/__init__.py +1 -0
- mlrun/common/formatters/artifact.py +1 -0
- mlrun/{model_monitoring/application.py → common/formatters/feature_set.py} +20 -6
- mlrun/common/formatters/run.py +3 -0
- mlrun/common/helpers.py +0 -1
- mlrun/common/schemas/__init__.py +3 -1
- mlrun/common/schemas/alert.py +15 -12
- mlrun/common/schemas/api_gateway.py +6 -6
- mlrun/common/schemas/auth.py +5 -0
- mlrun/common/schemas/client_spec.py +0 -1
- mlrun/common/schemas/common.py +7 -4
- mlrun/common/schemas/frontend_spec.py +7 -0
- mlrun/common/schemas/function.py +7 -0
- mlrun/common/schemas/model_monitoring/__init__.py +4 -3
- mlrun/common/schemas/model_monitoring/constants.py +41 -26
- mlrun/common/schemas/model_monitoring/model_endpoints.py +23 -47
- mlrun/common/schemas/notification.py +69 -12
- mlrun/common/schemas/project.py +45 -12
- mlrun/common/schemas/workflow.py +10 -2
- mlrun/common/types.py +1 -0
- mlrun/config.py +91 -35
- mlrun/data_types/data_types.py +6 -1
- mlrun/data_types/spark.py +2 -2
- mlrun/data_types/to_pandas.py +57 -25
- mlrun/datastore/__init__.py +1 -0
- mlrun/datastore/alibaba_oss.py +3 -2
- mlrun/datastore/azure_blob.py +125 -37
- mlrun/datastore/base.py +42 -21
- mlrun/datastore/datastore.py +4 -2
- mlrun/datastore/datastore_profile.py +1 -1
- mlrun/datastore/dbfs_store.py +3 -7
- mlrun/datastore/filestore.py +1 -3
- mlrun/datastore/google_cloud_storage.py +85 -29
- mlrun/datastore/inmem.py +4 -1
- mlrun/datastore/redis.py +1 -0
- mlrun/datastore/s3.py +25 -12
- mlrun/datastore/sources.py +76 -4
- mlrun/datastore/spark_utils.py +30 -0
- mlrun/datastore/storeytargets.py +151 -0
- mlrun/datastore/targets.py +102 -131
- mlrun/datastore/v3io.py +1 -0
- mlrun/db/base.py +15 -6
- mlrun/db/httpdb.py +57 -28
- mlrun/db/nopdb.py +29 -5
- mlrun/errors.py +20 -3
- mlrun/execution.py +46 -5
- mlrun/feature_store/api.py +25 -1
- mlrun/feature_store/common.py +6 -11
- mlrun/feature_store/feature_vector.py +3 -1
- mlrun/feature_store/retrieval/job.py +4 -1
- mlrun/feature_store/retrieval/spark_merger.py +10 -39
- mlrun/feature_store/steps.py +8 -0
- mlrun/frameworks/_common/plan.py +3 -3
- mlrun/frameworks/_ml_common/plan.py +1 -1
- mlrun/frameworks/parallel_coordinates.py +2 -3
- mlrun/frameworks/sklearn/mlrun_interface.py +13 -3
- mlrun/k8s_utils.py +48 -2
- mlrun/launcher/client.py +6 -6
- mlrun/launcher/local.py +2 -2
- mlrun/model.py +215 -34
- mlrun/model_monitoring/api.py +38 -24
- mlrun/model_monitoring/applications/__init__.py +1 -2
- mlrun/model_monitoring/applications/_application_steps.py +60 -29
- mlrun/model_monitoring/applications/base.py +2 -174
- mlrun/model_monitoring/applications/context.py +197 -70
- mlrun/model_monitoring/applications/evidently_base.py +11 -85
- mlrun/model_monitoring/applications/histogram_data_drift.py +21 -16
- mlrun/model_monitoring/applications/results.py +4 -4
- mlrun/model_monitoring/controller.py +110 -282
- mlrun/model_monitoring/db/stores/__init__.py +8 -3
- mlrun/model_monitoring/db/stores/base/store.py +3 -0
- mlrun/model_monitoring/db/stores/sqldb/models/base.py +9 -7
- mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +18 -3
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +43 -23
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +48 -35
- mlrun/model_monitoring/db/tsdb/__init__.py +7 -2
- mlrun/model_monitoring/db/tsdb/base.py +147 -15
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +94 -55
- mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +0 -3
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +144 -38
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +44 -3
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +246 -57
- mlrun/model_monitoring/helpers.py +70 -50
- mlrun/model_monitoring/stream_processing.py +96 -195
- mlrun/model_monitoring/writer.py +13 -5
- mlrun/package/packagers/default_packager.py +2 -2
- mlrun/projects/operations.py +16 -8
- mlrun/projects/pipelines.py +126 -115
- mlrun/projects/project.py +286 -129
- mlrun/render.py +3 -3
- mlrun/run.py +38 -19
- mlrun/runtimes/__init__.py +19 -8
- mlrun/runtimes/base.py +4 -1
- mlrun/runtimes/daskjob.py +1 -1
- mlrun/runtimes/funcdoc.py +1 -1
- mlrun/runtimes/kubejob.py +6 -6
- mlrun/runtimes/local.py +12 -5
- mlrun/runtimes/nuclio/api_gateway.py +68 -8
- mlrun/runtimes/nuclio/application/application.py +307 -70
- mlrun/runtimes/nuclio/function.py +63 -14
- mlrun/runtimes/nuclio/serving.py +10 -10
- mlrun/runtimes/pod.py +25 -19
- mlrun/runtimes/remotesparkjob.py +2 -5
- mlrun/runtimes/sparkjob/spark3job.py +16 -17
- mlrun/runtimes/utils.py +34 -0
- mlrun/serving/routers.py +2 -5
- mlrun/serving/server.py +37 -19
- mlrun/serving/states.py +30 -3
- mlrun/serving/v2_serving.py +44 -35
- mlrun/track/trackers/mlflow_tracker.py +5 -0
- mlrun/utils/async_http.py +1 -1
- mlrun/utils/db.py +18 -0
- mlrun/utils/helpers.py +150 -36
- mlrun/utils/http.py +1 -1
- mlrun/utils/notifications/notification/__init__.py +0 -1
- mlrun/utils/notifications/notification/webhook.py +8 -1
- mlrun/utils/notifications/notification_pusher.py +1 -1
- mlrun/utils/v3io_clients.py +2 -2
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.0rc28.dist-info → mlrun-1.7.0rc55.dist-info}/METADATA +153 -66
- {mlrun-1.7.0rc28.dist-info → mlrun-1.7.0rc55.dist-info}/RECORD +131 -134
- {mlrun-1.7.0rc28.dist-info → mlrun-1.7.0rc55.dist-info}/WHEEL +1 -1
- mlrun/feature_store/retrieval/conversion.py +0 -271
- mlrun/model_monitoring/controller_handler.py +0 -37
- mlrun/model_monitoring/evidently_application.py +0 -20
- mlrun/model_monitoring/prometheus.py +0 -216
- {mlrun-1.7.0rc28.dist-info → mlrun-1.7.0rc55.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc28.dist-info → mlrun-1.7.0rc55.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc28.dist-info → mlrun-1.7.0rc55.dist-info}/top_level.txt +0 -0
mlrun/serving/states.py
CHANGED

@@ -27,6 +27,8 @@ from copy import copy, deepcopy
 from inspect import getfullargspec, signature
 from typing import Any, Union

+import storey.utils
+
 import mlrun

 from ..config import config
@@ -82,6 +84,9 @@ _task_step_fields = [
 ]


+MAX_ALLOWED_STEPS = 4500
+
+
 def new_model_endpoint(class_name, model_path, handler=None, **class_args):
     class_args = deepcopy(class_args)
     class_args["model_path"] = model_path
@@ -386,6 +391,9 @@ class BaseStep(ModelObj):
         """
         raise NotImplementedError("set_flow() can only be called on a FlowStep")

+    def supports_termination(self):
+        return False
+

 class TaskStep(BaseStep):
     """task execution step, runs a class or handler"""
@@ -728,6 +736,11 @@ class RouterStep(TaskStep):
         if not route:
             route = TaskStep(class_name, class_args, handler=handler)
         route.function = function or route.function
+
+        if len(self._routes) >= MAX_ALLOWED_STEPS:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                f"Cannot create the serving graph: the maximum number of steps is {MAX_ALLOWED_STEPS}"
+            )
         route = self._routes.update(key, route)
         route.set_parent(self)
         return route
@@ -867,7 +880,10 @@ class QueueStep(BaseStep):
             return event

         if self._stream:
-            self.
+            full_event = self.options.get("full_event")
+            if full_event or full_event is None and self.next:
+                data = storey.utils.wrap_event_for_serialization(event, data)
+            self._stream.push(data)
             event.terminated = True
             event.body = None
         return event
@@ -1273,6 +1289,8 @@ class FlowStep(BaseStep):
             event.body = {"id": event.id}
             return event

+        event = storey.utils.unpack_event_if_wrapped(event)
+
         if len(self._start_steps) == 0:
             return event
         next_obj = self._start_steps[0]
@@ -1380,6 +1398,9 @@

         return step

+    def supports_termination(self):
+        return self.engine != "sync"
+

 class RootFlowStep(FlowStep):
     """root flow step"""
@@ -1618,7 +1639,11 @@ def _init_async_objects(context, steps):
        if step.path and not skip_stream:
            stream_path = step.path
            endpoint = None
-
+            # in case of a queue, we default to a full_event=True
+            full_event = step.options.get("full_event")
+            options = {
+                "full_event": full_event or full_event is None and step.next
+            }
            options.update(step.options)

            kafka_brokers = get_kafka_brokers_from_dict(options, pop=True)
@@ -1672,7 +1697,9 @@ def _init_async_objects(context, steps):
            wait_for_result = True

    source_args = context.get_param("source_args", {})
-    explicit_ack =
+    explicit_ack = (
+        is_explicit_ack_supported(context) and mlrun.mlconf.is_explicit_ack_enabled()
+    )

    # TODO: Change to AsyncEmitSource once we can drop support for nuclio<1.12.10
    default_source = storey.SyncEmitSource(
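A note on the new queue behavior: when `full_event` is left unset, the queue wraps the full event only if it has downstream steps. A minimal sketch of that defaulting rule, extracted from the expression in the hunks above (plain Python, names invented for illustration):

# Sketch of the full_event defaulting rule used by QueueStep above.
# `full_event` is the user option (True/False/None); `has_next` says
# whether the queue has downstream steps.
def resolve_full_event(full_event, has_next) -> bool:
    # explicit True/False wins; None (unset) falls back to has_next
    return bool(full_event or (full_event is None and has_next))

assert resolve_full_event(None, True) is True    # unset + downstream steps -> wrap
assert resolve_full_event(None, False) is False  # unset + terminal queue -> raw body
assert resolve_full_event(False, True) is False  # explicit False always wins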
mlrun/serving/v2_serving.py
CHANGED

@@ -15,12 +15,12 @@
 import threading
 import time
 import traceback
-from typing import Union
+from typing import Optional, Union

-import mlrun.
+import mlrun.artifacts
+import mlrun.common.model_monitoring.helpers
 import mlrun.common.schemas.model_monitoring
-
-from mlrun.config import config
+import mlrun.model_monitoring
 from mlrun.errors import err_to_str
 from mlrun.utils import logger, now_date
@@ -102,7 +102,7 @@ class V2ModelServer(StepToDict):
         self.error = ""
         self.protocol = protocol or "v2"
         self.model_path = model_path
-        self.model_spec: mlrun.artifacts.ModelArtifact = None
+        self.model_spec: Optional[mlrun.artifacts.ModelArtifact] = None
         self._input_path = input_path
         self._result_path = result_path
         self._kwargs = kwargs  # for to_dict()
@@ -148,7 +148,7 @@ class V2ModelServer(StepToDict):
            logger.warn("GraphServer not initialized for VotingEnsemble instance")
            return

-        if not self.context.is_mock or self.context.
+        if not self.context.is_mock or self.context.monitoring_mock:
            self.model_endpoint_uid = _init_endpoint_record(
                graph_server=server, model=self
            )
@@ -258,6 +258,7 @@ class V2ModelServer(StepToDict):
            "id": event_id,
            "model_name": self.name,
            "outputs": outputs,
+            "timestamp": start.isoformat(sep=" ", timespec="microseconds"),
        }
        if self.version:
            response["model_version"] = self.version
@@ -335,6 +336,7 @@ class V2ModelServer(StepToDict):
        else:
            track_request = {"id": event_id, "inputs": inputs or []}
            track_response = {"outputs": outputs or []}
+            # TODO : check dict/list
            self._model_logger.push(start, track_request, track_response, op)
        event.body = _update_result_body(self._result_path, original_body, response)
        return event
@@ -376,8 +378,10 @@ class V2ModelServer(StepToDict):
        """postprocess, before returning response"""
        return request

-    def predict(self, request: dict) ->
-        """model prediction operation
+    def predict(self, request: dict) -> list:
+        """model prediction operation
+        :return: list with the model prediction results (can be multi-port) or list of lists for multiple predictions
+        """
        raise NotImplementedError()

    def explain(self, request: dict) -> dict:
@@ -551,13 +555,13 @@ def _init_endpoint_record(
    except mlrun.errors.MLRunNotFoundError:
        model_ep = None
    except mlrun.errors.MLRunBadRequestError as err:
-        logger.
-
+        logger.info(
+            "Cannot get the model endpoints store", err=mlrun.errors.err_to_str(err)
        )
        return

    if model.context.server.track_models and not model_ep:
-        logger.
+        logger.info("Creating a new model endpoint record", endpoint_id=uid)
        model_endpoint = mlrun.common.schemas.ModelEndpoint(
            metadata=mlrun.common.schemas.ModelEndpointMetadata(
                project=project, labels=model.labels, uid=uid
@@ -567,9 +571,7 @@ def _init_endpoint_record(
                model=versioned_model_name,
                model_class=model.__class__.__name__,
                model_uri=model.model_path,
-                stream_path=
-                    project=project, kind="stream"
-                ),
+                stream_path=model.context.stream.stream_uri,
                active=True,
                monitoring_mode=mlrun.common.schemas.model_monitoring.ModelMonitoringMode.enabled,
            ),
@@ -585,28 +587,35 @@ def _init_endpoint_record(
            model_endpoint=model_endpoint.dict(),
        )

-    elif
-
-
+    elif model_ep:
+        attributes = {}
+        old_model_uri = model_ep.spec.model_uri
+        mlrun.model_monitoring.helpers.enrich_model_endpoint_with_model_uri(
+            model_endpoint=model_ep,
+            model_obj=model.model_spec,
+        )
+        if model_ep.spec.model_uri != old_model_uri:
+            attributes["model_uri"] = model_ep.spec.model_uri
+        if (
            model_ep.spec.monitoring_mode
            == mlrun.common.schemas.model_monitoring.ModelMonitoringMode.enabled
-        )
-        [… removed lines truncated in source …]
+        ) != model.context.server.track_models:
+            attributes["monitoring_mode"] = (
+                mlrun.common.schemas.model_monitoring.ModelMonitoringMode.enabled
+                if model.context.server.track_models
+                else mlrun.common.schemas.model_monitoring.ModelMonitoringMode.disabled
+            )
+        if attributes:
+            db = mlrun.get_run_db()
+            db.patch_model_endpoint(
+                project=project,
+                endpoint_id=uid,
+                attributes=attributes,
+            )
+            logger.info(
+                "Updating model endpoint attributes",
+                attributes=attributes,
+                endpoint_id=uid,
+            )

    return uid
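The `predict` signature is now annotated to return a list. A minimal sketch of a custom server written against that contract (the base class is MLRun's documented `V2ModelServer`; the model logic itself is invented for illustration):

# Illustrative custom server matching `predict(self, request: dict) -> list`.
import mlrun.serving


class MyModelServer(mlrun.serving.V2ModelServer):
    def load(self):
        # a real server would call self.get_model() to fetch the artifact;
        # here a toy callable stands in for the model
        self.model = lambda xs: [sum(x) for x in xs]

    def predict(self, request: dict) -> list:
        # one result per input row -> a flat list (or list of lists)
        return self.model(request["inputs"])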
mlrun/track/trackers/mlflow_tracker.py
CHANGED

@@ -442,6 +442,11 @@ class MLFlowTracker(Tracker):
         # Prepare the archive path:
         model_uri = pathlib.Path(model_uri)
         archive_path = pathlib.Path(tmp_path) / f"{model_uri.stem}.zip"
+        if not os.path.exists(model_uri):
+            local_path = mlflow.artifacts.download_artifacts(
+                artifact_uri=str(model_uri)
+            )
+            model_uri = pathlib.Path(local_path)

         # TODO add progress bar for the case of large files
         # Zip the artifact:
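For context, `mlflow.artifacts.download_artifacts` (available in recent mlflow releases) resolves a remote artifact URI to a local path; the hunk above falls back to it when the URI is not already a local file. A hedged sketch of that fallback in isolation (the `runs:/...` URI is illustrative, not from the diff):

# Sketch: resolve a possibly-remote MLflow model URI to a local path
# before archiving. "runs:/abc123/model" is an invented example URI.
import os
import pathlib

import mlflow

model_uri = pathlib.Path("runs:/abc123/model")  # illustrative
if not os.path.exists(model_uri):
    model_uri = pathlib.Path(
        mlflow.artifacts.download_artifacts(artifact_uri=str(model_uri))
    )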
mlrun/utils/async_http.py
CHANGED

@@ -237,7 +237,7 @@ class _CustomRequestContext(_RequestContext):
                 retry_wait = self._retry_options.get_timeout(
                     attempt=current_attempt, response=None
                 )
-                self._logger.
+                self._logger.warning(
                     "Request failed on retryable exception, retrying",
                     retry_wait_secs=retry_wait,
                     method=params.method,
mlrun/utils/db.py
CHANGED

@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
+import abc
 import pickle
 from datetime import datetime
@@ -28,12 +29,22 @@ class BaseModel:
     columns = [column.key for column in mapper.columns if column.key not in exclude]

     def get_key_value(c):
+        # all (never say never) DB classes have "object" defined as "full_object"
+        if c == "object":
+            c = "full_object"
         if isinstance(getattr(self, c), datetime):
             return c, getattr(self, c).isoformat()
         return c, getattr(self, c)

     return dict(map(get_key_value, columns))

+    @abc.abstractmethod
+    def get_identifier_string(self):
+        """
+        This method must be implemented by any subclass.
+        """
+        pass
+

 class HasStruct(BaseModel):
     @property
@@ -51,3 +62,10 @@ class HasStruct(BaseModel):
         exclude = exclude or []
         exclude.append("body")
         return super().to_dict(exclude, strip=strip)
+
+    @abc.abstractmethod
+    def get_identifier_string(self):
+        """
+        This method must be implemented by any subclass.
+        """
+        pass
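Both base classes now declare `get_identifier_string` as abstract, so every model deriving from them must supply one. A hedged sketch of the override pattern (the `Run` class and its `project`/`uid` fields are invented for illustration, not taken from the diff):

# Illustrative subclass only; field names are assumptions.
from mlrun.utils.db import HasStruct


class Run(HasStruct):
    def get_identifier_string(self) -> str:
        # a human-readable identifier built from the row's key fields
        return f"{self.project}/{self.uid}"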
mlrun/utils/helpers.py
CHANGED

@@ -24,6 +24,7 @@ import re
 import string
 import sys
 import typing
+import uuid
 import warnings
 from datetime import datetime, timezone
 from importlib import import_module, reload
@@ -40,7 +41,7 @@ import semver
 import yaml
 from dateutil import parser
 from mlrun_pipelines.models import PipelineRun
-from pandas
+from pandas import Timedelta, Timestamp
 from yaml.representer import RepresenterError

 import mlrun
@@ -110,14 +111,15 @@ def get_artifact_target(item: dict, project=None):
     project_str = project or item["metadata"].get("project")
     tree = item["metadata"].get("tree")
     tag = item["metadata"].get("tag")
-
     kind = item.get("kind")
-    [… removed lines truncated in source …]
+
+    if kind in {"dataset", "model", "artifact"} and db_key:
+        target = (
+            f"{DB_SCHEMA}://{StorePrefix.kind_to_prefix(kind)}/{project_str}/{db_key}"
+        )
+        target += f":{tag}" if tag else ":latest"
         if tree:
-            target
+            target += f"@{tree}"
         return target

     return item["spec"].get("target_path")
@@ -134,18 +136,25 @@ def is_legacy_artifact(artifact):
 logger = create_logger(config.log_level, config.log_formatter, "mlrun", sys.stdout)
 missing = object()

-is_ipython = False
+is_ipython = False  # is IPython terminal, including Jupyter
+is_jupyter = False  # is Jupyter notebook/lab terminal
 try:
-    import IPython
+    import IPython.core.getipython
+
+    ipy = IPython.core.getipython.get_ipython()
+
+    is_ipython = ipy is not None
+    is_jupyter = (
+        is_ipython
+        # not IPython
+        and "Terminal" not in str(type(ipy))
+    )

-    ipy
-
-    if ipy and "Terminal" not in str(type(ipy)):
-        is_ipython = True
-except ImportError:
+    del ipy
+except ModuleNotFoundError:
     pass

-if
+if is_jupyter and config.nest_asyncio_enabled in ["1", "True"]:
     # bypass Jupyter asyncio bug
     import nest_asyncio
@@ -819,7 +828,6 @@ def enrich_image_url(
     tag += resolve_image_tag_suffix(
         mlrun_version=mlrun_version, python_version=client_python_version
     )
-    registry = config.images_registry

     # it's an mlrun image if the repository is mlrun
     is_mlrun_image = image_url.startswith("mlrun/") or "/mlrun/" in image_url
@@ -827,6 +835,10 @@
     if is_mlrun_image and tag and ":" not in image_url:
         image_url = f"{image_url}:{tag}"

+    registry = (
+        config.images_registry if is_mlrun_image else config.vendor_images_registry
+    )
+
     enrich_registry = False
     # enrich registry only if images_to_enrich_registry provided
     # example: "^mlrun/*" means enrich only if the image repository is mlrun and registry is not specified (in which
@@ -1005,6 +1017,23 @@ def get_workflow_url(project, id=None):
     return url


+def get_kfp_project_filter(project_name: str) -> str:
+    """
+    Generates a filter string for KFP runs, using a substring predicate
+    on the run's 'name' field. This is used as a heuristic to retrieve runs that are associated
+    with a specific project. The 'op: 9' operator indicates that the filter checks if the
+    project name appears as a substring in the run's name, ensuring that we can identify
+    runs belonging to the desired project.
+    """
+    is_substring_op = 9
+    project_name_filter = {
+        "predicates": [
+            {"key": "name", "op": is_substring_op, "string_value": project_name}
+        ]
+    }
+    return json.dumps(project_name_filter)
+
+
 def are_strings_in_exception_chain_messages(
     exception: Exception, strings_list: list[str]
 ) -> bool:
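For reference, the JSON emitted by `get_kfp_project_filter` for a project named, say, "iris" (illustrative name) looks like:

>>> get_kfp_project_filter("iris")
'{"predicates": [{"key": "name", "op": 9, "string_value": "iris"}]}'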
@@ -1402,11 +1431,27 @@ def is_running_in_jupyter_notebook() -> bool:
     Check if the code is running inside a Jupyter Notebook.
     :return: True if running inside a Jupyter Notebook, False otherwise.
     """
-    [… removed line truncated in source …]
+    return is_jupyter

-    [… removed lines truncated in source …]
+
+def create_ipython_display():
+    """
+    Create an IPython display object and fill it with initial content.
+    We can later use the returned display_id with the update_display method to update the content.
+    If IPython is not installed, a warning will be logged and None will be returned.
+    """
+    if is_ipython:
+        import IPython
+
+        display_id = uuid.uuid4().hex
+        content = IPython.display.HTML(
+            f'<div id="{display_id}">Temporary Display Content</div>'
+        )
+        IPython.display.display(content, display_id=display_id)
+        return display_id
+
+    # returning None if IPython is not installed, this method shouldn't be called in that case but logging for sanity
+    logger.debug("IPython is not installed, cannot create IPython display")


 def as_number(field_name, field_value):
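A short usage sketch: the returned `display_id` can later be handed to IPython's `update_display` to swap the placeholder content in place (the HTML payload here is invented; assumes a Jupyter session):

# Update the placeholder created by create_ipython_display().
import IPython.display

display_id = create_ipython_display()
if display_id:
    IPython.display.update_display(
        IPython.display.HTML("<b>run finished</b>"), display_id=display_id
    )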
@@ -1617,28 +1662,25 @@ def additional_filters_warning(additional_filters, class_name):
     )


-def
+def merge_dicts_with_precedence(*dicts: dict) -> dict:
     """
-    Merge
+    Merge multiple dictionaries with precedence given to keys from later dictionaries.

-    This function merges
-    [… removed line truncated in source …]
-    the value from
+    This function merges an arbitrary number of dictionaries, where keys from dictionaries later
+    in the argument list take precedence over keys from dictionaries earlier in the list. If all
+    dictionaries contain the same key, the value from the last dictionary with that key will
+    overwrite the values from earlier dictionaries.

     Example:
         >>> first_dict = {"key1": "value1", "key2": "value2"}
         >>> second_dict = {"key2": "new_value2", "key3": "value3"}
-        >>>
-        [… removed line truncated in source …]
+        >>> third_dict = {"key3": "new_value3", "key4": "value4"}
+        >>> merge_dicts_with_precedence(first_dict, second_dict, third_dict)
+        {'key1': 'value1', 'key2': 'new_value2', 'key3': 'new_value3', 'key4': 'value4'}

-    [… removed line truncated in source …]
-    - The merge operation uses the ** operator in Python, which combines key-value pairs
-    from each dictionary. Later dictionaries take precedence when there are conflicting keys.
+    - If no dictionaries are provided, the function returns an empty dictionary.
     """
-    return {
-        **(first_dict or {}),
-        **(second_dict or {}),
-    }
+    return {k: v for d in dicts if d for k, v in d.items()}
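One behavioral nuance worth noting: falsy arguments (`None`, `{}`) are skipped by the `if d` guard in the comprehension, so callers can pass optional dicts straight through:

>>> merge_dicts_with_precedence({"a": 1}, None, {"a": 2})
{'a': 2}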
@@ -1682,11 +1724,21 @@
     )
     return True

+    # Feature might have been back-ported e.g. nuclio node selection is supported from
+    # 1.5.20 and 1.6.10 but not in 1.6.9 - therefore we reverse sort to validate against 1.6.x 1st and
+    # then against 1.5.x
     parsed_min_versions.sort(reverse=True)
     for parsed_min_version in parsed_min_versions:
-        if
+        if (
+            parsed_current_version.major == parsed_min_version.major
+            and parsed_current_version.minor == parsed_min_version.minor
+            and parsed_current_version.patch < parsed_min_version.patch
+        ):
             return False
-
+
+        if parsed_current_version >= parsed_min_version:
+            return True
+    return False


 def format_alert_summary(
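A hedged, standalone sketch of why the reverse sort matters, using the same back-port example as the comment above (the `semver` package is already imported by this module; the helper below is invented for illustration, not MLRun's API):

# A current version fails only if it shares major.minor with a minimum
# version but has a lower patch; otherwise any version >= some minimum passes.
import semver

minimums = [semver.VersionInfo.parse(v) for v in ("1.5.20", "1.6.10")]
minimums.sort(reverse=True)  # check 1.6.x before 1.5.x


def is_compatible(current: str) -> bool:
    cur = semver.VersionInfo.parse(current)
    for m in minimums:
        if cur.major == m.major and cur.minor == m.minor and cur.patch < m.patch:
            return False
        if cur >= m:
            return True
    return False


assert is_compatible("1.6.10") and is_compatible("1.5.21")
assert not is_compatible("1.6.9")  # 1.6.x but below the back-ported patch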
@@ -1698,6 +1750,28 @@
     return result


+def is_parquet_file(file_path, format_=None):
+    return (file_path and file_path.endswith((".parquet", ".pq"))) or (
+        format_ == "parquet"
+    )
+
+
+def validate_single_def_handler(function_kind: str, code: str):
+    # The name of MLRun's wrapper is 'handler', which is why the handler function name cannot be 'handler'
+    # it would override MLRun's wrapper
+    if function_kind == "mlrun":
+        # Find all lines that start with "def handler("
+        pattern = re.compile(r"^def handler\(", re.MULTILINE)
+        matches = pattern.findall(code)
+
+        # Only MLRun's wrapper handler (footer) can be in the code
+        if len(matches) > 1:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "The code file contains a function named “handler“, which is reserved. "
+                + "Use a different name for your function."
+            )
+
+
 def _reload(module, max_recursion_depth):
     """Recursively reload modules."""
     if max_recursion_depth <= 0:
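Quick doctest-style checks of the new `is_parquet_file` helper (file names illustrative):

>>> is_parquet_file("data.pq")
True
>>> is_parquet_file("data.csv", format_="parquet")
True
>>> is_parquet_file("data.csv")
False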
@@ -1708,3 +1782,43 @@
     attribute = getattr(module, attribute_name)
     if type(attribute) is ModuleType:
         _reload(attribute, max_recursion_depth - 1)
+
+
+def run_with_retry(
+    retry_count: int,
+    func: typing.Callable,
+    retry_on_exceptions: typing.Union[
+        type[Exception],
+        tuple[type[Exception]],
+    ] = None,
+    *args,
+    **kwargs,
+):
+    """
+    Executes a function with retry logic upon encountering specified exceptions.
+
+    :param retry_count: The number of times to retry the function execution.
+    :param func: The function to execute.
+    :param retry_on_exceptions: Exception(s) that trigger a retry. Can be a single exception or a tuple of exceptions.
+    :param args: Positional arguments to pass to the function.
+    :param kwargs: Keyword arguments to pass to the function.
+    :return: The result of the function execution if successful.
+    :raises Exception: Re-raises the last exception encountered after all retries are exhausted.
+    """
+    if retry_on_exceptions is None:
+        retry_on_exceptions = (Exception,)
+    elif isinstance(retry_on_exceptions, list):
+        retry_on_exceptions = tuple(retry_on_exceptions)
+
+    last_exception = None
+    for attempt in range(retry_count + 1):
+        try:
+            return func(*args, **kwargs)
+        except retry_on_exceptions as exc:
+            last_exception = exc
+            logger.warning(
+                f"Attempt {{{attempt}/ {retry_count}}} failed with exception: {exc}",
+            )
+            if attempt == retry_count:
+                raise
+    raise last_exception
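Usage sketch for the new helper (the flaky function is invented): `retry_count=2` means up to three calls in total, and the final failure re-raises from inside the loop.

# Illustrative usage of run_with_retry; `flaky` is a stand-in function.
import random


def flaky():
    if random.random() < 0.5:
        raise ConnectionError("transient failure")
    return "ok"


result = run_with_retry(2, flaky, retry_on_exceptions=ConnectionError)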
mlrun/utils/http.py
CHANGED

@@ -95,7 +95,7 @@ class HTTPSessionWithRetry(requests.Session):
             total=self.max_retries,
             backoff_factor=self.retry_backoff_factor,
             status_forcelist=config.http_retry_defaults.status_codes,
-            [… removed line truncated in source …]
+            allowed_methods=self._retry_methods,
             # we want to retry but not to raise since we do want that last response (to parse details on the
             # error from response body) we'll handle raising ourselves
             raise_on_status=False,
mlrun/utils/notifications/notification/webhook.py
CHANGED

@@ -60,7 +60,14 @@ class WebhookNotification(NotificationBase):
             request_body["runs"] = runs

         if alert:
-            request_body["
+            request_body["name"] = alert.name
+            request_body["project"] = alert.project
+            request_body["severity"] = alert.severity
+            if alert.summary:
+                request_body["summary"] = mlrun.utils.helpers.format_alert_summary(
+                    alert, event_data
+                )
+
         if event_data:
             request_body["value"] = event_data.value_dict
             request_body["id"] = event_data.entity.ids[0]
mlrun/utils/notifications/notification_pusher.py
CHANGED

@@ -484,7 +484,7 @@ class NotificationPusher(_NotificationPusherBase):
     def _get_workflow_manifest(
         workflow_id: str,
     ) -> typing.Optional[mlrun_pipelines.models.PipelineManifest]:
-        kfp_client = mlrun_pipelines.utils.get_client(mlrun.mlconf)
+        kfp_client = mlrun_pipelines.utils.get_client(mlrun.mlconf.kfp_url)

         # arbitrary timeout of 5 seconds, the workflow should be done by now
         kfp_run = kfp_client.wait_for_run_completion(workflow_id, 5)
mlrun/utils/v3io_clients.py
CHANGED

@@ -13,7 +13,7 @@
 # limitations under the License.

 from v3io.dataplane import Client as V3IOClient
-from v3io_frames import Client as
+from v3io_frames import Client as V3IOFramesClient
 from v3io_frames.client import ClientBase

 _v3io_clients: dict[frozenset, V3IOClient] = {}
@@ -24,7 +24,7 @@ def get_frames_client(**kwargs) -> ClientBase:
     global _frames_clients
     kw_set = frozenset(kwargs.items())
     if kw_set not in _frames_clients:
-        _frames_clients[kw_set] =
+        _frames_clients[kw_set] = V3IOFramesClient(**kwargs)

     return _frames_clients[kw_set]
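The frames clients are memoized per keyword-argument set: `frozenset(kwargs.items())` is the cache key, so the kwargs must be hashable and identical kwargs always return the same client instance. A generic sketch of that memoization pattern (names invented, not from the diff):

# Generic kwargs-keyed memoization, as used by get_frames_client above.
_clients: dict[frozenset, object] = {}


def get_cached_client(factory, **kwargs):
    key = frozenset(kwargs.items())  # kwargs values must be hashable
    if key not in _clients:
        _clients[key] = factory(**kwargs)
    return _clients[key]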
mlrun/utils/version/version.json
CHANGED