mlrun 1.6.2rc6__py3-none-any.whl → 1.6.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mlrun might be problematic.
- mlrun/artifacts/model.py +28 -22
- mlrun/common/db/sql_session.py +3 -0
- mlrun/common/model_monitoring/helpers.py +4 -2
- mlrun/common/schemas/__init__.py +2 -0
- mlrun/common/schemas/common.py +40 -0
- mlrun/common/schemas/model_monitoring/__init__.py +1 -0
- mlrun/common/schemas/model_monitoring/constants.py +21 -5
- mlrun/common/schemas/project.py +2 -0
- mlrun/config.py +59 -20
- mlrun/data_types/data_types.py +4 -0
- mlrun/datastore/azure_blob.py +9 -9
- mlrun/datastore/base.py +22 -44
- mlrun/datastore/google_cloud_storage.py +6 -6
- mlrun/datastore/v3io.py +74 -73
- mlrun/db/auth_utils.py +152 -0
- mlrun/db/base.py +18 -0
- mlrun/db/httpdb.py +79 -55
- mlrun/execution.py +3 -3
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +3 -3
- mlrun/frameworks/tf_keras/model_handler.py +7 -7
- mlrun/k8s_utils.py +10 -5
- mlrun/kfpops.py +19 -10
- mlrun/lists.py +2 -0
- mlrun/model.py +31 -2
- mlrun/model_monitoring/api.py +8 -8
- mlrun/model_monitoring/batch.py +1 -1
- mlrun/model_monitoring/controller.py +0 -7
- mlrun/model_monitoring/features_drift_table.py +6 -0
- mlrun/model_monitoring/helpers.py +4 -1
- mlrun/model_monitoring/stores/kv_model_endpoint_store.py +13 -13
- mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -1
- mlrun/model_monitoring/stream_processing.py +50 -37
- mlrun/package/packagers/pandas_packagers.py +3 -3
- mlrun/package/utils/_archiver.py +3 -1
- mlrun/platforms/iguazio.py +6 -65
- mlrun/projects/pipelines.py +51 -17
- mlrun/projects/project.py +77 -61
- mlrun/render.py +13 -4
- mlrun/run.py +2 -0
- mlrun/runtimes/base.py +24 -1
- mlrun/runtimes/function.py +9 -9
- mlrun/runtimes/kubejob.py +5 -3
- mlrun/runtimes/local.py +2 -2
- mlrun/runtimes/mpijob/abstract.py +6 -6
- mlrun/runtimes/pod.py +8 -8
- mlrun/runtimes/serving.py +3 -3
- mlrun/runtimes/sparkjob/spark3job.py +3 -3
- mlrun/serving/remote.py +4 -2
- mlrun/utils/async_http.py +28 -8
- mlrun/utils/helpers.py +20 -0
- mlrun/utils/http.py +3 -3
- mlrun/utils/logger.py +11 -6
- mlrun/utils/notifications/notification_pusher.py +6 -6
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.6.2rc6.dist-info → mlrun-1.6.3.dist-info}/METADATA +18 -18
- {mlrun-1.6.2rc6.dist-info → mlrun-1.6.3.dist-info}/RECORD +60 -59
- mlrun/datastore/helpers.py +0 -18
- {mlrun-1.6.2rc6.dist-info → mlrun-1.6.3.dist-info}/LICENSE +0 -0
- {mlrun-1.6.2rc6.dist-info → mlrun-1.6.3.dist-info}/WHEEL +0 -0
- {mlrun-1.6.2rc6.dist-info → mlrun-1.6.3.dist-info}/entry_points.txt +0 -0
- {mlrun-1.6.2rc6.dist-info → mlrun-1.6.3.dist-info}/top_level.txt +0 -0
mlrun/model_monitoring/stores/kv_model_endpoint_store.py
CHANGED

@@ -540,24 +540,24 @@ class KVModelEndpointStore(ModelEndpointStore):
             and endpoint[mlrun.common.schemas.model_monitoring.EventFieldType.METRICS]
             == "null"
         ):
-            endpoint[
-                mlrun.common.schemas.model_monitoring.EventFieldType.METRICS
-            ] = json.dumps(
-                {
-                    mlrun.common.schemas.model_monitoring.EventKeyMetrics.GENERIC: {
-                        mlrun.common.schemas.model_monitoring.EventLiveStats.LATENCY_AVG_1H: 0,
-                        mlrun.common.schemas.model_monitoring.EventLiveStats.PREDICTIONS_PER_SECOND: 0,
+            endpoint[mlrun.common.schemas.model_monitoring.EventFieldType.METRICS] = (
+                json.dumps(
+                    {
+                        mlrun.common.schemas.model_monitoring.EventKeyMetrics.GENERIC: {
+                            mlrun.common.schemas.model_monitoring.EventLiveStats.LATENCY_AVG_1H: 0,
+                            mlrun.common.schemas.model_monitoring.EventLiveStats.PREDICTIONS_PER_SECOND: 0,
+                        }
                     }
-                }
+                )
             )
         # Validate key `uid` instead of `endpoint_id`
         # For backwards compatibility reasons, we replace the `endpoint_id` with `uid` which is the updated key name
         if mlrun.common.schemas.model_monitoring.EventFieldType.ENDPOINT_ID in endpoint:
-            endpoint[
-                mlrun.common.schemas.model_monitoring.EventFieldType.UID
-            ] = endpoint[
-                mlrun.common.schemas.model_monitoring.EventFieldType.ENDPOINT_ID
-            ]
+            endpoint[mlrun.common.schemas.model_monitoring.EventFieldType.UID] = (
+                endpoint[
+                    mlrun.common.schemas.model_monitoring.EventFieldType.ENDPOINT_ID
+                ]
+            )

     @staticmethod
     def _encode_field(field: typing.Union[str, bytes]) -> bytes:
mlrun/model_monitoring/stores/sql_model_endpoint_store.py
CHANGED

@@ -31,7 +31,6 @@ from .models import get_model_endpoints_table


 class SQLModelEndpointStore(ModelEndpointStore):
-
     """
     Handles the DB operations when the DB target is from type SQL. For the SQL operations, we use SQLAlchemy, a Python
     SQL toolkit that handles the communication with the database. When using SQL for storing the model endpoints
mlrun/model_monitoring/stream_processing.py
CHANGED

@@ -24,6 +24,7 @@ import mlrun
 import mlrun.common.model_monitoring.helpers
 import mlrun.config
 import mlrun.datastore.targets
+import mlrun.feature_store as fstore
 import mlrun.feature_store.steps
 import mlrun.model_monitoring.prometheus
 import mlrun.serving.states
@@ -49,7 +50,7 @@ class EventStreamProcessor:
         parquet_batching_timeout_secs: int,
         parquet_target: str,
         sample_window: int = 10,
-        aggregate_windows: typing.Optional[typing.List[str]] = None,
+        aggregate_windows: typing.Optional[list[str]] = None,
         aggregate_period: str = "30s",
         model_monitoring_access_key: str = None,
     ):
@@ -349,7 +350,6 @@ class EventStreamProcessor:
             rate="10/m",
             time_col=EventFieldType.TIMESTAMP,
             container=self.tsdb_container,
-            access_key=self.v3io_access_key,
             v3io_frames=self.v3io_framesd,
             infer_columns_from_data=True,
             index_cols=[
@@ -587,6 +587,8 @@ class ProcessBeforeParquet(mlrun.feature_store.steps.MapClass):
         for key in [
             EventFieldType.FEATURES,
             EventFieldType.NAMED_FEATURES,
+            EventFieldType.PREDICTION,
+            EventFieldType.NAMED_PREDICTIONS,
         ]:
             event.pop(key, None)

@@ -629,14 +631,14 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
         self.project: str = project

         # First and last requests timestamps (value) of each endpoint (key)
-        self.first_request: typing.Dict[str, str] = dict()
-        self.last_request: typing.Dict[str, str] = dict()
+        self.first_request: dict[str, str] = dict()
+        self.last_request: dict[str, str] = dict()

         # Number of errors (value) per endpoint (key)
-        self.error_count: typing.Dict[str, int] = collections.defaultdict(int)
+        self.error_count: dict[str, int] = collections.defaultdict(int)

         # Set of endpoints in the current events
-        self.endpoints: typing.Set[str] = set()
+        self.endpoints: set[str] = set()

     def do(self, full_event):
         event = full_event.body
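
The annotations above move to the builtin generics of PEP 585 (`dict[str, str]`, `set[str]`), which are valid at runtime only on Python 3.9 and later. A minimal, self-contained sketch of the pattern (class and field names here are illustrative, not mlrun API):

    import collections

    class EndpointCounters:
        def __init__(self):
            # builtin generics (PEP 585) - evaluated at runtime, need Python >= 3.9
            self.first_request: dict[str, str] = {}
            # defaultdict(int) yields 0 for endpoints that were never counted
            self.error_count: dict[str, int] = collections.defaultdict(int)
            self.endpoints: set[str] = set()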
@@ -745,18 +747,12 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
         # in list of events. This list will be used as the body for the storey event.
         events = []
         for i, (feature, prediction) in enumerate(zip(features, predictions)):
-            # Validate that inputs are based on numeric values
-            if not self.is_valid(
-                endpoint_id,
-                self.is_list_of_numerics,
-                feature,
-                ["request", "inputs", f"[{i}]"],
-            ):
-                return None
-
             if not isinstance(prediction, list):
                 prediction = [prediction]

+            if not isinstance(feature, list):
+                feature = [feature]
+
             events.append(
                 {
                     EventFieldType.FUNCTION_URI: function_uri,
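
Behaviorally, this hunk drops the strict numeric validation and instead normalizes scalars to one-element lists, so downstream steps can always iterate. A rough standalone sketch of the new normalization (field names are illustrative; the real step also carries endpoint metadata in each event):

    def normalize_pairs(features: list, predictions: list) -> list[dict]:
        # wrap scalar features/predictions so every event body holds lists
        events = []
        for feature, prediction in zip(features, predictions):
            if not isinstance(prediction, list):
                prediction = [prediction]
            if not isinstance(feature, list):
                feature = [feature]
            events.append({"features": feature, "prediction": prediction})
        return events

    # normalize_pairs([3.5], [0]) -> [{"features": [3.5], "prediction": [0]}]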
@@ -803,18 +799,6 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
                 f"{self.last_request[endpoint_id]} - write to TSDB will be rejected"
             )

-    @staticmethod
-    def is_list_of_numerics(
-        field: typing.List[typing.Union[int, float, dict, list]],
-        dict_path: typing.List[str],
-    ):
-        if all(isinstance(x, int) or isinstance(x, float) for x in field):
-            return True
-        logger.error(
-            f"List does not consist of only numeric values: {field} [Event -> {','.join(dict_path)}]"
-        )
-        return False
-
     def resume_state(self, endpoint_id):
         # Make sure process is resumable, if process fails for any reason, be able to pick things up close to where we
         # left them
@@ -849,7 +833,7 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
         endpoint_id: str,
         validation_function,
         field: typing.Any,
-        dict_path: typing.List[str],
+        dict_path: list[str],
     ):
         if validation_function(field, dict_path):
             return True
@@ -857,7 +841,7 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
         return False


-def is_not_none(field: typing.Any, dict_path: typing.List[str]):
+def is_not_none(field: typing.Any, dict_path: list[str]):
     if field is not None:
         return True
     logger.error(
@@ -946,9 +930,11 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
             return self.label_columns[endpoint_id]
         return None

-    def do(self, event: typing.Dict):
+    def do(self, event: dict):
         endpoint_id = event[EventFieldType.ENDPOINT_ID]

+        feature_values = event[EventFieldType.FEATURES]
+        label_values = event[EventFieldType.PREDICTION]
         # Get feature names and label columns
         if endpoint_id not in self.feature_names:
             endpoint_record = get_endpoint_record(
@@ -984,6 +970,12 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
             },
         )

+        update_monitoring_feature_set(
+            endpoint_record=endpoint_record,
+            feature_names=feature_names,
+            feature_values=feature_values,
+        )
+
         # Similar process with label columns
         if not label_columns and self._infer_columns_from_data:
             label_columns = self._infer_label_columns_from_data(event)
@@ -1002,6 +994,11 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
             endpoint_id=endpoint_id,
             attributes={EventFieldType.LABEL_NAMES: json.dumps(label_columns)},
         )
+        update_monitoring_feature_set(
+            endpoint_record=endpoint_record,
+            feature_names=label_columns,
+            feature_values=label_values,
+        )

         self.label_columns[endpoint_id] = label_columns
         self.feature_names[endpoint_id] = feature_names
@@ -1019,7 +1016,6 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):

         # Add feature_name:value pairs along with a mapping dictionary of all of these pairs
         feature_names = self.feature_names[endpoint_id]
-        feature_values = event[EventFieldType.FEATURES]
         self._map_dictionary_values(
             event=event,
             named_iters=feature_names,
@@ -1029,7 +1025,6 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):

         # Add label_name:value pairs along with a mapping dictionary of all of these pairs
         label_names = self.label_columns[endpoint_id]
-        label_values = event[EventFieldType.PREDICTION]
         self._map_dictionary_values(
             event=event,
             named_iters=label_names,
@@ -1045,9 +1040,9 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):

     @staticmethod
     def _map_dictionary_values(
-        event: typing.Dict,
-        named_iters: typing.List,
-        values_iters: typing.List,
+        event: dict,
+        named_iters: list,
+        values_iters: list,
         mapping_dictionary: str,
     ):
         """Adding name-value pairs to event dictionary based on two provided lists of names and values. These pairs
@@ -1082,7 +1077,7 @@ class UpdateEndpoint(mlrun.feature_store.steps.MapClass):
         self.project = project
         self.model_endpoint_store_target = model_endpoint_store_target

-    def do(self, event: typing.Dict):
+    def do(self, event: dict):
         update_endpoint_record(
             project=self.project,
             endpoint_id=event.pop(EventFieldType.ENDPOINT_ID),
@@ -1117,7 +1112,7 @@ class InferSchema(mlrun.feature_store.steps.MapClass):
         self.table = table
         self.keys = set()

-    def do(self, event: typing.Dict):
+    def do(self, event: dict):
         key_set = set(event.keys())
         if not key_set.issubset(self.keys):
             self.keys.update(key_set)
@@ -1241,3 +1236,21 @@ def get_endpoint_record(project: str, endpoint_id: str):
         project=project,
     )
     return model_endpoint_store.get_model_endpoint(endpoint_id=endpoint_id)
+
+
+def update_monitoring_feature_set(
+    endpoint_record: dict[str, typing.Any],
+    feature_names: list[str],
+    feature_values: list[typing.Any],
+):
+    monitoring_feature_set = fstore.get_feature_set(
+        endpoint_record[
+            mlrun.common.schemas.model_monitoring.EventFieldType.FEATURE_SET_URI
+        ]
+    )
+    for name, val in zip(feature_names, feature_values):
+        monitoring_feature_set.add_feature(
+            fstore.Feature(name=name, value_type=type(val))
+        )
+
+    monitoring_feature_set.save()
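
The new helper keeps the monitoring feature-set schema in sync with what the stream actually sees: whenever feature or label names are resolved for an endpoint, each name is registered as a `Feature` whose `value_type` is inferred from the live sample. A hedged usage sketch (the URI and values are illustrative; `fstore` is the `mlrun.feature_store` alias added at the top of this file):

    import typing
    import mlrun.feature_store as fstore

    def sync_features(
        feature_set_uri: str,
        names: list[str],
        values: list[typing.Any],
    ):
        # fetch the monitoring feature set by its store URI
        feature_set = fstore.get_feature_set(feature_set_uri)
        for name, val in zip(names, values):
            # register each observed column, typing it from the sampled value
            feature_set.add_feature(fstore.Feature(name=name, value_type=type(val)))
        feature_set.save()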
mlrun/package/packagers/pandas_packagers.py
CHANGED

@@ -838,9 +838,9 @@ class PandasDataFramePackager(DefaultPackager):
         """
         if isinstance(obj, dict):
             for key, value in obj.items():
-                obj[
-                    PandasDataFramePackager._prepare_result(obj=key)
-                ] = PandasDataFramePackager._prepare_result(obj=value)
+                obj[PandasDataFramePackager._prepare_result(obj=key)] = (
+                    PandasDataFramePackager._prepare_result(obj=value)
+                )
         elif isinstance(obj, list):
             for i, value in enumerate(obj):
                 obj[i] = PandasDataFramePackager._prepare_result(obj=value)
mlrun/package/utils/_archiver.py
CHANGED

@@ -179,7 +179,9 @@ class _TarArchiver(_Archiver):

         # Extract:
         with tarfile.open(archive_path, f"r:{cls._MODE_STRING}") as tar_file:
-            tar_file.extractall(directory_path)
+            # use 'data' to ensure no security risks are imposed by the archive files
+            # see: https://docs.python.org/3/library/tarfile.html#tarfile.TarFile.extractall
+            tar_file.extractall(directory_path, filter="data")

         return str(directory_path)

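
The `filter="data"` argument was added in Python 3.12 and backported to maintenance releases of 3.8-3.11; it makes `extractall` reject absolute paths, parent-directory traversal, and special files instead of writing them. A standalone sketch under those version assumptions:

    import tarfile

    def safe_extract(archive_path: str, directory_path: str) -> None:
        with tarfile.open(archive_path, "r:gz") as tar_file:
            # "data" strips risky metadata and raises on members that would
            # escape directory_path (e.g. "../../etc/passwd" or symlink tricks)
            tar_file.extractall(directory_path, filter="data")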
mlrun/platforms/iguazio.py
CHANGED

@@ -16,19 +16,15 @@ import json
 import os
 import urllib
 from collections import namedtuple
-from datetime import datetime
-from http import HTTPStatus
 from urllib.parse import urlparse

 import kfp.dsl
 import requests
 import semver
-import urllib3
 import v3io

 import mlrun.errors
 from mlrun.config import config as mlconf
-from mlrun.errors import err_to_str
 from mlrun.utils import dict_to_json

 _cached_control_session = None
@@ -488,25 +484,6 @@ class V3ioStreamClient:
         return response.output.records


-def create_control_session(url, username, password):
-    # for systems without production cert - silence no cert verification WARN
-    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-    if not username or not password:
-        raise ValueError("cannot create session key, missing username or password")
-
-    session = requests.Session()
-    session.auth = (username, password)
-    try:
-        auth = session.post(f"{url}/api/sessions", verify=False)
-    except OSError as exc:
-        raise OSError(f"error: cannot connect to {url}: {err_to_str(exc)}")
-
-    if not auth.ok:
-        raise OSError(f"failed to create session: {url}, {auth.text}")
-
-    return auth.json()["data"]["id"]
-
-
 def is_iguazio_endpoint(endpoint_url: str) -> bool:
     # TODO: find a better heuristic
     return ".default-tenant." in endpoint_url
@@ -533,21 +510,6 @@ def is_iguazio_session_cookie(session_cookie: str) -> bool:
     return False


-def is_iguazio_system_2_10_or_above(dashboard_url):
-    # for systems without production cert - silence no cert verification WARN
-    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-    response = requests.get(f"{dashboard_url}/api/external_versions", verify=False)
-
-    if not response.ok:
-        if response.status_code == HTTPStatus.NOT_FOUND.value:
-            # in iguazio systems prior to 2.10 this endpoint didn't exist, so the api returns 404 cause endpoint not
-            # found
-            return False
-        response.raise_for_status()
-
-    return True
-
-
 # we assign the control session or access key to the password since this is iguazio auth scheme
 # (requests should be sent with username:control_session/access_key as auth header)
 def add_or_refresh_credentials(
@@ -577,33 +539,12 @@ def add_or_refresh_credentials(
     # (ideally if we could identify we're in enterprise we would have verify here that token and username have value)
     if not is_iguazio_endpoint(api_url):
         return "", "", token
-
-
-
-
-
-
-        raise ValueError(
-            "username and access key required to authenticate against iguazio system"
-        )
-        return username, token, ""
-
-    if not username or not password:
-        raise ValueError("username and password needed to create session")
-
-    global _cached_control_session
-    now = datetime.now()
-    if _cached_control_session:
-        if (
-            _cached_control_session[2] == username
-            and _cached_control_session[3] == password
-            and (now - _cached_control_session[1]).seconds < 20 * 60 * 60
-        ):
-            return _cached_control_session[2], _cached_control_session[0], ""
-
-    control_session = create_control_session(iguazio_dashboard_url, username, password)
-    _cached_control_session = (control_session, now, username, password)
-    return username, control_session, ""
+
+    if not username or not token:
+        raise ValueError(
+            "username and access key required to authenticate against iguazio system"
+        )
+    return username, token, ""


 def parse_path(url, suffix="/"):
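
After this change the control-session fallback is gone entirely: against an Iguazio endpoint, credentials are only passed through as a username/access-key pair. A simplified behavioral sketch (not the actual function body; the endpoint heuristic mirrors `is_iguazio_endpoint` above):

    def resolve_credentials(api_url: str, username: str, token: str):
        # non-iguazio endpoints keep whatever bearer token was supplied
        if ".default-tenant." not in api_url:
            return "", "", token
        # iguazio endpoints now require username + access key; no session is created
        if not username or not token:
            raise ValueError(
                "username and access key required to authenticate against iguazio system"
            )
        return username, token, ""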
mlrun/projects/pipelines.py
CHANGED

@@ -13,6 +13,7 @@
 # limitations under the License.
 import abc
 import builtins
+import http
 import importlib.util as imputil
 import os
 import tempfile
@@ -69,16 +70,16 @@ class WorkflowSpec(mlrun.model.ModelObj):

     def __init__(
         self,
-        engine=None,
-        code=None,
-        path=None,
-        args=None,
-        name=None,
-        handler=None,
-        args_schema: dict = None,
+        engine: typing.Optional[str] = None,
+        code: typing.Optional[str] = None,
+        path: typing.Optional[str] = None,
+        args: typing.Optional[dict] = None,
+        name: typing.Optional[str] = None,
+        handler: typing.Optional[str] = None,
+        args_schema: typing.Optional[dict] = None,
         schedule: typing.Union[str, mlrun.common.schemas.ScheduleCronTrigger] = None,
-        cleanup_ttl: int = None,
-        image: str = None,
+        cleanup_ttl: typing.Optional[int] = None,
+        image: typing.Optional[str] = None,
     ):
         self.engine = engine
         self.code = code
@@ -401,6 +402,9 @@ def enrich_function_object(
     else:
         f.spec.build.source = project.spec.source
         f.spec.build.load_source_on_run = project.spec.load_source_on_run
+        f.spec.build.source_code_target_dir = (
+            project.spec.build.source_code_target_dir
+        )
     f.spec.workdir = project.spec.workdir or project.spec.subpath
     f.prepare_image_for_deploy()

@@ -605,6 +609,7 @@ class _KFPRunner(_PipelineRunner):
             namespace=namespace,
             artifact_path=artifact_path,
             cleanup_ttl=workflow_spec.cleanup_ttl,
+            timeout=int(mlrun.mlconf.workflows.timeouts.kfp),
         )

         # The user provided workflow code might have made changes to function specs that require cleanup
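
The KFP submission now reads its timeout from the new `workflows.timeouts` configuration block. Assuming the usual mlrun config override mechanism (attribute assignment on `mlrun.mlconf`, or the matching `MLRUN_`-prefixed environment variable), it can be tuned like any other setting:

    import mlrun

    # per-engine workflow timeout in seconds; the key name follows the diff above
    mlrun.mlconf.workflows.timeouts.kfp = 120
    timeout = int(mlrun.mlconf.workflows.timeouts.kfp)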
@@ -862,17 +867,44 @@ class _RemoteRunner(_PipelineRunner):
             )
             return

+        get_workflow_id_timeout = max(
+            int(mlrun.mlconf.workflows.timeouts.remote),
+            int(getattr(mlrun.mlconf.workflows.timeouts, inner_engine.engine)),
+        )
+
+        logger.debug(
+            "Workflow submitted, waiting for pipeline run to start",
+            workflow_name=workflow_response.name,
+            get_workflow_id_timeout=get_workflow_id_timeout,
+        )
+
+        def _get_workflow_id_or_bail():
+            try:
+                return run_db.get_workflow_id(
+                    project=project.name,
+                    name=workflow_response.name,
+                    run_id=workflow_response.run_id,
+                    engine=workflow_spec.engine,
+                )
+            except mlrun.errors.MLRunHTTPStatusError as get_wf_exc:
+                # fail fast on specific errors
+                if get_wf_exc.error_status_code in [
+                    http.HTTPStatus.PRECONDITION_FAILED
+                ]:
+                    raise mlrun.errors.MLRunFatalFailureError(
+                        original_exception=get_wf_exc
+                    )
+
+                # raise for a retry (on other errors)
+                raise
+
         # Getting workflow id from run:
         response = retry_until_successful(
             1,
-
+            get_workflow_id_timeout,
             logger,
             False,
-            run_db.get_workflow_id,
-            project=project.name,
-            name=workflow_response.name,
-            run_id=workflow_response.run_id,
-            engine=workflow_spec.engine,
+            _get_workflow_id_or_bail,
         )
         workflow_id = response.workflow_id
         # After fetching the workflow_id the workflow executed successfully
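
The wrapper converts one class of HTTP errors into `MLRunFatalFailureError`, which the retry helper treats as non-retryable, while any other exception propagates and is retried until the computed timeout. A generic, self-contained sketch of that fail-fast retry pattern (the retry helper here is simplified, not mlrun's actual implementation):

    import time

    class FatalFailure(Exception):
        # stand-in for mlrun.errors.MLRunFatalFailureError
        def __init__(self, original_exception):
            self.original_exception = original_exception

    def retry_until_successful(backoff, timeout, func):
        deadline = time.monotonic() + timeout
        while True:
            try:
                return func()
            except FatalFailure as fatal:
                # fail fast: surface the wrapped error, no more retries
                raise fatal.original_exception
            except Exception:
                if time.monotonic() >= deadline:
                    raise
                time.sleep(backoff)  # transient error: back off and retry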
@@ -988,6 +1020,7 @@ def load_and_run(
     cleanup_ttl: int = None,
     load_only: bool = False,
     wait_for_completion: bool = False,
+    project_context: str = None,
 ):
     """
     Auxiliary function that the RemoteRunner run once or run every schedule.
@@ -1018,10 +1051,11 @@ def load_and_run(
                                  workflow and all its resources are deleted)
     :param load_only:            for just loading the project, inner use.
     :param wait_for_completion:  wait for workflow completion before returning
+    :param project_context:      project context path (used for loading the project)
     """
     try:
         project = mlrun.load_project(
-            context=f"./{project_name}",
+            context=project_context or f"./{project_name}",
             url=url,
             name=project_name,
             init_git=init_git,
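
With `project_context`, a scheduled workflow runner can load the project into an explicit directory instead of the default `./{project_name}`. A short usage sketch (paths and URL are illustrative):

    import mlrun

    # hypothetical values; previously the context was always f"./{project_name}"
    project = mlrun.load_project(
        context="/home/mlrun/context/my-project",  # project_context override
        url="git://github.com/org/my-project.git",
        name="my-project",
    )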
@@ -1053,7 +1087,7 @@ def load_and_run(

         raise error

-    context.logger.info(f"Loaded project {project.name} from remote successfully")
+    context.logger.info(f"Loaded project {project.name} successfully")

     if load_only:
         return