mlrun 1.6.2rc5__py3-none-any.whl → 1.6.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mlrun/artifacts/model.py +28 -22
- mlrun/common/schemas/__init__.py +1 -0
- mlrun/common/schemas/model_monitoring/__init__.py +1 -0
- mlrun/common/schemas/model_monitoring/constants.py +21 -5
- mlrun/config.py +32 -12
- mlrun/data_types/data_types.py +4 -0
- mlrun/datastore/v3io.py +27 -50
- mlrun/db/auth_utils.py +152 -0
- mlrun/db/httpdb.py +51 -30
- mlrun/lists.py +2 -0
- mlrun/model.py +26 -2
- mlrun/model_monitoring/controller.py +0 -7
- mlrun/model_monitoring/features_drift_table.py +6 -0
- mlrun/model_monitoring/helpers.py +4 -1
- mlrun/model_monitoring/stream_processing.py +50 -37
- mlrun/projects/pipelines.py +30 -6
- mlrun/projects/project.py +20 -23
- mlrun/render.py +13 -4
- mlrun/run.py +2 -0
- mlrun/runtimes/pod.py +5 -5
- mlrun/utils/async_http.py +25 -5
- mlrun/utils/helpers.py +12 -0
- mlrun/utils/logger.py +11 -6
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.6.2rc5.dist-info → mlrun-1.6.3.dist-info}/METADATA +9 -7
- {mlrun-1.6.2rc5.dist-info → mlrun-1.6.3.dist-info}/RECORD +30 -30
- mlrun/datastore/helpers.py +0 -18
- {mlrun-1.6.2rc5.dist-info → mlrun-1.6.3.dist-info}/LICENSE +0 -0
- {mlrun-1.6.2rc5.dist-info → mlrun-1.6.3.dist-info}/WHEEL +0 -0
- {mlrun-1.6.2rc5.dist-info → mlrun-1.6.3.dist-info}/entry_points.txt +0 -0
- {mlrun-1.6.2rc5.dist-info → mlrun-1.6.3.dist-info}/top_level.txt +0 -0
mlrun/db/httpdb.py
CHANGED
@@ -33,6 +33,7 @@ import mlrun.common.schemas
 import mlrun.model_monitoring.model_endpoint
 import mlrun.platforms
 import mlrun.projects
+from mlrun.db.auth_utils import OAuthClientIDTokenProvider, StaticTokenProvider
 from mlrun.errors import MLRunInvalidArgumentError, err_to_str
 
 from ..artifacts import Artifact
@@ -133,17 +134,28 @@ class HTTPRunDB(RunDBInterface):
             endpoint += f":{parsed_url.port}"
         base_url = f"{parsed_url.scheme}://{endpoint}{parsed_url.path}"
 
+        self.base_url = base_url
         username = parsed_url.username or config.httpdb.user
         password = parsed_url.password or config.httpdb.password
+        self.token_provider = None
 
-        username, password, token = mlrun.platforms.add_or_refresh_credentials(
-            parsed_url.hostname, username, password, config.httpdb.token
-        )
+        if config.auth_with_client_id.enabled:
+            self.token_provider = OAuthClientIDTokenProvider(
+                token_endpoint=mlrun.get_secret_or_env("MLRUN_AUTH_TOKEN_ENDPOINT"),
+                client_id=mlrun.get_secret_or_env("MLRUN_AUTH_CLIENT_ID"),
+                client_secret=mlrun.get_secret_or_env("MLRUN_AUTH_CLIENT_SECRET"),
+                timeout=config.auth_with_client_id.request_timeout,
+            )
+        else:
+            username, password, token = mlrun.platforms.add_or_refresh_credentials(
+                parsed_url.hostname, username, password, config.httpdb.token
+            )
+
+            if token:
+                self.token_provider = StaticTokenProvider(token)
 
-        self.base_url = base_url
         self.user = username
         self.password = password
-        self.token = token
 
     def __repr__(self):
         cls = self.__class__.__name__
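How a client would actually pick up the new flow: the three MLRUN_AUTH_* names are read verbatim in the diff above, while enabling the config flag through an environment variable assumes mlrun's usual MLRUN_ prefix with __ nesting; the server URL and credential values are placeholders.

import os
import mlrun

# Credentials for the OAuth client-ID flow (variable names taken from the diff)
os.environ["MLRUN_AUTH_TOKEN_ENDPOINT"] = "https://idp.example.com/oauth2/token"
os.environ["MLRUN_AUTH_CLIENT_ID"] = "my-client-id"
os.environ["MLRUN_AUTH_CLIENT_SECRET"] = "my-client-secret"

# Assumed env spelling for config.auth_with_client_id.enabled
os.environ["MLRUN_AUTH_WITH_CLIENT_ID__ENABLED"] = "true"

db = mlrun.get_run_db("https://mlrun-api.example.com")
# db.token_provider is now an OAuthClientIDTokenProvider; with the flag off, the
# client falls back to add_or_refresh_credentials() and, when a token exists,
# wraps it in a StaticTokenProvider.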
@@ -213,17 +225,19 @@ class HTTPRunDB(RunDBInterface):
 
         if self.user:
             kw["auth"] = (self.user, self.password)
-        elif self.token:
-            # Iguazio auth doesn't support passing token through bearer, so use cookie instead
-            if mlrun.platforms.iguazio.is_iguazio_session(self.token):
-                session_cookie = f'j:{{"sid": "{self.token}"}}'
-                cookies = {
-                    "session": session_cookie,
-                }
-                kw["cookies"] = cookies
-            else:
-                if "Authorization" not in kw.setdefault("headers", {}):
-                    kw["headers"].update({"Authorization": "Bearer " + self.token})
+        elif self.token_provider:
+            token = self.token_provider.get_token()
+            if token:
+                # Iguazio auth doesn't support passing token through bearer, so use cookie instead
+                if self.token_provider.is_iguazio_session():
+                    session_cookie = f'j:{{"sid": "{token}"}}'
+                    cookies = {
+                        "session": session_cookie,
+                    }
+                    kw["cookies"] = cookies
+                else:
+                    if "Authorization" not in kw.setdefault("headers", {}):
+                        kw["headers"].update({"Authorization": "Bearer " + token})
 
         if mlrun.common.schemas.HeaderNames.client_version not in kw.setdefault(
             "headers", {}
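The `_api_call` branch above relies on only two methods of the provider. A minimal sketch of that contract, not the real implementation (which lives in the new mlrun/db/auth_utils.py, +152 lines):

class StaticTokenProvider:
    """Sketch only: a fixed token behind the interface HTTPRunDB uses."""

    def __init__(self, token: str):
        self.token = token

    def get_token(self) -> str:
        # Static token: nothing to refresh
        return self.token

    def is_iguazio_session(self) -> bool:
        # Assumption: the real check inspects the token format to decide whether
        # it is an Iguazio session id (sent as a cookie) or a plain bearer token.
        return False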
@@ -930,6 +944,7 @@ class HTTPRunDB(RunDBInterface):
         kind: str = None,
         category: Union[str, mlrun.common.schemas.ArtifactCategories] = None,
         tree: str = None,
+        producer_uri: str = None,
     ) -> ArtifactList:
         """List artifacts filtered by various parameters.
 

@@ -956,9 +971,12 @@ class HTTPRunDB(RunDBInterface):
         :param best_iteration: Returns the artifact which belongs to the best iteration of a given run, in the case of
             artifacts generated from a hyper-param run. If only a single iteration exists, will return the artifact
             from that iteration. If using ``best_iter``, the ``iter`` parameter must not be used.
-        :param kind:
-        :param category:
-        :param tree:
+        :param kind: Return artifacts of the requested kind.
+        :param category: Return artifacts of the requested category.
+        :param tree: Return artifacts of the requested tree.
+        :param producer_uri: Return artifacts produced by the requested producer URI. Producer URI usually
+            points to a run and is used to filter artifacts by the run that produced them when the artifact producer id
+            is a workflow id (artifact was created as part of a workflow).
         """
 
         project = project or config.default_project

@@ -977,6 +995,7 @@ class HTTPRunDB(RunDBInterface):
             "category": category,
             "tree": tree,
             "format": mlrun.common.schemas.ArtifactsFormat.full.value,
+            "producer_uri": producer_uri,
         }
         error = "list artifacts"
         endpoint_path = f"projects/{project}/artifacts"
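A short usage sketch for the new filter; the project name and producer URI below are placeholders (per the docstring, the producer URI points to the run or workflow that produced the artifacts):

import mlrun

db = mlrun.get_run_db()
# List only model artifacts produced by a specific workflow run
artifacts = db.list_artifacts(
    project="my-project",
    kind="model",
    producer_uri="my-project/4031cb7da1d5477fa51d00530fdf52a2",
)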
@@ -1611,19 +1630,21 @@ class HTTPRunDB(RunDBInterface):
         artifact_path=None,
         ops=None,
         cleanup_ttl=None,
+        timeout=60,
     ):
         """Submit a KFP pipeline for execution.
 
-        :param project:
-        :param pipeline:
-        :param arguments:
-        :param experiment:
-        :param run:
-        :param namespace:
-        :param artifact_path:
-        :param ops:
-        :param cleanup_ttl:
-
+        :param project: The project of the pipeline
+        :param pipeline: Pipeline function or path to .yaml/.zip pipeline file.
+        :param arguments: A dictionary of arguments to pass to the pipeline.
+        :param experiment: A name to assign for the specific experiment.
+        :param run: A name for this specific run.
+        :param namespace: Kubernetes namespace to execute the pipeline in.
+        :param artifact_path: A path to artifacts used by this pipeline.
+        :param ops: Transformers to apply on all ops in the pipeline.
+        :param cleanup_ttl: Pipeline cleanup ttl in secs (time to wait after workflow completion, at which point the
+            workflow and all its resources are deleted)
+        :param timeout: Timeout for the API call.
         """
 
         if isinstance(pipeline, str):

@@ -1665,7 +1686,7 @@ class HTTPRunDB(RunDBInterface):
             "POST",
             f"projects/{project}/pipelines",
             params=params,
-            timeout=
+            timeout=timeout,
             body=data,
             headers=headers,
         )
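A usage sketch for the now-configurable API-call timeout; the project name and pipeline file are placeholders:

import mlrun

db = mlrun.get_run_db()
run_id = db.submit_pipeline(
    "my-project",
    "./pipeline.yaml",   # pipeline function or path to a .yaml/.zip file
    experiment="my-experiment",
    timeout=120,         # new parameter: API-call timeout in seconds, default 60
)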
mlrun/lists.py
CHANGED
@@ -36,6 +36,7 @@ list_header = [
     "parameters",
     "results",
     "artifacts",
+    "artifact_uris",
     "error",
 ]
 

@@ -63,6 +64,7 @@ class RunList(list):
             get_in(run, "spec.parameters", ""),
             get_in(run, "status.results", ""),
             get_in(run, "status.artifacts", []),
+            get_in(run, "status.artifact_uris", {}),
             get_in(run, "status.error", ""),
         ]
         if extend_iterations and iterations:
mlrun/model.py
CHANGED
@@ -62,6 +62,7 @@ class ModelObj:
             return new_type.from_dict(param)
         return param
 
+    @mlrun.utils.filter_warnings("ignore", FutureWarning)
     def to_dict(self, fields=None, exclude=None):
         """convert the object to a python dictionary
 

@@ -623,6 +624,11 @@ class RunMetadata(ModelObj):
     def iteration(self, iteration):
         self._iteration = iteration
 
+    def is_workflow_runner(self):
+        if not self.labels:
+            return False
+        return self.labels.get("job-type", "") == "workflow-runner"
+
 
 class HyperParamStrategies:
     grid = "grid"
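The new helper only inspects the run's labels. A quick behavioral sketch (RunMetadata and the "job-type" label come straight from the diff above; constructing the metadata directly is just for illustration):

from mlrun.model import RunMetadata

meta = RunMetadata(labels={"job-type": "workflow-runner"})
assert meta.is_workflow_runner() is True

# Runs without labels, or with a different job-type, are not workflow runners
assert RunMetadata().is_workflow_runner() is False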
@@ -1052,6 +1058,7 @@ class RunStatus(ModelObj):
         ui_url=None,
         reason: str = None,
         notifications: Dict[str, Notification] = None,
+        artifact_uris: dict[str, str] = None,
     ):
         self.state = state or "created"
         self.status_text = status_text

@@ -1066,6 +1073,21 @@ class RunStatus(ModelObj):
         self.ui_url = ui_url
         self.reason = reason
         self.notifications = notifications or {}
+        # Artifact key -> URI mapping, since the full artifacts are not stored in the runs DB table
+        self.artifact_uris = artifact_uris or {}
+
+    def is_failed(self) -> Optional[bool]:
+        """
+        This method returns whether a run has failed.
+        Returns none if state has yet to be defined. callee is responsible for handling None.
+        (e.g wait for state to be defined)
+        """
+        if not self.state:
+            return None
+        return self.state.casefold() in [
+            mlrun.run.RunStatuses.failed.casefold(),
+            mlrun.run.RunStatuses.error.casefold(),
+        ]
 
 
 class RunTemplate(ModelObj):
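Because `is_failed` is tri-state, callers should treat `None` as "state not yet known" rather than "not failed". A minimal sketch, assuming an existing `RunObject` named `run`:

# Hypothetical polling check around the new tri-state helper
failed = run.status.is_failed()
if failed is None:
    pass        # state not defined yet, e.g. keep waiting/polling
elif failed:
    raise RuntimeError(f"run ended in state {run.status.state}")
else:
    print("run did not fail")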
@@ -1365,8 +1387,10 @@ class RunObject(RunTemplate):
             iter=self.metadata.iteration,
         )
         if run:
-            self.status = RunStatus.from_dict(run.get("status", {}))
-
+            run_status = run.get("status", {})
+            # Artifacts are not stored in the DB, so we need to preserve them here
+            run_status["artifacts"] = self.status.artifacts
+            self.status = RunStatus.from_dict(run_status)
         return self
 
     def show(self):
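In practice this means a `refresh()` no longer drops in-memory artifacts, since the DB side only carries the key -> URI map. A hedged sketch, assuming a handle to a completed run:

run = project.run_function("trainer")  # hypothetical completed job run
run.refresh()                          # reload status from the runs DB
# full artifacts are preserved locally; the DB stores only artifact_uris
print(run.status.artifact_uris)        # e.g. {"model": "store://artifacts/..."}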
mlrun/model_monitoring/controller.py
CHANGED

@@ -426,13 +426,6 @@ class MonitoringApplicationController:
             m_fs = fstore.get_feature_set(
                 endpoint[mm_constants.EventFieldType.FEATURE_SET_URI]
             )
-            labels = endpoint[mm_constants.EventFieldType.LABEL_NAMES]
-            if labels:
-                if isinstance(labels, str):
-                    labels = json.loads(labels)
-                for label in labels:
-                    if label not in list(m_fs.spec.features.keys()):
-                        m_fs.add_feature(fstore.Feature(name=label, value_type="float"))
 
             for application in applications_names:
                 batch_window = batch_window_generator.get_batch_window(
mlrun/model_monitoring/features_drift_table.py
CHANGED

@@ -19,6 +19,7 @@ import plotly.graph_objects as go
 from plotly.subplots import make_subplots
 
 import mlrun.common.schemas.model_monitoring
+import mlrun.common.schemas.model_monitoring.constants as mm_constants
 
 # A type for representing a drift result, a tuple of the status and the drift mean:
 DriftResultType = Tuple[mlrun.common.schemas.model_monitoring.DriftStatus, float]

@@ -112,6 +113,11 @@ class FeaturesDriftTablePlot:
         :return: The full path to the html file of the plot.
         """
         # Plot the drift table:
+        features = [
+            feature
+            for feature in features
+            if feature not in mm_constants.FeatureSetFeatures.list()
+        ]
         figure = self._plot(
             features=features,
             sample_set_statistics=sample_set_statistics,
mlrun/model_monitoring/helpers.py
CHANGED

@@ -41,7 +41,7 @@ class _MLRunNoRunsFoundError(Exception):
     pass
 
 
-def get_stream_path(project: str = None, application_name: str = None):
+def get_stream_path(project: str = None, application_name: str = None) -> str:
     """
     Get stream path from the project secret. If wasn't set, take it from the system configurations
 

@@ -62,6 +62,9 @@ def get_stream_path(project: str = None, application_name: str = None):
         application_name=application_name,
    )
 
+    if isinstance(stream_uri, list):  # ML-6043 - user side gets only the new stream uri
+        stream_uri = stream_uri[1]
+
     return mlrun.common.model_monitoring.helpers.parse_monitoring_stream_path(
         stream_uri=stream_uri, project=project, application_name=application_name
     )
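The guard above matters when the resolved configuration carries both the legacy and the new stream URI; callers now always get a single string back. A small sketch of the selection rule (the URIs are placeholders):

# Hypothetical resolved value holding [old_stream_uri, new_stream_uri] (see ML-6043)
stream_uri = [
    "v3io:///projects/my-project/model-endpoints/stream",
    "v3io:///projects/my-project/model-endpoints/stream-v2",
]
if isinstance(stream_uri, list):
    stream_uri = stream_uri[1]  # keep only the new stream URI
assert stream_uri.endswith("stream-v2")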
mlrun/model_monitoring/stream_processing.py
CHANGED

@@ -24,6 +24,7 @@ import mlrun
 import mlrun.common.model_monitoring.helpers
 import mlrun.config
 import mlrun.datastore.targets
+import mlrun.feature_store as fstore
 import mlrun.feature_store.steps
 import mlrun.model_monitoring.prometheus
 import mlrun.serving.states

@@ -49,7 +50,7 @@ class EventStreamProcessor:
         parquet_batching_timeout_secs: int,
         parquet_target: str,
         sample_window: int = 10,
-        aggregate_windows: typing.Optional[typing.List[str]] = None,
+        aggregate_windows: typing.Optional[list[str]] = None,
         aggregate_period: str = "30s",
         model_monitoring_access_key: str = None,
     ):

@@ -349,7 +350,6 @@ class EventStreamProcessor:
             rate="10/m",
             time_col=EventFieldType.TIMESTAMP,
             container=self.tsdb_container,
-            access_key=self.v3io_access_key,
             v3io_frames=self.v3io_framesd,
             infer_columns_from_data=True,
             index_cols=[

@@ -587,6 +587,8 @@ class ProcessBeforeParquet(mlrun.feature_store.steps.MapClass):
         for key in [
             EventFieldType.FEATURES,
             EventFieldType.NAMED_FEATURES,
+            EventFieldType.PREDICTION,
+            EventFieldType.NAMED_PREDICTIONS,
         ]:
             event.pop(key, None)

@@ -629,14 +631,14 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
         self.project: str = project
 
         # First and last requests timestamps (value) of each endpoint (key)
-        self.first_request: typing.Dict[str, str] = dict()
-        self.last_request: typing.Dict[str, str] = dict()
+        self.first_request: dict[str, str] = dict()
+        self.last_request: dict[str, str] = dict()
 
         # Number of errors (value) per endpoint (key)
-        self.error_count: typing.Dict[str, int] = collections.defaultdict(int)
+        self.error_count: dict[str, int] = collections.defaultdict(int)
 
         # Set of endpoints in the current events
-        self.endpoints: typing.Set[str] = set()
+        self.endpoints: set[str] = set()
 
     def do(self, full_event):
         event = full_event.body

@@ -745,18 +747,12 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
         # in list of events. This list will be used as the body for the storey event.
         events = []
         for i, (feature, prediction) in enumerate(zip(features, predictions)):
-            # Validate that inputs are based on numeric values
-            if not self.is_valid(
-                endpoint_id,
-                self.is_list_of_numerics,
-                feature,
-                ["request", "inputs", f"[{i}]"],
-            ):
-                return None
-
             if not isinstance(prediction, list):
                 prediction = [prediction]
 
+            if not isinstance(feature, list):
+                feature = [feature]
+
             events.append(
                 {
                     EventFieldType.FUNCTION_URI: function_uri,

@@ -803,18 +799,6 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
                 f"{self.last_request[endpoint_id]} - write to TSDB will be rejected"
             )
 
-    @staticmethod
-    def is_list_of_numerics(
-        field: typing.List[typing.Union[int, float, dict, list]],
-        dict_path: typing.List[str],
-    ):
-        if all(isinstance(x, int) or isinstance(x, float) for x in field):
-            return True
-        logger.error(
-            f"List does not consist of only numeric values: {field} [Event -> {','.join(dict_path)}]"
-        )
-        return False
-
     def resume_state(self, endpoint_id):
         # Make sure process is resumable, if process fails for any reason, be able to pick things up close to where we
         # left them

@@ -849,7 +833,7 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
         endpoint_id: str,
         validation_function,
         field: typing.Any,
-        dict_path: typing.List[str],
+        dict_path: list[str],
     ):
         if validation_function(field, dict_path):
             return True

@@ -857,7 +841,7 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
         return False
 
 
-def is_not_none(field: typing.Any, dict_path: typing.List[str]):
+def is_not_none(field: typing.Any, dict_path: list[str]):
     if field is not None:
         return True
     logger.error(

@@ -946,9 +930,11 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
             return self.label_columns[endpoint_id]
         return None
 
-    def do(self, event: typing.Dict):
+    def do(self, event: dict):
         endpoint_id = event[EventFieldType.ENDPOINT_ID]
 
+        feature_values = event[EventFieldType.FEATURES]
+        label_values = event[EventFieldType.PREDICTION]
         # Get feature names and label columns
         if endpoint_id not in self.feature_names:
             endpoint_record = get_endpoint_record(

@@ -984,6 +970,12 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
                 },
             )
 
+            update_monitoring_feature_set(
+                endpoint_record=endpoint_record,
+                feature_names=feature_names,
+                feature_values=feature_values,
+            )
+
             # Similar process with label columns
             if not label_columns and self._infer_columns_from_data:
                 label_columns = self._infer_label_columns_from_data(event)

@@ -1002,6 +994,11 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
                 endpoint_id=endpoint_id,
                 attributes={EventFieldType.LABEL_NAMES: json.dumps(label_columns)},
             )
+            update_monitoring_feature_set(
+                endpoint_record=endpoint_record,
+                feature_names=label_columns,
+                feature_values=label_values,
+            )
 
             self.label_columns[endpoint_id] = label_columns
             self.feature_names[endpoint_id] = feature_names

@@ -1019,7 +1016,6 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
 
         # Add feature_name:value pairs along with a mapping dictionary of all of these pairs
         feature_names = self.feature_names[endpoint_id]
-        feature_values = event[EventFieldType.FEATURES]
         self._map_dictionary_values(
             event=event,
             named_iters=feature_names,

@@ -1029,7 +1025,6 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
 
         # Add label_name:value pairs along with a mapping dictionary of all of these pairs
         label_names = self.label_columns[endpoint_id]
-        label_values = event[EventFieldType.PREDICTION]
         self._map_dictionary_values(
             event=event,
             named_iters=label_names,

@@ -1045,9 +1040,9 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
 
     @staticmethod
     def _map_dictionary_values(
-        event: typing.Dict,
-        named_iters: typing.List,
-        values_iters: typing.List,
+        event: dict,
+        named_iters: list,
+        values_iters: list,
         mapping_dictionary: str,
     ):
         """Adding name-value pairs to event dictionary based on two provided lists of names and values. These pairs

@@ -1082,7 +1077,7 @@ class UpdateEndpoint(mlrun.feature_store.steps.MapClass):
         self.project = project
         self.model_endpoint_store_target = model_endpoint_store_target
 
-    def do(self, event: typing.Dict):
+    def do(self, event: dict):
         update_endpoint_record(
             project=self.project,
             endpoint_id=event.pop(EventFieldType.ENDPOINT_ID),

@@ -1117,7 +1112,7 @@ class InferSchema(mlrun.feature_store.steps.MapClass):
         self.table = table
         self.keys = set()
 
-    def do(self, event: typing.Dict):
+    def do(self, event: dict):
         key_set = set(event.keys())
         if not key_set.issubset(self.keys):
             self.keys.update(key_set)

@@ -1241,3 +1236,21 @@ def get_endpoint_record(project: str, endpoint_id: str):
         project=project,
     )
     return model_endpoint_store.get_model_endpoint(endpoint_id=endpoint_id)
+
+
+def update_monitoring_feature_set(
+    endpoint_record: dict[str, typing.Any],
+    feature_names: list[str],
+    feature_values: list[typing.Any],
+):
+    monitoring_feature_set = fstore.get_feature_set(
+        endpoint_record[
+            mlrun.common.schemas.model_monitoring.EventFieldType.FEATURE_SET_URI
+        ]
+    )
+    for name, val in zip(feature_names, feature_values):
+        monitoring_feature_set.add_feature(
+            fstore.Feature(name=name, value_type=type(val))
+        )
+
+    monitoring_feature_set.save()
mlrun/projects/pipelines.py
CHANGED
@@ -13,6 +13,7 @@
 # limitations under the License.
 import abc
 import builtins
+import http
 import importlib.util as imputil
 import os
 import tempfile

@@ -608,6 +609,7 @@ class _KFPRunner(_PipelineRunner):
             namespace=namespace,
             artifact_path=artifact_path,
             cleanup_ttl=workflow_spec.cleanup_ttl,
+            timeout=int(mlrun.mlconf.workflows.timeouts.kfp),
         )
 
         # The user provided workflow code might have made changes to function specs that require cleanup

@@ -865,22 +867,44 @@ class _RemoteRunner(_PipelineRunner):
             )
             return
 
+        get_workflow_id_timeout = max(
+            int(mlrun.mlconf.workflows.timeouts.remote),
+            int(getattr(mlrun.mlconf.workflows.timeouts, inner_engine.engine)),
+        )
+
         logger.debug(
             "Workflow submitted, waiting for pipeline run to start",
             workflow_name=workflow_response.name,
+            get_workflow_id_timeout=get_workflow_id_timeout,
         )
 
+        def _get_workflow_id_or_bail():
+            try:
+                return run_db.get_workflow_id(
+                    project=project.name,
+                    name=workflow_response.name,
+                    run_id=workflow_response.run_id,
+                    engine=workflow_spec.engine,
+                )
+            except mlrun.errors.MLRunHTTPStatusError as get_wf_exc:
+                # fail fast on specific errors
+                if get_wf_exc.error_status_code in [
+                    http.HTTPStatus.PRECONDITION_FAILED
+                ]:
+                    raise mlrun.errors.MLRunFatalFailureError(
+                        original_exception=get_wf_exc
+                    )
+
+                # raise for a retry (on other errors)
+                raise
+
         # Getting workflow id from run:
         response = retry_until_successful(
             1,
-
+            get_workflow_id_timeout,
             logger,
             False,
-            run_db.get_workflow_id,
-            project=project.name,
-            name=workflow_response.name,
-            run_id=workflow_response.run_id,
-            engine=workflow_spec.engine,
+            _get_workflow_id_or_bail,
         )
         workflow_id = response.workflow_id
         # After fetching the workflow_id the workflow executed successfully
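The two timeouts consulted above are plain config values, so they can be tuned before running a workflow. A hedged sketch: the attribute paths are exactly the ones read in the diff, while the env-var spelling assumes mlrun's usual MLRUN_ prefix with __ nesting:

import mlrun

# Equivalent env vars (assumption): MLRUN_WORKFLOWS__TIMEOUTS__KFP / ...__REMOTE
mlrun.mlconf.workflows.timeouts.kfp = 90      # passed to submit_pipeline(timeout=...)
mlrun.mlconf.workflows.timeouts.remote = 180  # lower bound for the get_workflow_id wait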
mlrun/projects/project.py
CHANGED
@@ -2602,16 +2602,12 @@ class MlrunProject(ModelObj):
     ) -> _PipelineRunStatus:
         """Run a workflow using kubeflow pipelines
 
-        :param name:
-        :param workflow_path:
-            URL to a workflow file, if not a project workflow
-        :param arguments:
-            Kubeflow pipelines arguments (parameters)
-        :param artifact_path:
-            Target path/url for workflow artifacts, the string
-            '{{workflow.uid}}' will be replaced by workflow id
-        :param workflow_handler:
-            Workflow function handler (for running workflow function directly)
+        :param name: Name of the workflow
+        :param workflow_path: URL to a workflow file, if not a project workflow
+        :param arguments: Kubeflow pipelines arguments (parameters)
+        :param artifact_path: Target path/URL for workflow artifacts, the string '{{workflow.uid}}' will be
+            replaced by workflow id.
+        :param workflow_handler: Workflow function handler (for running workflow function directly)
         :param namespace: Kubernetes namespace if other than default
         :param sync: Force functions sync before run
         :param watch: Wait for pipeline completion

@@ -2624,7 +2620,7 @@ class MlrunProject(ModelObj):
             (which will be converted to the class using its `from_crontab` constructor),
             see this link for help:
             https://apscheduler.readthedocs.io/en/3.x/modules/triggers/cron.html#module-apscheduler.triggers.cron
-
+            For using the pre-defined workflow's schedule, set `schedule=True`
         :param timeout: Timeout in seconds to wait for pipeline completion (watch will be activated)
         :param source: Source to use instead of the actual `project.spec.source` (used when engine is remote).
             Can be a one of:

@@ -2633,12 +2629,11 @@ class MlrunProject(ModelObj):
             Path can be absolute or relative to `project.spec.build.source_code_target_dir` if defined
             (enriched when building a project image with source, see `MlrunProject.build_image`).
             For other engines the source is used to validate that the code is up-to-date.
-        :param cleanup_ttl:
-            pipeline cleanup ttl in secs (time to wait after workflow completion, at which point the
-            workflow and all its resources are deleted)
-        :param notifications:
-            list of notifications to send for workflow completion
-        :returns: Run id
+        :param cleanup_ttl: Pipeline cleanup ttl in secs (time to wait after workflow completion, at which point the
+            workflow and all its resources are deleted)
+        :param notifications: List of notifications to send for workflow completion
+
+        :returns: ~py:class:`~mlrun.projects.pipelines._PipelineRunStatus` instance
         """
 
         arguments = arguments or {}

@@ -2655,12 +2650,14 @@ class MlrunProject(ModelObj):
                 "Remote repo is not defined, use .create_remote() + push()"
             )
 
-        self.sync_functions(always=sync)
-        if not self.spec._function_objects:
-            raise ValueError(
-                "There are no functions in the project."
-                " Make sure you've set your functions with project.set_function()."
-            )
+        if engine not in ["remote"]:
+            # for remote runs we don't require the functions to be synced as they can be loaded dynamically during run
+            self.sync_functions(always=sync)
+            if not self.spec._function_objects:
+                raise ValueError(
+                    "There are no functions in the project."
+                    " Make sure you've set your functions with project.set_function()."
+                )
 
         if not name and not workflow_path and not workflow_handler:
             raise ValueError("Workflow name, path, or handler must be specified")
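For orientation, a hedged end-to-end sketch of `project.run` with the documented parameters; the project, function and workflow names, files, and bucket path are all placeholders:

import mlrun

project = mlrun.get_or_create_project("my-project", context="./")
project.set_function("trainer.py", name="trainer", kind="job", image="mlrun/mlrun")
project.set_workflow("main", "workflow.py")

# Returns a _PipelineRunStatus instance (see the :returns: doc above)
run_status = project.run(
    name="main",
    arguments={"p1": 7},
    artifact_path="s3://my-bucket/pipe/{{workflow.uid}}",
    watch=True,
)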
mlrun/render.py
CHANGED
@@ -134,7 +134,7 @@ def artifacts_html(
 
         if not attribute_value:
             mlrun.utils.logger.warning(
-                "Artifact is
+                f"Artifact required attribute {attribute_name} is missing, omitting from output",
                 artifact_key=key,
             )
             continue

@@ -404,12 +404,21 @@ def runs_to_html(
         df.drop("labels", axis=1, inplace=True)
         df.drop("inputs", axis=1, inplace=True)
         df.drop("artifacts", axis=1, inplace=True)
+        df.drop("artifact_uris", axis=1, inplace=True)
     else:
         df["labels"] = df["labels"].apply(dict_html)
         df["inputs"] = df["inputs"].apply(inputs_html)
-        df["artifacts"] = df["artifacts"].apply(
-            lambda artifacts: artifacts_html(artifacts, "target_path"),
-        )
+        if df["artifacts"][0]:
+            df["artifacts"] = df["artifacts"].apply(
+                lambda artifacts: artifacts_html(artifacts, "target_path"),
+            )
+            df.drop("artifact_uris", axis=1, inplace=True)
+        elif df["artifact_uris"][0]:
+            df["artifact_uris"] = df["artifact_uris"].apply(dict_html)
+            df.drop("artifacts", axis=1, inplace=True)
+        else:
+            df.drop("artifacts", axis=1, inplace=True)
+            df.drop("artifact_uris", axis=1, inplace=True)
 
     def expand_error(x):
         if x["state"] == "error":