mlrun 1.10.0rc13__py3-none-any.whl → 1.10.0rc42__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/__init__.py +22 -2
- mlrun/artifacts/base.py +0 -31
- mlrun/artifacts/document.py +6 -1
- mlrun/artifacts/llm_prompt.py +123 -25
- mlrun/artifacts/manager.py +0 -5
- mlrun/artifacts/model.py +3 -3
- mlrun/common/constants.py +10 -1
- mlrun/common/formatters/artifact.py +1 -0
- mlrun/common/model_monitoring/helpers.py +86 -0
- mlrun/common/schemas/__init__.py +3 -0
- mlrun/common/schemas/auth.py +2 -0
- mlrun/common/schemas/function.py +10 -0
- mlrun/common/schemas/hub.py +30 -18
- mlrun/common/schemas/model_monitoring/__init__.py +3 -0
- mlrun/common/schemas/model_monitoring/constants.py +30 -6
- mlrun/common/schemas/model_monitoring/functions.py +14 -5
- mlrun/common/schemas/model_monitoring/model_endpoints.py +21 -0
- mlrun/common/schemas/pipeline.py +1 -1
- mlrun/common/schemas/serving.py +3 -0
- mlrun/common/schemas/workflow.py +3 -1
- mlrun/common/secrets.py +22 -1
- mlrun/config.py +33 -11
- mlrun/datastore/__init__.py +11 -3
- mlrun/datastore/azure_blob.py +162 -47
- mlrun/datastore/datastore.py +9 -4
- mlrun/datastore/datastore_profile.py +61 -5
- mlrun/datastore/model_provider/huggingface_provider.py +363 -0
- mlrun/datastore/model_provider/mock_model_provider.py +87 -0
- mlrun/datastore/model_provider/model_provider.py +230 -65
- mlrun/datastore/model_provider/openai_provider.py +295 -42
- mlrun/datastore/s3.py +24 -2
- mlrun/datastore/storeytargets.py +2 -3
- mlrun/datastore/utils.py +15 -3
- mlrun/db/base.py +47 -19
- mlrun/db/httpdb.py +120 -56
- mlrun/db/nopdb.py +38 -10
- mlrun/execution.py +70 -19
- mlrun/hub/__init__.py +15 -0
- mlrun/hub/module.py +181 -0
- mlrun/k8s_utils.py +105 -16
- mlrun/launcher/base.py +13 -6
- mlrun/launcher/local.py +15 -0
- mlrun/model.py +24 -3
- mlrun/model_monitoring/__init__.py +1 -0
- mlrun/model_monitoring/api.py +66 -27
- mlrun/model_monitoring/applications/__init__.py +1 -1
- mlrun/model_monitoring/applications/base.py +509 -117
- mlrun/model_monitoring/applications/context.py +2 -4
- mlrun/model_monitoring/applications/results.py +4 -7
- mlrun/model_monitoring/controller.py +239 -101
- mlrun/model_monitoring/db/_schedules.py +116 -33
- mlrun/model_monitoring/db/_stats.py +4 -3
- mlrun/model_monitoring/db/tsdb/base.py +100 -9
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +11 -6
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +191 -50
- mlrun/model_monitoring/db/tsdb/tdengine/writer_graph_steps.py +51 -0
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +17 -4
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +259 -40
- mlrun/model_monitoring/helpers.py +54 -9
- mlrun/model_monitoring/stream_processing.py +45 -14
- mlrun/model_monitoring/writer.py +220 -1
- mlrun/platforms/__init__.py +3 -2
- mlrun/platforms/iguazio.py +7 -3
- mlrun/projects/operations.py +6 -1
- mlrun/projects/pipelines.py +46 -26
- mlrun/projects/project.py +166 -58
- mlrun/run.py +94 -17
- mlrun/runtimes/__init__.py +18 -0
- mlrun/runtimes/base.py +14 -6
- mlrun/runtimes/daskjob.py +7 -0
- mlrun/runtimes/local.py +5 -2
- mlrun/runtimes/mounts.py +20 -2
- mlrun/runtimes/mpijob/abstract.py +6 -0
- mlrun/runtimes/mpijob/v1.py +6 -0
- mlrun/runtimes/nuclio/__init__.py +1 -0
- mlrun/runtimes/nuclio/application/application.py +149 -17
- mlrun/runtimes/nuclio/function.py +76 -27
- mlrun/runtimes/nuclio/serving.py +97 -15
- mlrun/runtimes/pod.py +234 -21
- mlrun/runtimes/remotesparkjob.py +6 -0
- mlrun/runtimes/sparkjob/spark3job.py +6 -0
- mlrun/runtimes/utils.py +49 -11
- mlrun/secrets.py +54 -13
- mlrun/serving/__init__.py +2 -0
- mlrun/serving/remote.py +79 -6
- mlrun/serving/routers.py +23 -41
- mlrun/serving/server.py +320 -80
- mlrun/serving/states.py +725 -157
- mlrun/serving/steps.py +62 -0
- mlrun/serving/system_steps.py +200 -119
- mlrun/serving/v2_serving.py +9 -10
- mlrun/utils/helpers.py +288 -88
- mlrun/utils/logger.py +3 -1
- mlrun/utils/notifications/notification/base.py +18 -0
- mlrun/utils/notifications/notification/git.py +2 -4
- mlrun/utils/notifications/notification/slack.py +2 -4
- mlrun/utils/notifications/notification/webhook.py +2 -5
- mlrun/utils/notifications/notification_pusher.py +1 -1
- mlrun/utils/retryer.py +15 -2
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc42.dist-info}/METADATA +45 -51
- {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc42.dist-info}/RECORD +106 -101
- mlrun/api/schemas/__init__.py +0 -259
- {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc42.dist-info}/WHEEL +0 -0
- {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc42.dist-info}/entry_points.txt +0 -0
- {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc42.dist-info}/licenses/LICENSE +0 -0
- {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc42.dist-info}/top_level.txt +0 -0
mlrun/serving/steps.py
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
# Copyright 2025 Iguazio
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from typing import Union
|
|
16
|
+
|
|
17
|
+
import storey
|
|
18
|
+
|
|
19
|
+
import mlrun.errors
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class ChoiceByField(storey.Choice):
    """
    Choice step that routes each event to the outlet(s) named inside the event itself.

    The value stored in the event under ``field_name`` is read and returned as the
    selection: a single string selects one outlet, a list or tuple is returned as is.

    :param field_name: name of the event field holding the outlet name or names.
    :param kwargs:     forwarded unchanged to ``storey.Choice``.
    """

    def __init__(self, field_name: Union[str, list[str]], **kwargs):
        # NOTE(review): the annotation allows a list, but ``select_outlets`` uses this
        # value as a single lookup key (``in event`` / ``event[...]``) - presumably only
        # a string is meaningful here; confirm and tighten the annotation if so.
        self.field_name = field_name
        super().__init__(**kwargs)

    def select_outlets(self, event):
        """
        Return the outlet names stored in the event under ``self.field_name``.

        :param event: container checked with ``in`` and indexed by ``self.field_name``
                      (assumed to be the event body dict - TODO confirm against storey).

        :return: a one-element list when the field holds a string, otherwise the
                 list/tuple stored in the field, returned as is.

        :raises MLRunRuntimeError:             the field is missing, or holds an empty list/tuple.
        :raises MLRunInvalidArgumentError:     the field value is None.
        :raises MLRunInvalidArgumentTypeError: the field value is not a string, list or tuple.
        """
        # The field must be present in the event.
        if self.field_name not in event:
            raise mlrun.errors.MLRunRuntimeError(
                f"Field '{self.field_name}' is not contained in the event keys {list(event.keys())}."
            )

        outlet = event[self.field_name]

        # A present-but-None value is reported separately from a wrong type.
        if outlet is None:
            raise mlrun.errors.MLRunInvalidArgumentError(
                f"Field '{self.field_name}' exists but its value is None."
            )

        # A single name (including an empty string) becomes a one-element selection.
        if isinstance(outlet, str):
            return [outlet]

        if not isinstance(outlet, (list, tuple)):
            raise mlrun.errors.MLRunInvalidArgumentTypeError(
                f"Field '{self.field_name}' must be a string or list of strings "
                f"but is instead of type '{type(outlet).__name__}'."
            )

        # Only the container is validated; its elements are passed through unchecked.
        outlets = outlet
        if not outlets:
            raise mlrun.errors.MLRunRuntimeError(
                f"The value of the key '{self.field_name}' cannot be an empty {type(outlets).__name__}."
            )

        return outlets
|
mlrun/serving/system_steps.py
CHANGED
|
@@ -11,20 +11,38 @@
|
|
|
11
11
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
|
-
|
|
15
14
|
import random
|
|
16
|
-
from copy import
|
|
15
|
+
from copy import copy
|
|
17
16
|
from datetime import timedelta
|
|
18
17
|
from typing import Any, Optional, Union
|
|
19
18
|
|
|
19
|
+
import numpy as np
|
|
20
20
|
import storey
|
|
21
21
|
|
|
22
22
|
import mlrun
|
|
23
23
|
import mlrun.artifacts
|
|
24
24
|
import mlrun.common.schemas.model_monitoring as mm_schemas
|
|
25
|
+
import mlrun.feature_store
|
|
25
26
|
import mlrun.serving
|
|
27
|
+
from mlrun.common.model_monitoring.helpers import (
|
|
28
|
+
get_model_endpoints_creation_task_status,
|
|
29
|
+
)
|
|
26
30
|
from mlrun.common.schemas import MonitoringData
|
|
27
|
-
from mlrun.utils import logger
|
|
31
|
+
from mlrun.utils import get_data_from_path, logger
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class MatchingEndpointsState(mlrun.common.types.StrEnum):
    """Result of comparing the graph's model endpoint UIDs with the listed ones."""

    all_matched = "all_matched"
    not_all_matched = "not_all_matched"
    no_check_needed = "no_check_needed"
    # NOTE(review): the member name and its value differ ("checked" vs "matched").
    # The value is kept as is because it is a runtime string - confirm whether
    # anything depends on it before aligning the two.
    not_yet_checked = "not_yet_matched"

    @staticmethod
    def success_states() -> list[str]:
        """Return the states that are treated as a successful outcome."""
        state = MatchingEndpointsState
        return [state.all_matched, state.no_check_needed]
|
|
28
46
|
|
|
29
47
|
|
|
30
48
|
class MonitoringPreProcessor(storey.MapClass):
|
|
@@ -45,46 +63,20 @@ class MonitoringPreProcessor(storey.MapClass):
|
|
|
45
63
|
result_path = model_monitoring_data.get(MonitoringData.RESULT_PATH)
|
|
46
64
|
input_path = model_monitoring_data.get(MonitoringData.INPUT_PATH)
|
|
47
65
|
|
|
48
|
-
result = self._get_data_from_path(
|
|
49
|
-
result_path, event.body.get(model, event.body)
|
|
50
|
-
)
|
|
51
66
|
output_schema = model_monitoring_data.get(MonitoringData.OUTPUTS)
|
|
52
67
|
input_schema = model_monitoring_data.get(MonitoringData.INPUTS)
|
|
53
|
-
logger.debug(
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
outputs = []
|
|
66
|
-
if not output_schema:
|
|
67
|
-
logger.warn(
|
|
68
|
-
"Output schema was not provided using Project:log_model or by ModelRunnerStep:add_model order "
|
|
69
|
-
"may not preserved"
|
|
70
|
-
)
|
|
71
|
-
else:
|
|
72
|
-
outputs = result
|
|
73
|
-
|
|
74
|
-
event_inputs = event._metadata.get("inputs", {})
|
|
75
|
-
event_inputs = self._get_data_from_path(input_path, event_inputs)
|
|
76
|
-
if isinstance(event_inputs, dict):
|
|
77
|
-
if len(event_inputs) > 1:
|
|
78
|
-
# transpose by key the inputs:
|
|
79
|
-
inputs = self.transpose_by_key(event_inputs, input_schema)
|
|
80
|
-
else:
|
|
81
|
-
inputs = (
|
|
82
|
-
event_inputs[input_schema[0]]
|
|
83
|
-
if input_schema
|
|
84
|
-
else list(result.values())[0]
|
|
85
|
-
)
|
|
86
|
-
else:
|
|
87
|
-
inputs = event_inputs
|
|
68
|
+
logger.debug(
|
|
69
|
+
"output and input schema retrieved",
|
|
70
|
+
output_schema=output_schema,
|
|
71
|
+
input_schema=input_schema,
|
|
72
|
+
)
|
|
73
|
+
|
|
74
|
+
outputs, new_output_schema = self.get_listed_data(
|
|
75
|
+
event.body.get(model, event.body), result_path, output_schema
|
|
76
|
+
)
|
|
77
|
+
inputs, new_input_schema = self.get_listed_data(
|
|
78
|
+
event._metadata.get("inputs", {}), input_path, input_schema
|
|
79
|
+
)
|
|
88
80
|
|
|
89
81
|
if outputs and isinstance(outputs[0], list):
|
|
90
82
|
if output_schema and len(output_schema) != len(outputs[0]):
|
|
@@ -104,48 +96,124 @@ class MonitoringPreProcessor(storey.MapClass):
|
|
|
104
96
|
output_len=len(outputs),
|
|
105
97
|
schema_len=len(output_schema),
|
|
106
98
|
)
|
|
107
|
-
|
|
108
|
-
|
|
99
|
+
if len(inputs) != len(outputs):
|
|
100
|
+
logger.warn(
|
|
101
|
+
"outputs and inputs are not in the same length check 'input_path' and "
|
|
102
|
+
"'output_path' was specified if needed"
|
|
103
|
+
)
|
|
104
|
+
request = {
|
|
105
|
+
"inputs": inputs,
|
|
106
|
+
"id": getattr(event, "id", None),
|
|
107
|
+
"input_schema": new_input_schema,
|
|
108
|
+
}
|
|
109
|
+
resp = {"outputs": outputs, "output_schema": new_output_schema}
|
|
109
110
|
|
|
110
111
|
return request, resp
|
|
111
112
|
|
|
113
|
+
def get_listed_data(
    self,
    raw_data: dict,
    data_path: Optional[Union[list[str], str]] = None,
    schema: Optional[list[str]] = None,
):
    """
    Extract the data located at ``data_path`` and return it in list form.

    :param raw_data:  container the data is extracted from.
    :param data_path: path handed to ``get_data_from_path`` together with ``raw_data``.
    :param schema:    optional key names, used only when the extracted data is a dict.

    :return: tuple of ``(listed_data, new_schema)``:

        * dict data - the values returned by ``transpose_by_key`` and the schema it
          returned, falling back to ``schema`` when that one is empty.
        * list data - the list itself and ``None``.
        * anything else - the value wrapped in a one-element list and ``None``.
    """
    data_from_path = get_data_from_path(data_path, raw_data)

    # Non-dict data needs no transposition and carries no schema.
    if not isinstance(data_from_path, dict):
        if isinstance(data_from_path, list):
            return data_from_path, None
        return [data_from_path], None

    # Dict data: transpose the values by key.
    listed_data, derived_schema = self.transpose_by_key(data_from_path, schema)
    if not schema:
        logger.warn(
            f"No schema provided through add_model(); the order of {data_from_path} "
            "may not be preserved."
        )
    return listed_data, derived_schema or schema
|
|
136
|
+
|
|
112
137
|
@staticmethod
|
|
113
138
|
def transpose_by_key(
|
|
114
|
-
|
|
115
|
-
) -> list[list[
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
if not schema
|
|
119
|
-
else [data_to_transpose[key] for key in schema]
|
|
120
|
-
)
|
|
121
|
-
if values and not isinstance(values[0], list):
|
|
122
|
-
values = [values]
|
|
123
|
-
transposed = (
|
|
124
|
-
list(map(list, zip(*values)))
|
|
125
|
-
if all(isinstance(v, list) for v in values) and len(values) > 1
|
|
126
|
-
else values
|
|
127
|
-
)
|
|
128
|
-
return transposed
|
|
139
|
+
data: dict, schema: Optional[Union[str, list[str]]] = None
|
|
140
|
+
) -> tuple[Union[list[Any], list[list[Any]]], list[str]]:
|
|
141
|
+
"""
|
|
142
|
+
Transpose values from a dictionary by keys.
|
|
129
143
|
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
144
|
+
Given a dictionary and an optional schema (a key or list of keys), this function:
|
|
145
|
+
- Extracts the values for the specified keys (or all keys if no schema is provided).
|
|
146
|
+
- Ensures the data is represented as a list of rows, then transposes it (i.e., switches rows to columns).
|
|
147
|
+
- Handles edge cases:
|
|
148
|
+
* If a single scalar or single-element list is provided, returns a flat list.
|
|
149
|
+
* If a single key is provided (as a string or a list with one element), handles it properly.
|
|
150
|
+
* If only one row with len of one remains after transposition, unwraps it to avoid nested list-of-one.
|
|
151
|
+
|
|
152
|
+
Example::
|
|
153
|
+
|
|
154
|
+
transpose_by_key({"a": 1})
|
|
155
|
+
# returns: [1]
|
|
156
|
+
|
|
157
|
+
transpose_by_key({"a": [1, 2]})
|
|
158
|
+
# returns: [1 ,2]
|
|
159
|
+
|
|
160
|
+
transpose_by_key({"a": [1, 2], "b": [3, 4]})
|
|
161
|
+
# returns: [[1, 3], [2, 4]]
|
|
162
|
+
|
|
163
|
+
:param data: Dictionary with values that are either scalars or lists.
|
|
164
|
+
:param schema: Optional key or list of keys to extract. If not provided, all keys are used.
|
|
165
|
+
Can be a string (single key) or a list of strings.
|
|
166
|
+
|
|
167
|
+
:return: Transposed values:
|
|
168
|
+
* If result is a single column or row, returns a flat list.
|
|
169
|
+
* If result is a matrix, returns a list of lists.
|
|
170
|
+
|
|
171
|
+
:raises ValueError: If the values include a mix of scalars and lists, or if the list lengths do not match.
|
|
172
|
+
mlrun.MLRunInvalidArgumentError if the schema keys are not contained in the data keys.
|
|
173
|
+
"""
|
|
174
|
+
new_schema = None
|
|
175
|
+
# Normalize keys in data:
|
|
176
|
+
normalize_data = {
|
|
177
|
+
mlrun.feature_store.api.norm_column_name(k): copy(v)
|
|
178
|
+
for k, v in data.items()
|
|
179
|
+
}
|
|
180
|
+
# Normalize schema to list
|
|
181
|
+
if not schema:
|
|
182
|
+
keys = list(normalize_data.keys())
|
|
183
|
+
new_schema = keys
|
|
184
|
+
elif isinstance(schema, str):
|
|
185
|
+
keys = [mlrun.feature_store.api.norm_column_name(schema)]
|
|
142
186
|
else:
|
|
143
|
-
|
|
144
|
-
|
|
187
|
+
keys = [mlrun.feature_store.api.norm_column_name(key) for key in schema]
|
|
188
|
+
|
|
189
|
+
values = [normalize_data[key] for key in keys if key in normalize_data]
|
|
190
|
+
if len(values) != len(keys):
|
|
191
|
+
raise mlrun.MLRunInvalidArgumentError(
|
|
192
|
+
f"Schema keys {keys} are not contained in the data keys {list(data.keys())}."
|
|
145
193
|
)
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
194
|
+
|
|
195
|
+
# Detect if all are scalars ie: int,float,str
|
|
196
|
+
all_scalars = all(not isinstance(v, (list, tuple, np.ndarray)) for v in values)
|
|
197
|
+
all_lists = all(isinstance(v, (list, tuple, np.ndarray)) for v in values)
|
|
198
|
+
|
|
199
|
+
if not (all_scalars or all_lists):
|
|
200
|
+
raise ValueError(
|
|
201
|
+
"All values must be either scalars or lists of equal length."
|
|
202
|
+
)
|
|
203
|
+
|
|
204
|
+
if all_scalars:
|
|
205
|
+
transposed = np.array([values], dtype=object)
|
|
206
|
+
elif all_lists and len(keys) > 1:
|
|
207
|
+
arrays = [np.array(v, dtype=object) for v in values]
|
|
208
|
+
mat = np.stack(arrays, axis=0)
|
|
209
|
+
transposed = mat.T
|
|
210
|
+
else:
|
|
211
|
+
return values[0], new_schema
|
|
212
|
+
|
|
213
|
+
if transposed.shape[1] == 1 and transposed.shape[0] == 1:
|
|
214
|
+
# Transform [[0]] -> [0]:
|
|
215
|
+
return transposed[:, 0].tolist(), new_schema
|
|
216
|
+
return transposed.tolist(), new_schema
|
|
149
217
|
|
|
150
218
|
def do(self, event):
|
|
151
219
|
monitoring_event_list = []
|
|
@@ -168,6 +236,12 @@ class MonitoringPreProcessor(storey.MapClass):
|
|
|
168
236
|
request, resp = self.reconstruct_request_resp_fields(
|
|
169
237
|
event, model, monitoring_data[model]
|
|
170
238
|
)
|
|
239
|
+
if hasattr(event, "_original_timestamp"):
|
|
240
|
+
when = event._original_timestamp
|
|
241
|
+
else:
|
|
242
|
+
when = event._metadata.get(model, {}).get(
|
|
243
|
+
mm_schemas.StreamProcessingEvent.WHEN
|
|
244
|
+
)
|
|
171
245
|
monitoring_event_list.append(
|
|
172
246
|
{
|
|
173
247
|
mm_schemas.StreamProcessingEvent.MODEL: model,
|
|
@@ -177,17 +251,16 @@ class MonitoringPreProcessor(storey.MapClass):
|
|
|
177
251
|
mm_schemas.StreamProcessingEvent.MICROSEC: event._metadata.get(
|
|
178
252
|
model, {}
|
|
179
253
|
).get(mm_schemas.StreamProcessingEvent.MICROSEC),
|
|
180
|
-
mm_schemas.StreamProcessingEvent.WHEN:
|
|
181
|
-
model, {}
|
|
182
|
-
).get(mm_schemas.StreamProcessingEvent.WHEN),
|
|
254
|
+
mm_schemas.StreamProcessingEvent.WHEN: when,
|
|
183
255
|
mm_schemas.StreamProcessingEvent.ENDPOINT_ID: monitoring_data[
|
|
184
256
|
model
|
|
185
257
|
].get(
|
|
186
258
|
mlrun.common.schemas.MonitoringData.MODEL_ENDPOINT_UID
|
|
187
259
|
),
|
|
188
|
-
mm_schemas.StreamProcessingEvent.LABELS:
|
|
260
|
+
mm_schemas.StreamProcessingEvent.LABELS: event.body[
|
|
189
261
|
model
|
|
190
|
-
].get(
|
|
262
|
+
].get("labels")
|
|
263
|
+
or {},
|
|
191
264
|
mm_schemas.StreamProcessingEvent.FUNCTION_URI: self.server.function_uri
|
|
192
265
|
if self.server
|
|
193
266
|
else None,
|
|
@@ -212,6 +285,10 @@ class MonitoringPreProcessor(storey.MapClass):
|
|
|
212
285
|
request, resp = self.reconstruct_request_resp_fields(
|
|
213
286
|
event, model, monitoring_data[model]
|
|
214
287
|
)
|
|
288
|
+
if hasattr(event, "_original_timestamp"):
|
|
289
|
+
when = event._original_timestamp
|
|
290
|
+
else:
|
|
291
|
+
when = event._metadata.get(mm_schemas.StreamProcessingEvent.WHEN)
|
|
215
292
|
monitoring_event_list.append(
|
|
216
293
|
{
|
|
217
294
|
mm_schemas.StreamProcessingEvent.MODEL: model,
|
|
@@ -221,25 +298,20 @@ class MonitoringPreProcessor(storey.MapClass):
|
|
|
221
298
|
mm_schemas.StreamProcessingEvent.MICROSEC: event._metadata.get(
|
|
222
299
|
mm_schemas.StreamProcessingEvent.MICROSEC
|
|
223
300
|
),
|
|
224
|
-
mm_schemas.StreamProcessingEvent.WHEN:
|
|
225
|
-
mm_schemas.StreamProcessingEvent.WHEN
|
|
226
|
-
),
|
|
301
|
+
mm_schemas.StreamProcessingEvent.WHEN: when,
|
|
227
302
|
mm_schemas.StreamProcessingEvent.ENDPOINT_ID: monitoring_data[
|
|
228
303
|
model
|
|
229
304
|
].get(mlrun.common.schemas.MonitoringData.MODEL_ENDPOINT_UID),
|
|
230
|
-
mm_schemas.StreamProcessingEvent.LABELS:
|
|
231
|
-
|
|
232
|
-
),
|
|
305
|
+
mm_schemas.StreamProcessingEvent.LABELS: event.body.get("labels")
|
|
306
|
+
or {},
|
|
233
307
|
mm_schemas.StreamProcessingEvent.FUNCTION_URI: self.server.function_uri
|
|
234
308
|
if self.server
|
|
235
309
|
else None,
|
|
236
310
|
mm_schemas.StreamProcessingEvent.REQUEST: request,
|
|
237
311
|
mm_schemas.StreamProcessingEvent.RESPONSE: resp,
|
|
238
|
-
mm_schemas.StreamProcessingEvent.ERROR: event.body
|
|
312
|
+
mm_schemas.StreamProcessingEvent.ERROR: event.body.get(
|
|
239
313
|
mm_schemas.StreamProcessingEvent.ERROR
|
|
240
|
-
|
|
241
|
-
if mm_schemas.StreamProcessingEvent.ERROR in event.body
|
|
242
|
-
else None,
|
|
314
|
+
),
|
|
243
315
|
mm_schemas.StreamProcessingEvent.METRICS: event.body[
|
|
244
316
|
mm_schemas.StreamProcessingEvent.METRICS
|
|
245
317
|
]
|
|
@@ -259,6 +331,9 @@ class BackgroundTaskStatus(storey.MapClass):
|
|
|
259
331
|
|
|
260
332
|
def __init__(self, **kwargs):
|
|
261
333
|
super().__init__(**kwargs)
|
|
334
|
+
self.matching_endpoints = MatchingEndpointsState.not_yet_checked
|
|
335
|
+
self.graph_model_endpoint_uids: set = set()
|
|
336
|
+
self.listed_model_endpoint_uids: set = set()
|
|
262
337
|
self.server: mlrun.serving.GraphServer = (
|
|
263
338
|
getattr(self.context, "server", None) if self.context else None
|
|
264
339
|
)
|
|
@@ -279,43 +354,47 @@ class BackgroundTaskStatus(storey.MapClass):
|
|
|
279
354
|
)
|
|
280
355
|
)
|
|
281
356
|
):
|
|
282
|
-
|
|
283
|
-
self.
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
self.
|
|
287
|
-
|
|
357
|
+
(
|
|
358
|
+
self._background_task_state,
|
|
359
|
+
self._background_task_check_timestamp,
|
|
360
|
+
self.listed_model_endpoint_uids,
|
|
361
|
+
) = get_model_endpoints_creation_task_status(self.server)
|
|
362
|
+
if (
|
|
363
|
+
self.listed_model_endpoint_uids
|
|
364
|
+
and self.matching_endpoints == MatchingEndpointsState.not_yet_checked
|
|
365
|
+
):
|
|
366
|
+
if not self.graph_model_endpoint_uids:
|
|
367
|
+
self.graph_model_endpoint_uids = collect_model_endpoint_uids(
|
|
368
|
+
self.server
|
|
369
|
+
)
|
|
370
|
+
|
|
371
|
+
if self.graph_model_endpoint_uids.issubset(self.listed_model_endpoint_uids):
|
|
372
|
+
self.matching_endpoints = MatchingEndpointsState.all_matched
|
|
373
|
+
elif self.listed_model_endpoint_uids is None:
|
|
374
|
+
self.matching_endpoints = MatchingEndpointsState.no_check_needed
|
|
288
375
|
|
|
289
376
|
if (
|
|
290
377
|
self._background_task_state
|
|
291
378
|
== mlrun.common.schemas.BackgroundTaskState.succeeded
|
|
379
|
+
and self.matching_endpoints in MatchingEndpointsState.success_states()
|
|
292
380
|
):
|
|
293
381
|
return event
|
|
294
382
|
else:
|
|
295
383
|
return None
|
|
296
384
|
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
)
|
|
311
|
-
else: # in progress
|
|
312
|
-
logger.info(
|
|
313
|
-
f"Model endpoint creation task is still in progress with the current state: "
|
|
314
|
-
f"{background_task_state}. Events will not be monitored for the next "
|
|
315
|
-
f"{mlrun.mlconf.model_endpoint_monitoring.model_endpoint_creation_check_period} seconds",
|
|
316
|
-
name=self.name,
|
|
317
|
-
background_task_check_timestamp=self._background_task_check_timestamp.isoformat(),
|
|
318
|
-
)
|
|
385
|
+
|
|
386
|
+
def collect_model_endpoint_uids(server: mlrun.serving.GraphServer) -> set[str]:
    """
    Gather the model endpoint UIDs declared by the steps of the server's graph.

    Only steps exposing a ``monitoring_data`` attribute are inspected; for each model
    entry in it, the value stored under ``MonitoringData.MODEL_ENDPOINT_UID`` is
    collected when it is truthy.

    :param server: graph server whose ``graph.steps`` are scanned.

    :return: set of the collected UIDs (empty when none were found).
    """
    uid_key = mlrun.common.schemas.MonitoringData.MODEL_ENDPOINT_UID
    uids = set()
    for step in server.graph.steps.values():
        # Steps without monitoring data contribute nothing.
        if not hasattr(step, "monitoring_data"):
            continue
        for model_data in step.monitoring_data.values():
            uid = model_data.get(uid_key)
            if uid:
                uids.add(uid)
    return uids
|
|
319
398
|
|
|
320
399
|
|
|
321
400
|
class SamplingStep(storey.MapClass):
|
|
@@ -337,7 +416,9 @@ class SamplingStep(storey.MapClass):
|
|
|
337
416
|
event=event,
|
|
338
417
|
sampling_percentage=self.sampling_percentage,
|
|
339
418
|
)
|
|
340
|
-
if self.sampling_percentage != 100
|
|
419
|
+
if self.sampling_percentage != 100 and not event.get(
|
|
420
|
+
mm_schemas.StreamProcessingEvent.ERROR
|
|
421
|
+
):
|
|
341
422
|
request = event[mm_schemas.StreamProcessingEvent.REQUEST]
|
|
342
423
|
num_of_inputs = len(request["inputs"])
|
|
343
424
|
sampled_requests_indices = self._pick_random_requests(
|
mlrun/serving/v2_serving.py
CHANGED
|
@@ -24,6 +24,9 @@ import mlrun.common.schemas.model_monitoring
|
|
|
24
24
|
import mlrun.model_monitoring
|
|
25
25
|
from mlrun.utils import logger, now_date
|
|
26
26
|
|
|
27
|
+
from ..common.model_monitoring.helpers import (
|
|
28
|
+
get_model_endpoints_creation_task_status,
|
|
29
|
+
)
|
|
27
30
|
from .utils import StepToDict, _extract_input_data, _update_result_body
|
|
28
31
|
|
|
29
32
|
|
|
@@ -474,22 +477,18 @@ class V2ModelServer(StepToDict):
|
|
|
474
477
|
) or getattr(self.context, "server", None)
|
|
475
478
|
if not self.context.is_mock or self.context.monitoring_mock:
|
|
476
479
|
if server.model_endpoint_creation_task_name:
|
|
477
|
-
|
|
478
|
-
server
|
|
479
|
-
)
|
|
480
|
-
logger.debug(
|
|
481
|
-
"Checking model endpoint creation task status",
|
|
482
|
-
task_name=server.model_endpoint_creation_task_name,
|
|
480
|
+
background_task_state, _, _ = get_model_endpoints_creation_task_status(
|
|
481
|
+
server
|
|
483
482
|
)
|
|
484
483
|
if (
|
|
485
|
-
|
|
484
|
+
background_task_state
|
|
486
485
|
in mlrun.common.schemas.BackgroundTaskState.terminal_states()
|
|
487
486
|
):
|
|
488
487
|
logger.debug(
|
|
489
|
-
f"Model endpoint creation task completed with state {
|
|
488
|
+
f"Model endpoint creation task completed with state {background_task_state}"
|
|
490
489
|
)
|
|
491
490
|
if (
|
|
492
|
-
|
|
491
|
+
background_task_state
|
|
493
492
|
== mlrun.common.schemas.BackgroundTaskState.succeeded
|
|
494
493
|
):
|
|
495
494
|
self._model_logger = (
|
|
@@ -504,7 +503,7 @@ class V2ModelServer(StepToDict):
|
|
|
504
503
|
else: # in progress
|
|
505
504
|
logger.debug(
|
|
506
505
|
f"Model endpoint creation task is still in progress with the current state: "
|
|
507
|
-
f"{
|
|
506
|
+
f"{background_task_state}.",
|
|
508
507
|
name=self.name,
|
|
509
508
|
)
|
|
510
509
|
else:
|