mlrun 1.10.0rc18__py3-none-any.whl → 1.11.0rc16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/__init__.py +24 -3
- mlrun/__main__.py +0 -4
- mlrun/artifacts/dataset.py +2 -2
- mlrun/artifacts/document.py +6 -1
- mlrun/artifacts/llm_prompt.py +21 -15
- mlrun/artifacts/model.py +3 -3
- mlrun/artifacts/plots.py +1 -1
- mlrun/{model_monitoring/db/tsdb/tdengine → auth}/__init__.py +2 -3
- mlrun/auth/nuclio.py +89 -0
- mlrun/auth/providers.py +429 -0
- mlrun/auth/utils.py +415 -0
- mlrun/common/constants.py +14 -0
- mlrun/common/model_monitoring/helpers.py +123 -0
- mlrun/common/runtimes/constants.py +28 -0
- mlrun/common/schemas/__init__.py +14 -3
- mlrun/common/schemas/alert.py +2 -2
- mlrun/common/schemas/api_gateway.py +3 -0
- mlrun/common/schemas/auth.py +12 -10
- mlrun/common/schemas/client_spec.py +4 -0
- mlrun/common/schemas/constants.py +25 -0
- mlrun/common/schemas/frontend_spec.py +1 -8
- mlrun/common/schemas/function.py +34 -0
- mlrun/common/schemas/hub.py +33 -20
- mlrun/common/schemas/model_monitoring/__init__.py +2 -1
- mlrun/common/schemas/model_monitoring/constants.py +12 -15
- mlrun/common/schemas/model_monitoring/functions.py +13 -4
- mlrun/common/schemas/model_monitoring/model_endpoints.py +11 -0
- mlrun/common/schemas/pipeline.py +1 -1
- mlrun/common/schemas/secret.py +17 -2
- mlrun/common/secrets.py +95 -1
- mlrun/common/types.py +10 -10
- mlrun/config.py +69 -19
- mlrun/data_types/infer.py +2 -2
- mlrun/datastore/__init__.py +12 -5
- mlrun/datastore/azure_blob.py +162 -47
- mlrun/datastore/base.py +274 -10
- mlrun/datastore/datastore.py +7 -2
- mlrun/datastore/datastore_profile.py +84 -22
- mlrun/datastore/model_provider/huggingface_provider.py +225 -41
- mlrun/datastore/model_provider/mock_model_provider.py +87 -0
- mlrun/datastore/model_provider/model_provider.py +206 -74
- mlrun/datastore/model_provider/openai_provider.py +226 -66
- mlrun/datastore/s3.py +39 -18
- mlrun/datastore/sources.py +1 -1
- mlrun/datastore/store_resources.py +4 -4
- mlrun/datastore/storeytargets.py +17 -12
- mlrun/datastore/targets.py +1 -1
- mlrun/datastore/utils.py +25 -6
- mlrun/datastore/v3io.py +1 -1
- mlrun/db/base.py +63 -32
- mlrun/db/httpdb.py +373 -153
- mlrun/db/nopdb.py +54 -21
- mlrun/errors.py +4 -2
- mlrun/execution.py +66 -25
- mlrun/feature_store/api.py +1 -1
- mlrun/feature_store/common.py +1 -1
- mlrun/feature_store/feature_vector_utils.py +1 -1
- mlrun/feature_store/steps.py +8 -6
- mlrun/frameworks/_common/utils.py +3 -3
- mlrun/frameworks/_dl_common/loggers/logger.py +1 -1
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +2 -1
- mlrun/frameworks/_ml_common/loggers/mlrun_logger.py +1 -1
- mlrun/frameworks/_ml_common/utils.py +2 -1
- mlrun/frameworks/auto_mlrun/auto_mlrun.py +4 -3
- mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +2 -1
- mlrun/frameworks/onnx/dataset.py +2 -1
- mlrun/frameworks/onnx/mlrun_interface.py +2 -1
- mlrun/frameworks/pytorch/callbacks/logging_callback.py +5 -4
- mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +2 -1
- mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +2 -1
- mlrun/frameworks/pytorch/utils.py +2 -1
- mlrun/frameworks/sklearn/metric.py +2 -1
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +5 -4
- mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +2 -1
- mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +2 -1
- mlrun/hub/__init__.py +52 -0
- mlrun/hub/base.py +142 -0
- mlrun/hub/module.py +172 -0
- mlrun/hub/step.py +113 -0
- mlrun/k8s_utils.py +105 -16
- mlrun/launcher/base.py +15 -7
- mlrun/launcher/local.py +4 -1
- mlrun/model.py +14 -4
- mlrun/model_monitoring/__init__.py +0 -1
- mlrun/model_monitoring/api.py +65 -28
- mlrun/model_monitoring/applications/__init__.py +1 -1
- mlrun/model_monitoring/applications/base.py +299 -128
- mlrun/model_monitoring/applications/context.py +2 -4
- mlrun/model_monitoring/controller.py +132 -58
- mlrun/model_monitoring/db/_schedules.py +38 -29
- mlrun/model_monitoring/db/_stats.py +6 -16
- mlrun/model_monitoring/db/tsdb/__init__.py +9 -7
- mlrun/model_monitoring/db/tsdb/base.py +29 -9
- mlrun/model_monitoring/db/tsdb/preaggregate.py +234 -0
- mlrun/model_monitoring/db/tsdb/stream_graph_steps.py +63 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/queries/timescaledb_metrics_queries.py +414 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/queries/timescaledb_predictions_queries.py +376 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/queries/timescaledb_results_queries.py +590 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_connection.py +434 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_connector.py +541 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_operations.py +808 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_schema.py +502 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_stream.py +163 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_stream_graph_steps.py +60 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/utils/timescaledb_dataframe_processor.py +141 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/utils/timescaledb_query_builder.py +585 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/writer_graph_steps.py +73 -0
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +20 -9
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +235 -51
- mlrun/model_monitoring/features_drift_table.py +2 -1
- mlrun/model_monitoring/helpers.py +30 -6
- mlrun/model_monitoring/stream_processing.py +34 -28
- mlrun/model_monitoring/writer.py +224 -4
- mlrun/package/__init__.py +2 -1
- mlrun/platforms/__init__.py +0 -43
- mlrun/platforms/iguazio.py +8 -4
- mlrun/projects/operations.py +17 -11
- mlrun/projects/pipelines.py +2 -2
- mlrun/projects/project.py +187 -123
- mlrun/run.py +95 -21
- mlrun/runtimes/__init__.py +2 -186
- mlrun/runtimes/base.py +103 -25
- mlrun/runtimes/constants.py +225 -0
- mlrun/runtimes/daskjob.py +5 -2
- mlrun/runtimes/databricks_job/databricks_runtime.py +2 -1
- mlrun/runtimes/local.py +5 -2
- mlrun/runtimes/mounts.py +20 -2
- mlrun/runtimes/nuclio/__init__.py +12 -7
- mlrun/runtimes/nuclio/api_gateway.py +36 -6
- mlrun/runtimes/nuclio/application/application.py +339 -40
- mlrun/runtimes/nuclio/function.py +222 -72
- mlrun/runtimes/nuclio/serving.py +132 -42
- mlrun/runtimes/pod.py +213 -21
- mlrun/runtimes/utils.py +49 -9
- mlrun/secrets.py +99 -14
- mlrun/serving/__init__.py +2 -0
- mlrun/serving/remote.py +84 -11
- mlrun/serving/routers.py +26 -44
- mlrun/serving/server.py +138 -51
- mlrun/serving/serving_wrapper.py +6 -2
- mlrun/serving/states.py +997 -283
- mlrun/serving/steps.py +62 -0
- mlrun/serving/system_steps.py +149 -95
- mlrun/serving/v2_serving.py +9 -10
- mlrun/track/trackers/mlflow_tracker.py +29 -31
- mlrun/utils/helpers.py +292 -94
- mlrun/utils/http.py +9 -2
- mlrun/utils/notifications/notification/base.py +18 -0
- mlrun/utils/notifications/notification/git.py +3 -5
- mlrun/utils/notifications/notification/mail.py +39 -16
- mlrun/utils/notifications/notification/slack.py +2 -4
- mlrun/utils/notifications/notification/webhook.py +2 -5
- mlrun/utils/notifications/notification_pusher.py +3 -3
- mlrun/utils/version/version.json +2 -2
- mlrun/utils/version/version.py +3 -4
- {mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/METADATA +63 -74
- {mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/RECORD +161 -143
- mlrun/api/schemas/__init__.py +0 -259
- mlrun/db/auth_utils.py +0 -152
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +0 -344
- mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +0 -75
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connection.py +0 -281
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +0 -1266
- {mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/WHEEL +0 -0
- {mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/entry_points.txt +0 -0
- {mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/licenses/LICENSE +0 -0
- {mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/top_level.txt +0 -0
mlrun/serving/steps.py
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
# Copyright 2025 Iguazio
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from typing import Union
|
|
16
|
+
|
|
17
|
+
import storey
|
|
18
|
+
|
|
19
|
+
import mlrun.errors
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class ChoiceByField(storey.Choice):
|
|
23
|
+
"""
|
|
24
|
+
Selects downstream outlets to route each event based on a predetermined field.
|
|
25
|
+
:param field_name: event field name that contains the step name or names of the desired outlet or outlets
|
|
26
|
+
"""
|
|
27
|
+
|
|
28
|
+
def __init__(self, field_name: Union[str, list[str]], **kwargs):
|
|
29
|
+
self.field_name = field_name
|
|
30
|
+
super().__init__(**kwargs)
|
|
31
|
+
|
|
32
|
+
def select_outlets(self, event):
|
|
33
|
+
# Case 1: Missing field
|
|
34
|
+
if self.field_name not in event:
|
|
35
|
+
raise mlrun.errors.MLRunRuntimeError(
|
|
36
|
+
f"Field '{self.field_name}' is not contained in the event keys {list(event.keys())}."
|
|
37
|
+
)
|
|
38
|
+
|
|
39
|
+
outlet = event[self.field_name]
|
|
40
|
+
|
|
41
|
+
# Case 2: Field exists but is None
|
|
42
|
+
if outlet is None:
|
|
43
|
+
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
44
|
+
f"Field '{self.field_name}' exists but its value is None."
|
|
45
|
+
)
|
|
46
|
+
|
|
47
|
+
# Case 3: Invalid type
|
|
48
|
+
if not isinstance(outlet, str | list | tuple):
|
|
49
|
+
raise mlrun.errors.MLRunInvalidArgumentTypeError(
|
|
50
|
+
f"Field '{self.field_name}' must be a string or list of strings "
|
|
51
|
+
f"but is instead of type '{type(outlet).__name__}'."
|
|
52
|
+
)
|
|
53
|
+
|
|
54
|
+
outlets = [outlet] if isinstance(outlet, str) else outlet
|
|
55
|
+
|
|
56
|
+
# Case 4: Empty list or tuple
|
|
57
|
+
if not outlets:
|
|
58
|
+
raise mlrun.errors.MLRunRuntimeError(
|
|
59
|
+
f"The value of the key '{self.field_name}' cannot be an empty {type(outlets).__name__}."
|
|
60
|
+
)
|
|
61
|
+
|
|
62
|
+
return outlets
|
mlrun/serving/system_steps.py
CHANGED
|
@@ -11,8 +11,9 @@
|
|
|
11
11
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
|
-
|
|
15
14
|
import random
|
|
15
|
+
import typing
|
|
16
|
+
from copy import copy
|
|
16
17
|
from datetime import timedelta
|
|
17
18
|
from typing import Any, Optional, Union
|
|
18
19
|
|
|
@@ -22,11 +23,29 @@ import storey
|
|
|
22
23
|
import mlrun
|
|
23
24
|
import mlrun.artifacts
|
|
24
25
|
import mlrun.common.schemas.model_monitoring as mm_schemas
|
|
26
|
+
import mlrun.feature_store
|
|
25
27
|
import mlrun.serving
|
|
28
|
+
from mlrun.common.model_monitoring.helpers import (
|
|
29
|
+
get_model_endpoints_creation_task_status,
|
|
30
|
+
)
|
|
26
31
|
from mlrun.common.schemas import MonitoringData
|
|
27
32
|
from mlrun.utils import get_data_from_path, logger
|
|
28
33
|
|
|
29
34
|
|
|
35
|
+
class MatchingEndpointsState(mlrun.common.types.StrEnum):
|
|
36
|
+
all_matched = "all_matched"
|
|
37
|
+
not_all_matched = "not_all_matched"
|
|
38
|
+
no_check_needed = "no_check_needed"
|
|
39
|
+
not_yet_checked = "not_yet_matched"
|
|
40
|
+
|
|
41
|
+
@staticmethod
|
|
42
|
+
def success_states() -> list[str]:
|
|
43
|
+
return [
|
|
44
|
+
MatchingEndpointsState.all_matched,
|
|
45
|
+
MatchingEndpointsState.no_check_needed,
|
|
46
|
+
]
|
|
47
|
+
|
|
48
|
+
|
|
30
49
|
class MonitoringPreProcessor(storey.MapClass):
|
|
31
50
|
"""preprocess step, reconstructs the serving output event body to StreamProcessingEvent schema"""
|
|
32
51
|
|
|
@@ -45,33 +64,24 @@ class MonitoringPreProcessor(storey.MapClass):
|
|
|
45
64
|
result_path = model_monitoring_data.get(MonitoringData.RESULT_PATH)
|
|
46
65
|
input_path = model_monitoring_data.get(MonitoringData.INPUT_PATH)
|
|
47
66
|
|
|
48
|
-
result = get_data_from_path(result_path, event.body.get(model, event.body))
|
|
49
67
|
output_schema = model_monitoring_data.get(MonitoringData.OUTPUTS)
|
|
50
68
|
input_schema = model_monitoring_data.get(MonitoringData.INPUTS)
|
|
51
|
-
logger.debug(
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
else:
|
|
61
|
-
outputs = result
|
|
62
|
-
|
|
63
|
-
event_inputs = event._metadata.get("inputs", {})
|
|
64
|
-
event_inputs = get_data_from_path(input_path, event_inputs)
|
|
65
|
-
if isinstance(event_inputs, dict):
|
|
66
|
-
# transpose by key the inputs:
|
|
67
|
-
inputs = self.transpose_by_key(event_inputs, input_schema)
|
|
68
|
-
if not input_schema:
|
|
69
|
-
logger.warn(
|
|
70
|
-
"Input schema was not provided using by ModelRunnerStep:add_model, order "
|
|
71
|
-
"may not preserved"
|
|
72
|
-
)
|
|
69
|
+
logger.debug(
|
|
70
|
+
"output and input schema retrieved",
|
|
71
|
+
output_schema=output_schema,
|
|
72
|
+
input_schema=input_schema,
|
|
73
|
+
)
|
|
74
|
+
if event.body and isinstance(event.body, list):
|
|
75
|
+
outputs, new_output_schema = self.get_listed_data(
|
|
76
|
+
event.body, result_path, output_schema
|
|
77
|
+
)
|
|
73
78
|
else:
|
|
74
|
-
|
|
79
|
+
outputs, new_output_schema = self.get_listed_data(
|
|
80
|
+
event.body.get(model, event.body), result_path, output_schema
|
|
81
|
+
)
|
|
82
|
+
inputs, new_input_schema = self.get_listed_data(
|
|
83
|
+
event._metadata.get("inputs", {}), input_path, input_schema
|
|
84
|
+
)
|
|
75
85
|
|
|
76
86
|
if outputs and isinstance(outputs[0], list):
|
|
77
87
|
if output_schema and len(output_schema) != len(outputs[0]):
|
|
@@ -96,15 +106,43 @@ class MonitoringPreProcessor(storey.MapClass):
|
|
|
96
106
|
"outputs and inputs are not in the same length check 'input_path' and "
|
|
97
107
|
"'output_path' was specified if needed"
|
|
98
108
|
)
|
|
99
|
-
request = {
|
|
100
|
-
|
|
109
|
+
request = {
|
|
110
|
+
"inputs": inputs,
|
|
111
|
+
"id": getattr(event, "id", None),
|
|
112
|
+
"input_schema": new_input_schema,
|
|
113
|
+
}
|
|
114
|
+
resp = {"outputs": outputs, "output_schema": new_output_schema}
|
|
101
115
|
|
|
102
116
|
return request, resp
|
|
103
117
|
|
|
118
|
+
def get_listed_data(
|
|
119
|
+
self,
|
|
120
|
+
raw_data: typing.Union[dict, list],
|
|
121
|
+
data_path: Optional[Union[list[str], str]] = None,
|
|
122
|
+
schema: Optional[list[str]] = None,
|
|
123
|
+
):
|
|
124
|
+
"""Get data from a path and transpose it by keys if dict is provided."""
|
|
125
|
+
new_schema = None
|
|
126
|
+
data_from_path = get_data_from_path(data_path, raw_data)
|
|
127
|
+
if isinstance(data_from_path, dict):
|
|
128
|
+
# transpose by key the inputs:
|
|
129
|
+
listed_data, new_schema = self.transpose_by_key(data_from_path, schema)
|
|
130
|
+
new_schema = new_schema or schema
|
|
131
|
+
if not schema:
|
|
132
|
+
logger.warn(
|
|
133
|
+
f"No schema provided through add_model(); the order of {data_from_path} "
|
|
134
|
+
"may not be preserved."
|
|
135
|
+
)
|
|
136
|
+
elif not isinstance(data_from_path, list):
|
|
137
|
+
listed_data = [data_from_path]
|
|
138
|
+
else:
|
|
139
|
+
listed_data = data_from_path
|
|
140
|
+
return listed_data, new_schema
|
|
141
|
+
|
|
104
142
|
@staticmethod
|
|
105
143
|
def transpose_by_key(
|
|
106
144
|
data: dict, schema: Optional[Union[str, list[str]]] = None
|
|
107
|
-
) -> Union[list[Any], list[list[Any]]]:
|
|
145
|
+
) -> tuple[Union[list[Any], list[list[Any]]], list[str]]:
|
|
108
146
|
"""
|
|
109
147
|
Transpose values from a dictionary by keys.
|
|
110
148
|
|
|
@@ -136,25 +174,32 @@ class MonitoringPreProcessor(storey.MapClass):
|
|
|
136
174
|
* If result is a matrix, returns a list of lists.
|
|
137
175
|
|
|
138
176
|
:raises ValueError: If the values include a mix of scalars and lists, or if the list lengths do not match.
|
|
177
|
+
mlrun.MLRunInvalidArgumentError if the schema keys are not contained in the data keys.
|
|
139
178
|
"""
|
|
140
|
-
|
|
179
|
+
new_schema = None
|
|
180
|
+
# Normalize keys in data:
|
|
181
|
+
normalize_data = {
|
|
182
|
+
mlrun.feature_store.api.norm_column_name(k): copy(v)
|
|
183
|
+
for k, v in data.items()
|
|
184
|
+
}
|
|
141
185
|
# Normalize schema to list
|
|
142
186
|
if not schema:
|
|
143
|
-
keys = list(
|
|
187
|
+
keys = list(normalize_data.keys())
|
|
188
|
+
new_schema = keys
|
|
144
189
|
elif isinstance(schema, str):
|
|
145
|
-
keys = [schema]
|
|
190
|
+
keys = [mlrun.feature_store.api.norm_column_name(schema)]
|
|
146
191
|
else:
|
|
147
|
-
keys = schema
|
|
192
|
+
keys = [mlrun.feature_store.api.norm_column_name(key) for key in schema]
|
|
148
193
|
|
|
149
|
-
values = [
|
|
194
|
+
values = [normalize_data[key] for key in keys if key in normalize_data]
|
|
150
195
|
if len(values) != len(keys):
|
|
151
196
|
raise mlrun.MLRunInvalidArgumentError(
|
|
152
|
-
f"Schema keys {keys}
|
|
197
|
+
f"Schema keys {keys} are not contained in the data keys {list(data.keys())}."
|
|
153
198
|
)
|
|
154
199
|
|
|
155
200
|
# Detect if all are scalars ie: int,float,str
|
|
156
|
-
all_scalars = all(not isinstance(v,
|
|
157
|
-
all_lists = all(isinstance(v,
|
|
201
|
+
all_scalars = all(not isinstance(v, list | tuple | np.ndarray) for v in values)
|
|
202
|
+
all_lists = all(isinstance(v, list | tuple | np.ndarray) for v in values)
|
|
158
203
|
|
|
159
204
|
if not (all_scalars or all_lists):
|
|
160
205
|
raise ValueError(
|
|
@@ -168,12 +213,12 @@ class MonitoringPreProcessor(storey.MapClass):
|
|
|
168
213
|
mat = np.stack(arrays, axis=0)
|
|
169
214
|
transposed = mat.T
|
|
170
215
|
else:
|
|
171
|
-
return values[0]
|
|
216
|
+
return values[0], new_schema
|
|
172
217
|
|
|
173
218
|
if transposed.shape[1] == 1 and transposed.shape[0] == 1:
|
|
174
219
|
# Transform [[0]] -> [0]:
|
|
175
|
-
return transposed[:, 0].tolist()
|
|
176
|
-
return transposed.tolist()
|
|
220
|
+
return transposed[:, 0].tolist(), new_schema
|
|
221
|
+
return transposed.tolist(), new_schema
|
|
177
222
|
|
|
178
223
|
def do(self, event):
|
|
179
224
|
monitoring_event_list = []
|
|
@@ -202,6 +247,21 @@ class MonitoringPreProcessor(storey.MapClass):
|
|
|
202
247
|
when = event._metadata.get(model, {}).get(
|
|
203
248
|
mm_schemas.StreamProcessingEvent.WHEN
|
|
204
249
|
)
|
|
250
|
+
# if the body is not a dict, use empty labels, error and metrics
|
|
251
|
+
if isinstance(event.body[model], dict):
|
|
252
|
+
body_by_model = event.body[model]
|
|
253
|
+
labels = body_by_model.get("labels") or {}
|
|
254
|
+
error = body_by_model.get(
|
|
255
|
+
mm_schemas.StreamProcessingEvent.ERROR
|
|
256
|
+
)
|
|
257
|
+
metrics = body_by_model.get(
|
|
258
|
+
mm_schemas.StreamProcessingEvent.METRICS
|
|
259
|
+
)
|
|
260
|
+
else:
|
|
261
|
+
labels = {}
|
|
262
|
+
error = None
|
|
263
|
+
metrics = None
|
|
264
|
+
|
|
205
265
|
monitoring_event_list.append(
|
|
206
266
|
{
|
|
207
267
|
mm_schemas.StreamProcessingEvent.MODEL: model,
|
|
@@ -217,26 +277,14 @@ class MonitoringPreProcessor(storey.MapClass):
|
|
|
217
277
|
].get(
|
|
218
278
|
mlrun.common.schemas.MonitoringData.MODEL_ENDPOINT_UID
|
|
219
279
|
),
|
|
220
|
-
mm_schemas.StreamProcessingEvent.LABELS:
|
|
221
|
-
model
|
|
222
|
-
].get(mlrun.common.schemas.MonitoringData.OUTPUTS),
|
|
280
|
+
mm_schemas.StreamProcessingEvent.LABELS: labels,
|
|
223
281
|
mm_schemas.StreamProcessingEvent.FUNCTION_URI: self.server.function_uri
|
|
224
282
|
if self.server
|
|
225
283
|
else None,
|
|
226
284
|
mm_schemas.StreamProcessingEvent.REQUEST: request,
|
|
227
285
|
mm_schemas.StreamProcessingEvent.RESPONSE: resp,
|
|
228
|
-
mm_schemas.StreamProcessingEvent.ERROR:
|
|
229
|
-
|
|
230
|
-
]
|
|
231
|
-
if mm_schemas.StreamProcessingEvent.ERROR
|
|
232
|
-
in event.body[model]
|
|
233
|
-
else None,
|
|
234
|
-
mm_schemas.StreamProcessingEvent.METRICS: event.body[model][
|
|
235
|
-
mm_schemas.StreamProcessingEvent.METRICS
|
|
236
|
-
]
|
|
237
|
-
if mm_schemas.StreamProcessingEvent.METRICS
|
|
238
|
-
in event.body[model]
|
|
239
|
-
else None,
|
|
286
|
+
mm_schemas.StreamProcessingEvent.ERROR: error,
|
|
287
|
+
mm_schemas.StreamProcessingEvent.METRICS: metrics,
|
|
240
288
|
}
|
|
241
289
|
)
|
|
242
290
|
elif monitoring_data:
|
|
@@ -248,6 +296,15 @@ class MonitoringPreProcessor(storey.MapClass):
|
|
|
248
296
|
when = event._original_timestamp
|
|
249
297
|
else:
|
|
250
298
|
when = event._metadata.get(mm_schemas.StreamProcessingEvent.WHEN)
|
|
299
|
+
# if the body is not a dict, use empty labels, error and metrics
|
|
300
|
+
if isinstance(event.body, dict):
|
|
301
|
+
labels = event.body.get("labels") or {}
|
|
302
|
+
error = event.body.get(mm_schemas.StreamProcessingEvent.ERROR)
|
|
303
|
+
metrics = event.body.get(mm_schemas.StreamProcessingEvent.METRICS)
|
|
304
|
+
else:
|
|
305
|
+
labels = {}
|
|
306
|
+
error = None
|
|
307
|
+
metrics = None
|
|
251
308
|
monitoring_event_list.append(
|
|
252
309
|
{
|
|
253
310
|
mm_schemas.StreamProcessingEvent.MODEL: model,
|
|
@@ -261,24 +318,14 @@ class MonitoringPreProcessor(storey.MapClass):
|
|
|
261
318
|
mm_schemas.StreamProcessingEvent.ENDPOINT_ID: monitoring_data[
|
|
262
319
|
model
|
|
263
320
|
].get(mlrun.common.schemas.MonitoringData.MODEL_ENDPOINT_UID),
|
|
264
|
-
mm_schemas.StreamProcessingEvent.LABELS:
|
|
265
|
-
mlrun.common.schemas.MonitoringData.OUTPUTS
|
|
266
|
-
),
|
|
321
|
+
mm_schemas.StreamProcessingEvent.LABELS: labels,
|
|
267
322
|
mm_schemas.StreamProcessingEvent.FUNCTION_URI: self.server.function_uri
|
|
268
323
|
if self.server
|
|
269
324
|
else None,
|
|
270
325
|
mm_schemas.StreamProcessingEvent.REQUEST: request,
|
|
271
326
|
mm_schemas.StreamProcessingEvent.RESPONSE: resp,
|
|
272
|
-
mm_schemas.StreamProcessingEvent.ERROR:
|
|
273
|
-
|
|
274
|
-
]
|
|
275
|
-
if mm_schemas.StreamProcessingEvent.ERROR in event.body
|
|
276
|
-
else None,
|
|
277
|
-
mm_schemas.StreamProcessingEvent.METRICS: event.body[
|
|
278
|
-
mm_schemas.StreamProcessingEvent.METRICS
|
|
279
|
-
]
|
|
280
|
-
if mm_schemas.StreamProcessingEvent.METRICS in event.body
|
|
281
|
-
else None,
|
|
327
|
+
mm_schemas.StreamProcessingEvent.ERROR: error,
|
|
328
|
+
mm_schemas.StreamProcessingEvent.METRICS: metrics,
|
|
282
329
|
}
|
|
283
330
|
)
|
|
284
331
|
event.body = monitoring_event_list
|
|
@@ -293,6 +340,9 @@ class BackgroundTaskStatus(storey.MapClass):
|
|
|
293
340
|
|
|
294
341
|
def __init__(self, **kwargs):
|
|
295
342
|
super().__init__(**kwargs)
|
|
343
|
+
self.matching_endpoints = MatchingEndpointsState.not_yet_checked
|
|
344
|
+
self.graph_model_endpoint_uids: set = set()
|
|
345
|
+
self.listed_model_endpoint_uids: set = set()
|
|
296
346
|
self.server: mlrun.serving.GraphServer = (
|
|
297
347
|
getattr(self.context, "server", None) if self.context else None
|
|
298
348
|
)
|
|
@@ -313,43 +363,47 @@ class BackgroundTaskStatus(storey.MapClass):
|
|
|
313
363
|
)
|
|
314
364
|
)
|
|
315
365
|
):
|
|
316
|
-
|
|
317
|
-
self.
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
self.
|
|
321
|
-
|
|
366
|
+
(
|
|
367
|
+
self._background_task_state,
|
|
368
|
+
self._background_task_check_timestamp,
|
|
369
|
+
self.listed_model_endpoint_uids,
|
|
370
|
+
) = get_model_endpoints_creation_task_status(self.server)
|
|
371
|
+
if (
|
|
372
|
+
self.listed_model_endpoint_uids
|
|
373
|
+
and self.matching_endpoints == MatchingEndpointsState.not_yet_checked
|
|
374
|
+
):
|
|
375
|
+
if not self.graph_model_endpoint_uids:
|
|
376
|
+
self.graph_model_endpoint_uids = collect_model_endpoint_uids(
|
|
377
|
+
self.server
|
|
378
|
+
)
|
|
379
|
+
|
|
380
|
+
if self.graph_model_endpoint_uids.issubset(self.listed_model_endpoint_uids):
|
|
381
|
+
self.matching_endpoints = MatchingEndpointsState.all_matched
|
|
382
|
+
elif self.listed_model_endpoint_uids is None:
|
|
383
|
+
self.matching_endpoints = MatchingEndpointsState.no_check_needed
|
|
322
384
|
|
|
323
385
|
if (
|
|
324
386
|
self._background_task_state
|
|
325
387
|
== mlrun.common.schemas.BackgroundTaskState.succeeded
|
|
388
|
+
and self.matching_endpoints in MatchingEndpointsState.success_states()
|
|
326
389
|
):
|
|
327
390
|
return event
|
|
328
391
|
else:
|
|
329
392
|
return None
|
|
330
393
|
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
)
|
|
345
|
-
else: # in progress
|
|
346
|
-
logger.info(
|
|
347
|
-
f"Model endpoint creation task is still in progress with the current state: "
|
|
348
|
-
f"{background_task_state}. Events will not be monitored for the next "
|
|
349
|
-
f"{mlrun.mlconf.model_endpoint_monitoring.model_endpoint_creation_check_period} seconds",
|
|
350
|
-
name=self.name,
|
|
351
|
-
background_task_check_timestamp=self._background_task_check_timestamp.isoformat(),
|
|
352
|
-
)
|
|
394
|
+
|
|
395
|
+
def collect_model_endpoint_uids(server: mlrun.serving.GraphServer) -> set[str]:
|
|
396
|
+
"""Collects all model endpoint UIDs from the server's graph steps."""
|
|
397
|
+
model_endpoint_uids = set()
|
|
398
|
+
for step in server.graph.steps.values():
|
|
399
|
+
if hasattr(step, "monitoring_data"):
|
|
400
|
+
for model in step.monitoring_data.keys():
|
|
401
|
+
uid = step.monitoring_data[model].get(
|
|
402
|
+
mlrun.common.schemas.MonitoringData.MODEL_ENDPOINT_UID
|
|
403
|
+
)
|
|
404
|
+
if uid:
|
|
405
|
+
model_endpoint_uids.add(uid)
|
|
406
|
+
return model_endpoint_uids
|
|
353
407
|
|
|
354
408
|
|
|
355
409
|
class SamplingStep(storey.MapClass):
|
mlrun/serving/v2_serving.py
CHANGED
|
@@ -24,6 +24,9 @@ import mlrun.common.schemas.model_monitoring
|
|
|
24
24
|
import mlrun.model_monitoring
|
|
25
25
|
from mlrun.utils import logger, now_date
|
|
26
26
|
|
|
27
|
+
from ..common.model_monitoring.helpers import (
|
|
28
|
+
get_model_endpoints_creation_task_status,
|
|
29
|
+
)
|
|
27
30
|
from .utils import StepToDict, _extract_input_data, _update_result_body
|
|
28
31
|
|
|
29
32
|
|
|
@@ -474,22 +477,18 @@ class V2ModelServer(StepToDict):
|
|
|
474
477
|
) or getattr(self.context, "server", None)
|
|
475
478
|
if not self.context.is_mock or self.context.monitoring_mock:
|
|
476
479
|
if server.model_endpoint_creation_task_name:
|
|
477
|
-
|
|
478
|
-
server
|
|
479
|
-
)
|
|
480
|
-
logger.debug(
|
|
481
|
-
"Checking model endpoint creation task status",
|
|
482
|
-
task_name=server.model_endpoint_creation_task_name,
|
|
480
|
+
background_task_state, _, _ = get_model_endpoints_creation_task_status(
|
|
481
|
+
server
|
|
483
482
|
)
|
|
484
483
|
if (
|
|
485
|
-
|
|
484
|
+
background_task_state
|
|
486
485
|
in mlrun.common.schemas.BackgroundTaskState.terminal_states()
|
|
487
486
|
):
|
|
488
487
|
logger.debug(
|
|
489
|
-
f"Model endpoint creation task completed with state {
|
|
488
|
+
f"Model endpoint creation task completed with state {background_task_state}"
|
|
490
489
|
)
|
|
491
490
|
if (
|
|
492
|
-
|
|
491
|
+
background_task_state
|
|
493
492
|
== mlrun.common.schemas.BackgroundTaskState.succeeded
|
|
494
493
|
):
|
|
495
494
|
self._model_logger = (
|
|
@@ -504,7 +503,7 @@ class V2ModelServer(StepToDict):
|
|
|
504
503
|
else: # in progress
|
|
505
504
|
logger.debug(
|
|
506
505
|
f"Model endpoint creation task is still in progress with the current state: "
|
|
507
|
-
f"{
|
|
506
|
+
f"{background_task_state}.",
|
|
508
507
|
name=self.name,
|
|
509
508
|
)
|
|
510
509
|
else:
|
|
@@ -217,7 +217,7 @@ class MLFlowTracker(Tracker):
|
|
|
217
217
|
handler=handler,
|
|
218
218
|
run_name=run.info.run_name,
|
|
219
219
|
project_name=project.name,
|
|
220
|
-
uid=run.info.
|
|
220
|
+
uid=run.info.run_id,
|
|
221
221
|
)
|
|
222
222
|
|
|
223
223
|
# Create a context from the run object:
|
|
@@ -373,7 +373,7 @@ class MLFlowTracker(Tracker):
|
|
|
373
373
|
# Import the MLFlow run's artifacts to MLRun (model are logged after the rest of artifacts
|
|
374
374
|
# so the artifacts can be registered as extra data in the models):
|
|
375
375
|
artifacts = {}
|
|
376
|
-
|
|
376
|
+
model_uris = []
|
|
377
377
|
for artifact in client.list_artifacts(run_id=run.info.run_id):
|
|
378
378
|
# Get the artifact's local path (MLFlow suggests that if the artifact is already in the local filesystem
|
|
379
379
|
# its local path will be returned:
|
|
@@ -381,29 +381,29 @@ class MLFlowTracker(Tracker):
|
|
|
381
381
|
run_id=run.info.run_id,
|
|
382
382
|
artifact_path=artifact.path,
|
|
383
383
|
)
|
|
384
|
-
#
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
384
|
+
# Log the artifact:
|
|
385
|
+
artifact = MLFlowTracker._log_artifact(
|
|
386
|
+
context=context,
|
|
387
|
+
key=pathlib.Path(artifact.path).name.replace(".", "_"),
|
|
388
|
+
# Mlflow has the same name for files but with different extensions, so we add extension to name
|
|
389
|
+
local_path=artifact_local_path,
|
|
390
|
+
tmp_path=tmp_dir,
|
|
391
|
+
)
|
|
392
|
+
artifacts[artifact.key] = artifact
|
|
393
|
+
|
|
394
|
+
# get all run model's uri's (artifact_location in mlflow 3.0.0).
|
|
395
|
+
logged_models = mlflow.search_logged_models(
|
|
396
|
+
filter_string=f"source_run_id = '{run.info.run_id}'",
|
|
397
|
+
output_format="list",
|
|
398
|
+
)
|
|
399
|
+
for logged_model in logged_models:
|
|
400
|
+
model_uris.append(logged_model.artifact_location)
|
|
401
401
|
|
|
402
|
-
for
|
|
402
|
+
for model_uri in model_uris:
|
|
403
403
|
MLFlowTracker._log_model(
|
|
404
404
|
context=context,
|
|
405
|
-
model_uri=
|
|
406
|
-
key=pathlib.Path(
|
|
405
|
+
model_uri=model_uri,
|
|
406
|
+
key=pathlib.Path(model_uri).stem,
|
|
407
407
|
metrics=results,
|
|
408
408
|
extra_data=artifacts,
|
|
409
409
|
tmp_path=tmp_dir,
|
|
@@ -439,20 +439,18 @@ class MLFlowTracker(Tracker):
|
|
|
439
439
|
|
|
440
440
|
# Get the model info from MLFlow:
|
|
441
441
|
model_info = mlflow.models.get_model_info(model_uri=model_uri)
|
|
442
|
+
# Download the model and set the path to local path:
|
|
443
|
+
local_model_path = mlflow.artifacts.download_artifacts(
|
|
444
|
+
artifact_uri=str(model_uri)
|
|
445
|
+
)
|
|
446
|
+
model_path = pathlib.Path(local_model_path)
|
|
442
447
|
|
|
443
448
|
# Prepare the archive path:
|
|
444
|
-
|
|
445
|
-
archive_path = pathlib.Path(tmp_path) / f"{model_uri.stem}.zip"
|
|
446
|
-
if not os.path.exists(model_uri):
|
|
447
|
-
local_path = mlflow.artifacts.download_artifacts(
|
|
448
|
-
artifact_uri=str(model_uri)
|
|
449
|
-
)
|
|
450
|
-
model_uri = pathlib.Path(local_path)
|
|
451
|
-
|
|
449
|
+
archive_path = pathlib.Path(tmp_path) / f"{model_path.name}.zip"
|
|
452
450
|
# TODO add progress bar for the case of large files
|
|
453
451
|
# Zip the artifact:
|
|
454
452
|
with zipfile.ZipFile(archive_path, "w") as zip_file:
|
|
455
|
-
for path in
|
|
453
|
+
for path in model_path.rglob("*"):
|
|
456
454
|
zip_file.write(filename=path, arcname=path.relative_to(model_uri))
|
|
457
455
|
|
|
458
456
|
# Get inputs and outputs info:
|