mlrun 1.7.0rc42__py3-none-any.whl → 1.7.0rc44__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mlrun might be problematic.
- mlrun/__main__.py +4 -2
- mlrun/artifacts/base.py +1 -1
- mlrun/artifacts/manager.py +15 -4
- mlrun/common/schemas/__init__.py +1 -0
- mlrun/common/schemas/alert.py +11 -11
- mlrun/common/schemas/client_spec.py +0 -1
- mlrun/common/schemas/frontend_spec.py +7 -0
- mlrun/common/schemas/notification.py +32 -5
- mlrun/common/schemas/workflow.py +1 -0
- mlrun/config.py +46 -21
- mlrun/data_types/data_types.py +5 -0
- mlrun/datastore/base.py +4 -7
- mlrun/datastore/storeytargets.py +4 -3
- mlrun/datastore/targets.py +17 -4
- mlrun/db/httpdb.py +2 -12
- mlrun/db/nopdb.py +21 -4
- mlrun/execution.py +7 -2
- mlrun/feature_store/api.py +1 -0
- mlrun/feature_store/retrieval/spark_merger.py +7 -3
- mlrun/frameworks/_common/plan.py +3 -3
- mlrun/frameworks/_ml_common/plan.py +1 -1
- mlrun/frameworks/parallel_coordinates.py +2 -3
- mlrun/k8s_utils.py +48 -2
- mlrun/launcher/client.py +6 -6
- mlrun/model.py +2 -1
- mlrun/model_monitoring/controller.py +1 -1
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +15 -1
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +12 -0
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +2 -2
- mlrun/model_monitoring/helpers.py +7 -15
- mlrun/model_monitoring/writer.py +8 -2
- mlrun/projects/pipelines.py +2 -0
- mlrun/projects/project.py +146 -57
- mlrun/render.py +3 -3
- mlrun/runtimes/kubejob.py +6 -6
- mlrun/runtimes/local.py +4 -1
- mlrun/runtimes/nuclio/api_gateway.py +6 -0
- mlrun/runtimes/nuclio/application/application.py +3 -2
- mlrun/runtimes/pod.py +16 -8
- mlrun/runtimes/sparkjob/spark3job.py +4 -0
- mlrun/utils/async_http.py +1 -1
- mlrun/utils/helpers.py +56 -22
- mlrun/utils/notifications/notification/__init__.py +0 -1
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.0rc42.dist-info → mlrun-1.7.0rc44.dist-info}/METADATA +27 -27
- {mlrun-1.7.0rc42.dist-info → mlrun-1.7.0rc44.dist-info}/RECORD +50 -50
- {mlrun-1.7.0rc42.dist-info → mlrun-1.7.0rc44.dist-info}/WHEEL +1 -1
- {mlrun-1.7.0rc42.dist-info → mlrun-1.7.0rc44.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc42.dist-info → mlrun-1.7.0rc44.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc42.dist-info → mlrun-1.7.0rc44.dist-info}/top_level.txt +0 -0
mlrun/frameworks/_common/plan.py
CHANGED

@@ -11,12 +11,12 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
+
 from abc import ABC, abstractmethod

 import mlrun
 from mlrun.artifacts import Artifact
-from mlrun.utils.helpers import
+from mlrun.utils.helpers import is_jupyter


 class Plan(ABC):

@@ -84,7 +84,7 @@ class Plan(ABC):
             return

         # Call the correct display method according to the kernel:
-        if
+        if is_jupyter:
             self._gui_display()
         else:
             self._cli_display()
mlrun/frameworks/parallel_coordinates.py
CHANGED

@@ -18,8 +18,7 @@ from typing import Union

 import numpy as np
 import pandas as pd
-from IPython.
-from IPython.display import display
+from IPython.display import HTML, display
 from pandas.api.types import is_numeric_dtype, is_string_dtype

 import mlrun

@@ -216,7 +215,7 @@ def _show_and_export_html(html: str, show=None, filename=None, runs_list=None):
            fp.write("</body></html>")
        else:
            fp.write(html)
-    if show or (show is None and mlrun.utils.
+    if show or (show is None and mlrun.utils.is_jupyter):
        display(HTML(html))
        if runs_list and len(runs_list) <= max_table_rows:
            display(HTML(html_table))
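
Both hunks above switch to the is_jupyter flag imported from mlrun.utils.helpers. A minimal sketch of the gating pattern, assuming is_jupyter is a module-level boolean as the imports above indicate; the show_report helper itself is illustrative only and not part of mlrun:

# Illustrative sketch - `show_report` is not part of mlrun.
# Assumes `mlrun.utils.helpers` exposes a module-level boolean `is_jupyter`.
from IPython.display import HTML, display

from mlrun.utils.helpers import is_jupyter


def show_report(html: str) -> None:
    # Rich HTML rendering inside a Jupyter kernel, plain text elsewhere.
    if is_jupyter:
        display(HTML(html))
    else:
        print(html)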
mlrun/k8s_utils.py
CHANGED

@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import re
+import warnings

 import kubernetes.client

@@ -133,7 +134,7 @@ def sanitize_label_value(value: str) -> str:
     return re.sub(r"([^a-zA-Z0-9_.-]|^[^a-zA-Z0-9]|[^a-zA-Z0-9]$)", "-", value[:63])


-def verify_label_key(key: str):
+def verify_label_key(key: str, allow_k8s_prefix: bool = False):
     """
     Verify that the label key is valid for Kubernetes.
     Refer to https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#syntax-and-character-set

@@ -146,6 +147,10 @@ def verify_label_key(key: str):
         name = parts[0]
     elif len(parts) == 2:
         prefix, name = parts
+        if len(name) == 0:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "Label key name cannot be empty when a prefix is set"
+            )
         if len(prefix) == 0:
             raise mlrun.errors.MLRunInvalidArgumentError(
                 "Label key prefix cannot be empty"

@@ -173,7 +178,13 @@ def verify_label_key(key: str):
         mlrun.utils.regex.qualified_name,
     )

-
+    # Allow the use of Kubernetes reserved prefixes ('k8s.io/' or 'kubernetes.io/')
+    # only when setting node selectors, not when adding new labels.
+    if (
+        key.startswith("k8s.io/")
+        or key.startswith("kubernetes.io/")
+        and not allow_k8s_prefix
+    ):
         raise mlrun.errors.MLRunInvalidArgumentError(
             "Labels cannot start with 'k8s.io/' or 'kubernetes.io/'"
         )

@@ -185,3 +196,38 @@ def verify_label_value(value, label_key):
         value,
         mlrun.utils.regex.label_value,
     )
+
+
+def validate_node_selectors(
+    node_selectors: dict[str, str], raise_on_error: bool = True
+) -> bool:
+    """
+    Ensures that user-defined node selectors adhere to Kubernetes label standards:
+    - Validates that each key conforms to Kubernetes naming conventions, with specific rules for name and prefix.
+    - Ensures values comply with Kubernetes label value rules.
+    - If raise_on_error is True, raises errors for invalid selectors.
+    - If raise_on_error is False, logs warnings for invalid selectors.
+    """
+
+    # Helper function for handling errors or warnings
+    def handle_invalid(message):
+        if raise_on_error:
+            raise
+        else:
+            warnings.warn(
+                f"{message}\n"
+                f"The node selector you’ve set does not meet the validation rules for the current Kubernetes version. "
+                f"Please note that invalid node selectors may cause issues with function scheduling."
+            )
+
+    node_selectors = node_selectors or {}
+    for key, value in node_selectors.items():
+        try:
+            verify_label_key(key, allow_k8s_prefix=True)
+            verify_label_value(value, label_key=key)
+        except mlrun.errors.MLRunInvalidArgumentError as err:
+            # An error or warning is raised by handle_invalid due to validation failure.
+            # Returning False indicates validation failed, allowing us to exit the function.
+            handle_invalid(str(err))
+            return False
+    return True
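
The new validate_node_selectors helper checks each selector key with verify_label_key (permitting the reserved 'k8s.io/' and 'kubernetes.io/' prefixes via allow_k8s_prefix=True) and each value with verify_label_value, and either raises or only warns. A usage sketch based on the signatures shown above; the selector keys and values are placeholders:

# Usage sketch for the helper added above; selector keys/values are placeholders.
import mlrun.k8s_utils

selectors = {"kubernetes.io/arch": "amd64", "disktype": "ssd"}

# Strict mode (default): an invalid key or value raises MLRunInvalidArgumentError.
mlrun.k8s_utils.validate_node_selectors(selectors)

# Lenient mode: emits a warning (warnings.warn) and returns False on the first
# invalid selector instead of raising; returns True when all selectors are valid.
ok = mlrun.k8s_utils.validate_node_selectors(selectors, raise_on_error=False)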
mlrun/launcher/client.py
CHANGED

@@ -14,7 +14,7 @@
 import abc
 from typing import Optional

-import IPython
+import IPython.display

 import mlrun.common.constants as mlrun_constants
 import mlrun.errors

@@ -22,7 +22,7 @@ import mlrun.launcher.base as launcher
 import mlrun.lists
 import mlrun.model
 import mlrun.runtimes
-
+import mlrun.utils


 class ClientBaseLauncher(launcher.BaseLauncher, abc.ABC):

@@ -128,10 +128,10 @@ class ClientBaseLauncher(launcher.BaseLauncher, abc.ABC):
        if result:
            results_tbl.append(result)
        else:
-            logger.info("no returned result (job may still be in progress)")
+            mlrun.utils.logger.info("no returned result (job may still be in progress)")
            results_tbl.append(run.to_dict())

-        if mlrun.utils.
+        if mlrun.utils.is_jupyter and mlrun.mlconf.ipython_widget:
            results_tbl.show()
            print()
            ui_url = mlrun.utils.get_ui_url(project, uid)

@@ -147,9 +147,9 @@ class ClientBaseLauncher(launcher.BaseLauncher, abc.ABC):
            project_flag = f"-p {project}" if project else ""
            info_cmd = f"mlrun get run {uid} {project_flag}"
            logs_cmd = f"mlrun logs {uid} {project_flag}"
-            logger.info(
+            mlrun.utils.logger.info(
                "To track results use the CLI", info_cmd=info_cmd, logs_cmd=logs_cmd
            )
            ui_url = mlrun.utils.get_ui_url(project, uid)
            if ui_url:
-                logger.info("Or click for UI", ui_url=ui_url)
+                mlrun.utils.logger.info("Or click for UI", ui_url=ui_url)
mlrun/model.py
CHANGED

@@ -681,7 +681,8 @@ class ImageBuilder(ModelObj):
 class Notification(ModelObj):
     """Notification object

-    :param kind: notification implementation kind - slack, webhook, etc.
+    :param kind: notification implementation kind - slack, webhook, etc. See
+        :py:class:`mlrun.common.schemas.notification.NotificationKind`
     :param name: for logging and identification
     :param message: message content in the notification
     :param severity: severity to display in the notification
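
For orientation, the documented parameters map to keyword arguments of mlrun.model.Notification; a small illustrative construction (all values are placeholders, and kind must be one of the values defined in mlrun.common.schemas.notification.NotificationKind):

# Illustrative only - the values below are placeholders.
import mlrun.model

notification = mlrun.model.Notification(
    kind="slack",  # see mlrun.common.schemas.notification.NotificationKind
    name="run-completed",
    message="Run finished",
    severity="info",
)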
mlrun/model_monitoring/controller.py
CHANGED

@@ -219,7 +219,7 @@ class _BatchWindowGenerator:
            # If the endpoint does not have a stream, `last_updated` should be
            # the minimum between the current time and the last updated time.
            # This compensates for the bumping mechanism - see
-            # `
+            # `update_model_endpoint_last_request`.
            last_updated = min(int(datetime_now().timestamp()), last_updated)
            logger.debug(
                "The endpoint does not have a stream", last_updated=last_updated
mlrun/model_monitoring/db/stores/sqldb/sql_store.py
CHANGED

@@ -588,7 +588,11 @@ class SQLStoreBase(StoreBase):

        for endpoint_dict in endpoints:
            endpoint_id = endpoint_dict[mm_schemas.EventFieldType.UID]
-
+            logger.debug(
+                "Deleting model endpoint resources from the SQL tables",
+                endpoint_id=endpoint_id,
+                project=self.project,
+            )
            # Delete last analyzed records
            self._delete_last_analyzed(endpoint_id=endpoint_id)

@@ -598,6 +602,16 @@

            # Delete model endpoint record
            self.delete_model_endpoint(endpoint_id=endpoint_id)
+            logger.debug(
+                "Successfully deleted model endpoint resources",
+                endpoint_id=endpoint_id,
+                project=self.project,
+            )
+
+        logger.debug(
+            "Successfully deleted model monitoring endpoints resources from the SQL tables",
+            project=self.project,
+        )

    def get_model_endpoint_metrics(
        self, endpoint_id: str, type: mm_schemas.ModelEndpointMonitoringMetricType
mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py
CHANGED

@@ -305,10 +305,22 @@ class KVStoreBase(StoreBase):
                endpoint_id = endpoint_dict[mm_schemas.EventFieldType.ENDPOINT_ID]
            else:
                endpoint_id = endpoint_dict[mm_schemas.EventFieldType.UID]
+
+            logger.debug(
+                "Deleting model endpoint resources from the V3IO KV table",
+                endpoint_id=endpoint_id,
+                project=self.project,
+            )
+
            self.delete_model_endpoint(
                endpoint_id,
            )

+        logger.debug(
+            "Successfully deleted model monitoring endpoints from the V3IO KV table",
+            project=self.project,
+        )
+
        # Delete remain records in the KV
        all_records = self.client.kv.new_cursor(
            container=self.container,
mlrun/model_monitoring/db/tsdb/tdengine/schemas.py
CHANGED

@@ -163,8 +163,8 @@ class TDEngineSchema:
    @staticmethod
    def _get_records_query(
        table: str,
-        start: datetime,
-        end: datetime,
+        start: datetime.datetime,
+        end: datetime.datetime,
        columns_to_filter: list[str] = None,
        filter_query: Optional[str] = None,
        interval: Optional[str] = None,
mlrun/model_monitoring/helpers.py
CHANGED

@@ -63,7 +63,6 @@ def get_stream_path(
    )

    if not stream_uri or stream_uri == "v3io":
-        # TODO : remove the first part of this condition in 1.9.0
        stream_uri = mlrun.mlconf.get_model_monitoring_file_target_path(
            project=project,
            kind=mm_constants.FileTargetKind.STREAM,

@@ -71,8 +70,6 @@
            function_name=function_name,
        )

-    if isinstance(stream_uri, list):  # ML-6043 - user side gets only the new stream uri
-        stream_uri = stream_uri[1]  # get new stream path, under projects
    return mlrun.common.model_monitoring.helpers.parse_monitoring_stream_path(
        stream_uri=stream_uri, project=project, function_name=function_name
    )

@@ -179,7 +176,7 @@ def _get_monitoring_time_window_from_controller_run(
 def update_model_endpoint_last_request(
     project: str,
     model_endpoint: ModelEndpoint,
-    current_request: datetime,
+    current_request: datetime.datetime,
     db: "RunDBInterface",
 ) -> None:
     """

@@ -190,7 +187,8 @@ def update_model_endpoint_last_request(
    :param current_request: current request time
    :param db: DB interface.
    """
-
+    is_model_server_endpoint = model_endpoint.spec.stream_path != ""
+    if is_model_server_endpoint:
        current_request = current_request.isoformat()
        logger.info(
            "Update model endpoint last request time (EP with serving)",

@@ -204,12 +202,13 @@
            endpoint_id=model_endpoint.metadata.uid,
            attributes={mm_constants.EventFieldType.LAST_REQUEST: current_request},
        )
-    else:
+    else:  # model endpoint without any serving function - close the window "manually"
        try:
            time_window = _get_monitoring_time_window_from_controller_run(project, db)
        except mlrun.errors.MLRunNotFoundError:
-            logger.
-                "Not bumping model endpoint last request time - the monitoring controller isn't deployed yet"
+            logger.warn(
+                "Not bumping model endpoint last request time - the monitoring controller isn't deployed yet.\n"
+                "Call `project.enable_model_monitoring()` first."
            )
            return

@@ -265,13 +264,6 @@ def calculate_inputs_statistics(
                counts.tolist(),
                bins.tolist(),
            ]
-        elif "hist" in inputs_statistics[feature]:
-            # Comply with the other common features' histogram length
-            mlrun.common.model_monitoring.helpers.pad_hist(
-                mlrun.common.model_monitoring.helpers.Histogram(
-                    inputs_statistics[feature]["hist"]
-                )
-            )
        else:
            # If the feature is not in the sample set and doesn't have a histogram, remove it from the statistics:
            inputs_statistics.pop(feature)
mlrun/model_monitoring/writer.py
CHANGED

@@ -160,7 +160,9 @@ class ModelMonitoringWriter(StepToDict):
            event_kind = f"{event_kind}_detected"
        else:
            event_kind = f"{event_kind}_suspected"
-        return alert_objects.EventKind(
+        return alert_objects.EventKind(
+            value=mlrun.utils.helpers.normalize_name(event_kind)
+        )

    @staticmethod
    def _reconstruct_event(event: _RawEvent) -> tuple[_AppResultEvent, WriterEventKind]:

@@ -258,9 +260,13 @@
            "data drift app",
            endpoint_id=endpoint_id,
        )
+        attributes = json.loads(event[ResultData.RESULT_EXTRA_DATA])
+        attributes[EventFieldType.DRIFT_STATUS] = str(
+            attributes[EventFieldType.DRIFT_STATUS]
+        )
        self._app_result_store.update_model_endpoint(
            endpoint_id=endpoint_id,
-            attributes=
+            attributes=attributes,
        )

        logger.info("Model monitoring writer finished handling event")
mlrun/projects/pipelines.py
CHANGED

@@ -80,6 +80,7 @@ class WorkflowSpec(mlrun.model.ModelObj):
        schedule: typing.Union[str, mlrun.common.schemas.ScheduleCronTrigger] = None,
        cleanup_ttl: typing.Optional[int] = None,
        image: typing.Optional[str] = None,
+        workflow_runner_node_selector: typing.Optional[dict[str, str]] = None,
    ):
        self.engine = engine
        self.code = code

@@ -93,6 +94,7 @@ class WorkflowSpec(mlrun.model.ModelObj):
        self._tmp_path = None
        self.schedule = schedule
        self.image = image
+        self.workflow_runner_node_selector = workflow_runner_node_selector

    def get_source_file(self, context=""):
        if not self.code and not self.path: