mlrun 1.9.0rc2__py3-none-any.whl → 1.10.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/api/schemas/__init__.py +0 -1
- mlrun/common/__init__.py +0 -1
- mlrun/common/db/__init__.py +0 -1
- mlrun/common/db/sql_session.py +0 -1
- mlrun/common/formatters/__init__.py +0 -1
- mlrun/common/formatters/artifact.py +0 -1
- mlrun/common/formatters/base.py +0 -1
- mlrun/common/formatters/feature_set.py +0 -1
- mlrun/common/formatters/function.py +0 -1
- mlrun/common/formatters/model_endpoint.py +0 -1
- mlrun/common/formatters/pipeline.py +0 -1
- mlrun/common/formatters/project.py +0 -1
- mlrun/common/formatters/run.py +0 -2
- mlrun/common/runtimes/constants.py +1 -1
- mlrun/common/schemas/__init__.py +1 -0
- mlrun/common/schemas/alert.py +1 -1
- mlrun/common/schemas/api_gateway.py +1 -1
- mlrun/common/schemas/artifact.py +1 -1
- mlrun/common/schemas/auth.py +1 -1
- mlrun/common/schemas/background_task.py +1 -1
- mlrun/common/schemas/client_spec.py +1 -1
- mlrun/common/schemas/clusterization_spec.py +1 -1
- mlrun/common/schemas/constants.py +1 -1
- mlrun/common/schemas/datastore_profile.py +0 -1
- mlrun/common/schemas/events.py +1 -1
- mlrun/common/schemas/feature_store.py +1 -1
- mlrun/common/schemas/frontend_spec.py +1 -1
- mlrun/common/schemas/function.py +1 -1
- mlrun/common/schemas/http.py +1 -1
- mlrun/common/schemas/hub.py +1 -1
- mlrun/common/schemas/k8s.py +1 -1
- mlrun/common/schemas/memory_reports.py +0 -1
- mlrun/common/schemas/model_monitoring/model_endpoints.py +32 -8
- mlrun/common/schemas/notification.py +4 -0
- mlrun/common/schemas/object.py +1 -1
- mlrun/common/schemas/partition.py +1 -1
- mlrun/common/schemas/pipeline.py +1 -1
- mlrun/common/schemas/project.py +1 -1
- mlrun/common/schemas/regex.py +1 -1
- mlrun/common/schemas/runtime_resource.py +1 -1
- mlrun/common/schemas/schedule.py +1 -1
- mlrun/common/schemas/secret.py +1 -1
- mlrun/common/schemas/tag.py +0 -1
- mlrun/common/schemas/workflow.py +1 -1
- mlrun/common/secrets.py +0 -1
- mlrun/config.py +9 -17
- mlrun/data_types/infer.py +1 -1
- mlrun/data_types/spark.py +1 -1
- mlrun/datastore/datastore.py +1 -1
- mlrun/datastore/snowflake_utils.py +0 -1
- mlrun/datastore/spark_utils.py +0 -1
- mlrun/datastore/utils.py +1 -1
- mlrun/db/base.py +2 -0
- mlrun/db/httpdb.py +29 -19
- mlrun/db/nopdb.py +2 -1
- mlrun/errors.py +1 -1
- mlrun/execution.py +21 -9
- mlrun/feature_store/feature_set.py +0 -12
- mlrun/feature_store/retrieval/base.py +1 -1
- mlrun/feature_store/retrieval/dask_merger.py +1 -1
- mlrun/feature_store/retrieval/job.py +1 -1
- mlrun/feature_store/retrieval/spark_merger.py +0 -2
- mlrun/feature_store/steps.py +1 -1
- mlrun/features.py +1 -1
- mlrun/frameworks/_common/artifacts_library.py +1 -1
- mlrun/frameworks/_common/mlrun_interface.py +1 -1
- mlrun/frameworks/_common/model_handler.py +3 -3
- mlrun/frameworks/_common/producer.py +0 -1
- mlrun/frameworks/_common/utils.py +1 -1
- mlrun/frameworks/_dl_common/loggers/logger.py +0 -1
- mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +1 -1
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +1 -1
- mlrun/frameworks/_dl_common/model_handler.py +1 -1
- mlrun/frameworks/_dl_common/utils.py +1 -1
- mlrun/frameworks/_ml_common/artifacts_library.py +1 -1
- mlrun/frameworks/_ml_common/loggers/logger.py +0 -1
- mlrun/frameworks/_ml_common/loggers/mlrun_logger.py +1 -1
- mlrun/frameworks/_ml_common/model_handler.py +1 -1
- mlrun/frameworks/_ml_common/pkl_model_server.py +1 -1
- mlrun/frameworks/_ml_common/plan.py +1 -1
- mlrun/frameworks/_ml_common/plans/calibration_curve_plan.py +0 -1
- mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +0 -1
- mlrun/frameworks/_ml_common/plans/dataset_plan.py +1 -1
- mlrun/frameworks/_ml_common/plans/feature_importance_plan.py +1 -1
- mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +1 -1
- mlrun/frameworks/_ml_common/producer.py +1 -1
- mlrun/frameworks/_ml_common/utils.py +1 -1
- mlrun/frameworks/lgbm/callbacks/callback.py +1 -1
- mlrun/frameworks/lgbm/callbacks/logging_callback.py +0 -1
- mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +0 -1
- mlrun/frameworks/lgbm/mlrun_interfaces/booster_mlrun_interface.py +1 -1
- mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +1 -1
- mlrun/frameworks/lgbm/mlrun_interfaces/model_mlrun_interface.py +1 -1
- mlrun/frameworks/lgbm/model_handler.py +1 -1
- mlrun/frameworks/lgbm/model_server.py +1 -1
- mlrun/frameworks/lgbm/utils.py +1 -1
- mlrun/frameworks/onnx/dataset.py +1 -1
- mlrun/frameworks/onnx/mlrun_interface.py +1 -1
- mlrun/frameworks/onnx/model_handler.py +1 -1
- mlrun/frameworks/onnx/model_server.py +1 -1
- mlrun/frameworks/pytorch/callbacks/callback.py +1 -1
- mlrun/frameworks/pytorch/callbacks/logging_callback.py +1 -1
- mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +1 -1
- mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +1 -1
- mlrun/frameworks/pytorch/callbacks_handler.py +1 -1
- mlrun/frameworks/pytorch/mlrun_interface.py +1 -1
- mlrun/frameworks/pytorch/model_handler.py +1 -1
- mlrun/frameworks/pytorch/model_server.py +1 -1
- mlrun/frameworks/pytorch/utils.py +1 -1
- mlrun/frameworks/sklearn/__init__.py +0 -14
- mlrun/frameworks/sklearn/estimator.py +1 -1
- mlrun/frameworks/sklearn/metric.py +1 -1
- mlrun/frameworks/sklearn/metrics_library.py +1 -1
- mlrun/frameworks/sklearn/mlrun_interface.py +1 -1
- mlrun/frameworks/sklearn/model_handler.py +1 -1
- mlrun/frameworks/sklearn/utils.py +1 -1
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +1 -1
- mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +1 -1
- mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +1 -1
- mlrun/frameworks/tf_keras/mlrun_interface.py +1 -1
- mlrun/frameworks/tf_keras/model_handler.py +1 -1
- mlrun/frameworks/tf_keras/model_server.py +1 -1
- mlrun/frameworks/tf_keras/utils.py +1 -1
- mlrun/frameworks/xgboost/mlrun_interface.py +1 -1
- mlrun/frameworks/xgboost/model_handler.py +1 -1
- mlrun/frameworks/xgboost/utils.py +1 -1
- mlrun/k8s_utils.py +340 -0
- mlrun/launcher/base.py +3 -3
- mlrun/launcher/local.py +2 -2
- mlrun/launcher/remote.py +2 -2
- mlrun/model.py +14 -0
- mlrun/model_monitoring/applications/__init__.py +0 -1
- mlrun/model_monitoring/applications/_application_steps.py +3 -1
- mlrun/model_monitoring/controller.py +3 -1
- mlrun/model_monitoring/db/tsdb/base.py +3 -1
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connection.py +213 -0
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +27 -49
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +35 -30
- mlrun/model_monitoring/stream_processing.py +7 -11
- mlrun/package/context_handler.py +1 -1
- mlrun/package/errors.py +1 -1
- mlrun/package/packager.py +1 -1
- mlrun/package/packagers/default_packager.py +1 -1
- mlrun/package/packagers/numpy_packagers.py +1 -1
- mlrun/package/packagers/pandas_packagers.py +1 -1
- mlrun/package/packagers/python_standard_library_packagers.py +1 -1
- mlrun/package/packagers_manager.py +1 -1
- mlrun/package/utils/_archiver.py +1 -1
- mlrun/package/utils/_formatter.py +1 -1
- mlrun/package/utils/_pickler.py +1 -1
- mlrun/package/utils/_supported_format.py +1 -1
- mlrun/package/utils/log_hint_utils.py +1 -1
- mlrun/package/utils/type_hint_utils.py +1 -1
- mlrun/projects/operations.py +36 -21
- mlrun/projects/project.py +82 -74
- mlrun/run.py +1 -1
- mlrun/runtimes/base.py +16 -6
- mlrun/runtimes/daskjob.py +2 -1
- mlrun/runtimes/databricks_job/databricks_cancel_task.py +0 -1
- mlrun/runtimes/databricks_job/databricks_runtime.py +2 -1
- mlrun/runtimes/databricks_job/databricks_wrapper.py +0 -1
- mlrun/runtimes/mounts.py +2 -0
- mlrun/runtimes/nuclio/function.py +6 -1
- mlrun/runtimes/nuclio/serving.py +1 -1
- mlrun/runtimes/pod.py +4 -349
- mlrun/runtimes/sparkjob/spark3job.py +0 -12
- mlrun/serving/merger.py +0 -1
- mlrun/serving/remote.py +1 -1
- mlrun/serving/serving_wrapper.py +1 -1
- mlrun/serving/states.py +6 -3
- mlrun/serving/utils.py +1 -1
- mlrun/utils/async_http.py +0 -1
- mlrun/utils/clones.py +1 -1
- mlrun/utils/db.py +1 -1
- mlrun/utils/helpers.py +3 -1
- mlrun/utils/http.py +0 -1
- mlrun/utils/notifications/notification/webhook.py +18 -2
- mlrun/utils/regex.py +0 -1
- mlrun/utils/singleton.py +1 -1
- mlrun/utils/vault.py +1 -1
- mlrun/utils/version/__init__.py +1 -1
- mlrun/utils/version/version.json +2 -2
- mlrun/utils/version/version.py +1 -1
- {mlrun-1.9.0rc2.dist-info → mlrun-1.10.0rc1.dist-info}/METADATA +7 -11
- mlrun-1.10.0rc1.dist-info/RECORD +351 -0
- {mlrun-1.9.0rc2.dist-info → mlrun-1.10.0rc1.dist-info}/WHEEL +1 -1
- mlrun-1.9.0rc2.dist-info/RECORD +0 -350
- {mlrun-1.9.0rc2.dist-info → mlrun-1.10.0rc1.dist-info}/entry_points.txt +0 -0
- {mlrun-1.9.0rc2.dist-info → mlrun-1.10.0rc1.dist-info}/licenses/LICENSE +0 -0
- {mlrun-1.9.0rc2.dist-info → mlrun-1.10.0rc1.dist-info}/top_level.txt +0 -0
mlrun/k8s_utils.py
CHANGED
|
@@ -11,7 +11,9 @@
|
|
|
11
11
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
|
+
import copy
|
|
14
15
|
import re
|
|
16
|
+
import typing
|
|
15
17
|
import warnings
|
|
16
18
|
|
|
17
19
|
import kubernetes.client
|
|
@@ -228,3 +230,341 @@ def validate_node_selectors(
|
|
|
228
230
|
handle_invalid(str(err))
|
|
229
231
|
return False
|
|
230
232
|
return True
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
def enrich_preemption_mode(
    preemption_mode: typing.Optional[str],
    node_selector: dict[str, str],
    tolerations: list[kubernetes.client.V1Toleration],
    affinity: typing.Optional[kubernetes.client.V1Affinity],
) -> tuple[
    dict[str, str],
    list[kubernetes.client.V1Toleration],
    typing.Optional[kubernetes.client.V1Affinity],
]:
    """
    Adjust a pod's node selector, tolerations and affinity for a preemption mode.

    The inputs are returned as-is (not copied) when preemptible nodes are not
    configured or when the mode is `none`. Otherwise the inputs are deep-copied,
    passed to the handler registered for the mode, and the resulting affinity is
    reduced to None if it no longer carries any constraint.

    Modes:
        - allow: Adds tolerations, removes preemption constraints.
        - constrain: Requires preemptible node affinity and adds tolerations.
        - prevent: Enforces scheduling on non-preemptible nodes using taints or
          anti-affinity.
        - none: No enrichment is applied.

    :param preemption_mode: requested mode; a falsy value falls back to
                            `mlconfig.function_defaults.preemption_mode`
    :param node_selector:   node selector of the pod spec
    :param tolerations:     tolerations of the pod spec
    :param affinity:        affinity of the pod spec, may be None
    :return: tuple of (node selector, tolerations, affinity) after enrichment
    """
    # Nothing to enrich: hand back the caller's objects untouched
    if not mlconfig.is_preemption_nodes_configured():
        return node_selector, tolerations, affinity
    if preemption_mode == mlrun.common.schemas.PreemptionModes.none.value:
        return node_selector, tolerations, affinity

    if not preemption_mode:
        preemption_mode = mlconfig.function_defaults.preemption_mode
        mlrun.utils.logger.debug(
            "No preemption mode provided, using default",
            default_preemption_mode=preemption_mode,
        )

    # Work on copies so the caller's objects are never modified by the handlers
    selector_copy = copy.deepcopy(node_selector or {})
    tolerations_copy = copy.deepcopy(tolerations or [])
    affinity_copy = copy.deepcopy(affinity)
    preemptible_tolerations = generate_preemptible_tolerations()

    handler = _get_mode_handler(preemption_mode)
    if handler:
        selector_copy, tolerations_copy, affinity_copy = handler(
            selector_copy,
            tolerations_copy,
            affinity_copy,
            preemptible_tolerations,
        )

    return selector_copy, tolerations_copy, _prune_empty_affinity(affinity_copy)
|
|
289
|
+
|
|
290
|
+
|
|
291
|
+
def _get_mode_handler(mode: str):
    """
    Look up the enrichment handler registered for a preemption mode.

    :param mode: preemption mode to look up
    :return: the handler for `prevent`, `constrain` or `allow`; None for any
             other mode
    """
    modes = mlrun.common.schemas.PreemptionModes
    handlers = {
        modes.prevent: _handle_prevent_mode,
        modes.constrain: _handle_constrain_mode,
        modes.allow: _handle_allow_mode,
    }
    return handlers.get(mode)
|
|
297
|
+
|
|
298
|
+
|
|
299
|
+
def _handle_prevent_mode(
    node_selector: dict[str, str],
    tolerations: list[kubernetes.client.V1Toleration],
    affinity: typing.Optional[kubernetes.client.V1Affinity],
    preemptible_tolerations: list[kubernetes.client.V1Toleration],
) -> tuple[
    dict[str, str],
    list[kubernetes.client.V1Toleration],
    typing.Optional[kubernetes.client.V1Affinity],
]:
    """
    Enrich scheduling settings for the `prevent` preemption mode.

    :param node_selector:           node selector to enrich
    :param tolerations:             tolerations to enrich
    :param affinity:                affinity to enrich, may be None
    :param preemptible_tolerations: tolerations that target preemptible nodes
    :return: tuple of (node selector, tolerations, affinity) after enrichment
    """
    # Keep only tolerations that do not target preemptible nodes
    kept_tolerations = []
    for toleration in tolerations:
        if toleration not in preemptible_tolerations:
            kept_tolerations.append(toleration)

    # Strip the "In" requirements that would pull the pod onto preemptible nodes
    in_operator = mlrun.common.schemas.NodeSelectorOperator.node_selector_op_in.value
    preemptible_requirements = generate_preemptible_node_selector_requirements(
        in_operator
    )
    affinity = _prune_affinity_node_selector_requirement(
        preemptible_requirements,
        affinity=affinity,
    )

    # Strip the preemptible node selector entries as well
    preemptible_selector = mlconfig.get_preemptible_node_selector()
    node_selector = _prune_node_selector(
        preemptible_selector,
        enriched_node_selector=node_selector,
    )

    # Without preemptible tolerations configured, anti-affinity is the only way
    # left to keep the pod off preemptible nodes
    if not preemptible_tolerations:
        anti_affinity_terms = generate_preemptible_nodes_anti_affinity_terms()
        affinity = _override_required_during_scheduling_ignored_during_execution(
            kubernetes.client.V1NodeSelector(node_selector_terms=anti_affinity_terms),
            affinity,
        )

    return node_selector, kept_tolerations, affinity
|
|
336
|
+
|
|
337
|
+
|
|
338
|
+
def _handle_constrain_mode(
    node_selector: dict[str, str],
    tolerations: list[kubernetes.client.V1Toleration],
    affinity: typing.Optional[kubernetes.client.V1Affinity],
    preemptible_tolerations: list[kubernetes.client.V1Toleration],
) -> tuple[
    dict[str, str],
    list[kubernetes.client.V1Toleration],
    typing.Optional[kubernetes.client.V1Affinity],
]:
    """
    Enrich scheduling settings for the `constrain` preemption mode.

    Merges the preemptible tolerations into the given tolerations and overrides
    the required node affinity with the preemptible nodes affinity terms. The
    node selector is returned as received.

    :param node_selector:           node selector, passed through untouched
    :param tolerations:             tolerations to enrich
    :param affinity:                affinity to enrich, may be None
    :param preemptible_tolerations: tolerations that target preemptible nodes
    :return: tuple of (node selector, tolerations, affinity) after enrichment
    """
    merged_tolerations = _merge_tolerations(tolerations, preemptible_tolerations)

    required_terms = generate_preemptible_nodes_affinity_terms()
    required_selector = kubernetes.client.V1NodeSelector(
        node_selector_terms=required_terms
    )
    constrained_affinity = (
        _override_required_during_scheduling_ignored_during_execution(
            required_selector,
            affinity=affinity,
        )
    )

    return node_selector, merged_tolerations, constrained_affinity
|
|
358
|
+
|
|
359
|
+
|
|
360
|
+
def _handle_allow_mode(
    node_selector: dict[str, str],
    tolerations: list[kubernetes.client.V1Toleration],
    affinity: typing.Optional[kubernetes.client.V1Affinity],
    preemptible_tolerations: list[kubernetes.client.V1Toleration],
) -> tuple[
    dict[str, str],
    list[kubernetes.client.V1Toleration],
    typing.Optional[kubernetes.client.V1Affinity],
]:
    """
    Enrich scheduling settings for the `allow` preemption mode.

    Prunes both the "NotIn" and the "In" preemptible requirements from the
    affinity, prunes the preemptible node selector entries, and merges the
    preemptible tolerations into the given tolerations.

    :param node_selector:           node selector to enrich
    :param tolerations:             tolerations to enrich
    :param affinity:                affinity to enrich, may be None
    :param preemptible_tolerations: tolerations that target preemptible nodes
    :return: tuple of (node selector, tolerations, affinity) after enrichment
    """
    operators = mlrun.common.schemas.NodeSelectorOperator
    operators_to_prune = (
        operators.node_selector_op_not_in.value,
        operators.node_selector_op_in.value,
    )
    for operator in operators_to_prune:
        requirements = generate_preemptible_node_selector_requirements(operator)
        affinity = _prune_affinity_node_selector_requirement(
            requirements,
            affinity=affinity,
        )

    preemptible_selector = mlconfig.get_preemptible_node_selector()
    node_selector = _prune_node_selector(
        preemptible_selector,
        enriched_node_selector=node_selector,
    )

    merged_tolerations = _merge_tolerations(tolerations, preemptible_tolerations)
    return node_selector, merged_tolerations, affinity
|
|
386
|
+
|
|
387
|
+
|
|
388
|
+
def _merge_tolerations(
|
|
389
|
+
existing: list[kubernetes.client.V1Toleration],
|
|
390
|
+
to_add: list[kubernetes.client.V1Toleration],
|
|
391
|
+
) -> list[kubernetes.client.V1Toleration]:
|
|
392
|
+
for toleration in to_add:
|
|
393
|
+
if toleration not in existing:
|
|
394
|
+
existing.append(toleration)
|
|
395
|
+
return existing
|
|
396
|
+
|
|
397
|
+
|
|
398
|
+
def _prune_node_selector(
|
|
399
|
+
node_selector: dict[str, str],
|
|
400
|
+
enriched_node_selector: dict[str, str],
|
|
401
|
+
):
|
|
402
|
+
"""
|
|
403
|
+
Prunes given node_selector key from function spec if their key and value are matching
|
|
404
|
+
:param node_selector: node selectors to prune
|
|
405
|
+
"""
|
|
406
|
+
# both needs to exists to prune required node_selector from the spec node selector
|
|
407
|
+
if not node_selector or not enriched_node_selector:
|
|
408
|
+
return
|
|
409
|
+
|
|
410
|
+
mlrun.utils.logger.debug("Pruning node selectors", node_selector=node_selector)
|
|
411
|
+
return {
|
|
412
|
+
key: value
|
|
413
|
+
for key, value in enriched_node_selector.items()
|
|
414
|
+
if node_selector.get(key) != value
|
|
415
|
+
}
|
|
416
|
+
|
|
417
|
+
|
|
418
|
+
def _prune_affinity_node_selector_requirement(
|
|
419
|
+
node_selector_requirements: list[kubernetes.client.V1NodeSelectorRequirement],
|
|
420
|
+
affinity: typing.Optional[kubernetes.client.V1Affinity],
|
|
421
|
+
):
|
|
422
|
+
"""
|
|
423
|
+
Prunes given node selector requirements from affinity.
|
|
424
|
+
We are only editing required_during_scheduling_ignored_during_execution because the scheduler can't schedule
|
|
425
|
+
the pod unless the rule is met.
|
|
426
|
+
:param node_selector_requirements:
|
|
427
|
+
:return:
|
|
428
|
+
"""
|
|
429
|
+
# both needs to exist to prune required affinity from spec affinity
|
|
430
|
+
if not affinity or not node_selector_requirements:
|
|
431
|
+
return
|
|
432
|
+
if affinity.node_affinity:
|
|
433
|
+
node_affinity: kubernetes.client.V1NodeAffinity = affinity.node_affinity
|
|
434
|
+
|
|
435
|
+
new_required_during_scheduling_ignored_during_execution = None
|
|
436
|
+
if node_affinity.required_during_scheduling_ignored_during_execution:
|
|
437
|
+
node_selector: kubernetes.client.V1NodeSelector = (
|
|
438
|
+
node_affinity.required_during_scheduling_ignored_during_execution
|
|
439
|
+
)
|
|
440
|
+
new_node_selector_terms = (
|
|
441
|
+
_prune_node_selector_requirements_from_node_selector_terms(
|
|
442
|
+
node_selector_terms=node_selector.node_selector_terms,
|
|
443
|
+
requirements_to_prune=node_selector_requirements,
|
|
444
|
+
)
|
|
445
|
+
)
|
|
446
|
+
# check whether there are node selector terms to add to the new list of required terms
|
|
447
|
+
if new_node_selector_terms:
|
|
448
|
+
new_required_during_scheduling_ignored_during_execution = (
|
|
449
|
+
kubernetes.client.V1NodeSelector(
|
|
450
|
+
node_selector_terms=new_node_selector_terms
|
|
451
|
+
)
|
|
452
|
+
)
|
|
453
|
+
# if both preferred and new required are empty, clean node_affinity
|
|
454
|
+
if (
|
|
455
|
+
not node_affinity.preferred_during_scheduling_ignored_during_execution
|
|
456
|
+
and not new_required_during_scheduling_ignored_during_execution
|
|
457
|
+
):
|
|
458
|
+
affinity.node_affinity = None
|
|
459
|
+
return
|
|
460
|
+
|
|
461
|
+
_initialize_affinity(affinity=affinity)
|
|
462
|
+
_initialize_node_affinity(affinity=affinity)
|
|
463
|
+
|
|
464
|
+
affinity.node_affinity.required_during_scheduling_ignored_during_execution = (
|
|
465
|
+
new_required_during_scheduling_ignored_during_execution
|
|
466
|
+
)
|
|
467
|
+
return affinity
|
|
468
|
+
|
|
469
|
+
|
|
470
|
+
def _prune_node_selector_requirements_from_node_selector_terms(
    node_selector_terms: list[kubernetes.client.V1NodeSelectorTerm],
    requirements_to_prune: list[kubernetes.client.V1NodeSelectorRequirement],
) -> list[kubernetes.client.V1NodeSelectorTerm]:
    """
    Build a copy of the node selector terms without the given requirements.

    Every match expression equal to one of `requirements_to_prune` is dropped.
    A term is kept only if it still has match expressions or has match fields;
    kept terms are rebuilt as new V1NodeSelectorTerm objects.

    :param node_selector_terms:   List of V1NodeSelectorTerm objects to process.
    :param requirements_to_prune: List of V1NodeSelectorRequirement objects to
                                  remove.
    :return: A new list of V1NodeSelectorTerm objects with the requirements
             pruned.
    """
    surviving_terms = []

    for selector_term in node_selector_terms:
        kept_expressions = []
        for requirement in selector_term.match_expressions or []:
            if requirement not in requirements_to_prune:
                kept_expressions.append(requirement)

        # A term with neither expressions nor fields left is dropped
        if not kept_expressions and not selector_term.match_fields:
            continue

        surviving_terms.append(
            kubernetes.client.V1NodeSelectorTerm(
                match_expressions=kept_expressions,
                match_fields=selector_term.match_fields,
            )
        )

    return surviving_terms
|
|
503
|
+
|
|
504
|
+
|
|
505
|
+
def _override_required_during_scheduling_ignored_during_execution(
    node_selector: kubernetes.client.V1NodeSelector,
    affinity: typing.Optional[kubernetes.client.V1Affinity],
):
    """
    Set the required node affinity of `affinity` to `node_selector`.

    The affinity and its node affinity are initialized first when missing, so a
    None affinity is accepted.

    :param node_selector: selector to install as
                          required_during_scheduling_ignored_during_execution
    :param affinity:      affinity to update, may be None
    :return: the affinity holding the overridden required node affinity
    """
    prepared = _initialize_node_affinity(_initialize_affinity(affinity))
    node_affinity = prepared.node_affinity
    node_affinity.required_during_scheduling_ignored_during_execution = node_selector
    return prepared
|
|
515
|
+
|
|
516
|
+
|
|
517
|
+
def _initialize_affinity(
    affinity: typing.Optional[kubernetes.client.V1Affinity],
) -> kubernetes.client.V1Affinity:
    """
    Return the given affinity, or a new empty V1Affinity when it is falsy.

    :param affinity: affinity to fall back from, may be None
    :return: the given affinity or a fresh V1Affinity
    """
    if affinity:
        return affinity
    return kubernetes.client.V1Affinity()
|
|
521
|
+
|
|
522
|
+
|
|
523
|
+
def _initialize_node_affinity(
    affinity: typing.Optional[kubernetes.client.V1Affinity],
) -> kubernetes.client.V1Affinity:
    """
    Make sure the affinity exists and has a node affinity section.

    A falsy affinity is replaced with a new V1Affinity, and a falsy
    `node_affinity` on it is replaced with a new V1NodeAffinity.

    :param affinity: affinity to initialize, may be None
    :return: the affinity with `node_affinity` set
    """
    if not affinity:
        affinity = kubernetes.client.V1Affinity()

    current_node_affinity = affinity.node_affinity
    affinity.node_affinity = (
        current_node_affinity
        if current_node_affinity
        else kubernetes.client.V1NodeAffinity()
    )
    return affinity
|
|
531
|
+
|
|
532
|
+
|
|
533
|
+
def _prune_empty_affinity(
    affinity: typing.Optional[kubernetes.client.V1Affinity],
) -> typing.Optional[kubernetes.client.V1Affinity]:
    """
    Collapse an affinity without any meaningful constraint into None.

    The affinity object is returned untouched when it holds at least one of:
        - Pod affinity or pod anti-affinity
        - Preferred node affinity
        - Required node affinity in which some term has a match expression or
          a match field

    :param affinity: affinity to inspect, may be None
    :return: the given affinity, or None when it is effectively empty
    """
    if not affinity:
        return None

    node_rules = affinity.node_affinity
    pod_rules = affinity.pod_affinity
    pod_anti_rules = affinity.pod_anti_affinity

    # Any pod-level rule is enough to keep the object
    if pod_rules or pod_anti_rules:
        return affinity

    if not node_rules:
        return None

    required = node_rules.required_during_scheduling_ignored_during_execution
    preferred = node_rules.preferred_during_scheduling_ignored_during_execution
    if preferred:
        return affinity

    if not required or not required.node_selector_terms:
        return None

    # Required terms count only if at least one of them constrains something
    has_constraints = any(
        term.match_expressions or term.match_fields
        for term in required.node_selector_terms
    )
    return affinity if has_constraints else None
|
mlrun/launcher/base.py
CHANGED
|
@@ -57,6 +57,7 @@ class BaseLauncher(abc.ABC):
|
|
|
57
57
|
out_path: Optional[str] = "",
|
|
58
58
|
workdir: Optional[str] = "",
|
|
59
59
|
artifact_path: Optional[str] = "",
|
|
60
|
+
output_path: Optional[str] = "",
|
|
60
61
|
watch: Optional[bool] = True,
|
|
61
62
|
schedule: Optional[
|
|
62
63
|
Union[str, mlrun.common.schemas.schedule.ScheduleCronTrigger]
|
|
@@ -234,8 +235,7 @@ class BaseLauncher(abc.ABC):
|
|
|
234
235
|
hyper_param_options=None,
|
|
235
236
|
verbose=None,
|
|
236
237
|
scrape_metrics=None,
|
|
237
|
-
|
|
238
|
-
artifact_path=None,
|
|
238
|
+
output_path=None,
|
|
239
239
|
workdir=None,
|
|
240
240
|
notifications: Optional[list[mlrun.model.Notification]] = None,
|
|
241
241
|
state_thresholds: Optional[dict[str, int]] = None,
|
|
@@ -301,7 +301,7 @@ class BaseLauncher(abc.ABC):
|
|
|
301
301
|
meta = run.metadata
|
|
302
302
|
meta.uid = meta.uid or uuid.uuid4().hex
|
|
303
303
|
|
|
304
|
-
run.spec.output_path =
|
|
304
|
+
run.spec.output_path = output_path or run.spec.output_path
|
|
305
305
|
|
|
306
306
|
if not run.spec.output_path:
|
|
307
307
|
if run.metadata.project:
|
mlrun/launcher/local.py
CHANGED
|
@@ -55,6 +55,7 @@ class ClientLocalLauncher(launcher.ClientBaseLauncher):
|
|
|
55
55
|
out_path: Optional[str] = "",
|
|
56
56
|
workdir: Optional[str] = "",
|
|
57
57
|
artifact_path: Optional[str] = "",
|
|
58
|
+
output_path: Optional[str] = "",
|
|
58
59
|
watch: Optional[bool] = True,
|
|
59
60
|
schedule: Optional[
|
|
60
61
|
Union[str, mlrun.common.schemas.schedule.ScheduleCronTrigger]
|
|
@@ -116,8 +117,7 @@ class ClientLocalLauncher(launcher.ClientBaseLauncher):
|
|
|
116
117
|
hyper_param_options=hyper_param_options,
|
|
117
118
|
verbose=verbose,
|
|
118
119
|
scrape_metrics=scrape_metrics,
|
|
119
|
-
|
|
120
|
-
artifact_path=artifact_path,
|
|
120
|
+
output_path=output_path,
|
|
121
121
|
workdir=workdir,
|
|
122
122
|
notifications=notifications,
|
|
123
123
|
state_thresholds=state_thresholds,
|
mlrun/launcher/remote.py
CHANGED
|
@@ -45,6 +45,7 @@ class ClientRemoteLauncher(launcher.ClientBaseLauncher):
|
|
|
45
45
|
out_path: Optional[str] = "",
|
|
46
46
|
workdir: Optional[str] = "",
|
|
47
47
|
artifact_path: Optional[str] = "",
|
|
48
|
+
output_path: Optional[str] = "",
|
|
48
49
|
watch: Optional[bool] = True,
|
|
49
50
|
schedule: Optional[
|
|
50
51
|
Union[str, mlrun.common.schemas.schedule.ScheduleCronTrigger]
|
|
@@ -77,8 +78,7 @@ class ClientRemoteLauncher(launcher.ClientBaseLauncher):
|
|
|
77
78
|
hyper_param_options=hyper_param_options,
|
|
78
79
|
verbose=verbose,
|
|
79
80
|
scrape_metrics=scrape_metrics,
|
|
80
|
-
|
|
81
|
-
artifact_path=artifact_path,
|
|
81
|
+
output_path=output_path,
|
|
82
82
|
workdir=workdir,
|
|
83
83
|
notifications=notifications,
|
|
84
84
|
state_thresholds=state_thresholds,
|
mlrun/model.py
CHANGED
|
@@ -929,6 +929,8 @@ class RunSpec(ModelObj):
|
|
|
929
929
|
|
|
930
930
|
_fields_to_serialize = ModelObj._fields_to_serialize + [
|
|
931
931
|
"handler",
|
|
932
|
+
"affinity",
|
|
933
|
+
"tolerations",
|
|
932
934
|
]
|
|
933
935
|
|
|
934
936
|
def __init__(
|
|
@@ -956,6 +958,8 @@ class RunSpec(ModelObj):
|
|
|
956
958
|
state_thresholds=None,
|
|
957
959
|
reset_on_run=None,
|
|
958
960
|
node_selector=None,
|
|
961
|
+
tolerations=None,
|
|
962
|
+
affinity=None,
|
|
959
963
|
):
|
|
960
964
|
# A dictionary of parsing configurations that will be read from the inputs the user set. The keys are the inputs
|
|
961
965
|
# keys (parameter names) and the values are the type hint given in the input keys after the colon.
|
|
@@ -994,6 +998,8 @@ class RunSpec(ModelObj):
|
|
|
994
998
|
self.state_thresholds = state_thresholds or {}
|
|
995
999
|
self.reset_on_run = reset_on_run
|
|
996
1000
|
self.node_selector = node_selector or {}
|
|
1001
|
+
self.tolerations = tolerations or {}
|
|
1002
|
+
self.affinity = affinity or {}
|
|
997
1003
|
|
|
998
1004
|
def _serialize_field(
|
|
999
1005
|
self, struct: dict, field_name: Optional[str] = None, strip: bool = False
|
|
@@ -1003,6 +1009,14 @@ class RunSpec(ModelObj):
|
|
|
1003
1009
|
if self.handler and isinstance(self.handler, str):
|
|
1004
1010
|
return self.handler
|
|
1005
1011
|
return None
|
|
1012
|
+
|
|
1013
|
+
# Properly serialize known K8s objects
|
|
1014
|
+
if field_name in {"affinity", "tolerations"}:
|
|
1015
|
+
value = getattr(self, field_name, None)
|
|
1016
|
+
if hasattr(value, "to_dict"):
|
|
1017
|
+
return value.to_dict()
|
|
1018
|
+
return value
|
|
1019
|
+
|
|
1006
1020
|
return super()._serialize_field(struct, field_name, strip)
|
|
1007
1021
|
|
|
1008
1022
|
def is_hyper_job(self):
|
|
@@ -11,7 +11,6 @@
|
|
|
11
11
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
|
-
#
|
|
15
14
|
|
|
16
15
|
from .base import ModelMonitoringApplicationBase
|
|
17
16
|
from .context import MonitoringApplicationContext
|
|
@@ -96,7 +96,9 @@ class _PushToMonitoringWriter(StepToDict):
|
|
|
96
96
|
logger.debug(
|
|
97
97
|
"Pushing data to output stream", writer_event=str(writer_event)
|
|
98
98
|
)
|
|
99
|
-
self.output_stream.push(
|
|
99
|
+
self.output_stream.push(
|
|
100
|
+
[writer_event], partition_key=application_context.endpoint_id
|
|
101
|
+
)
|
|
100
102
|
logger.debug("Pushed data to output stream successfully")
|
|
101
103
|
|
|
102
104
|
def _lazy_init(self):
|
|
@@ -673,7 +673,9 @@ class MonitoringApplicationController:
|
|
|
673
673
|
"""
|
|
674
674
|
logger.info("Starting monitoring controller chief")
|
|
675
675
|
applications_names = []
|
|
676
|
-
endpoints = self.project_obj.list_model_endpoints(
|
|
676
|
+
endpoints = self.project_obj.list_model_endpoints(
|
|
677
|
+
metric_list=["last_request"]
|
|
678
|
+
).endpoints
|
|
677
679
|
if not endpoints:
|
|
678
680
|
logger.info("No model endpoints found", project=self.project)
|
|
679
681
|
return
|
|
@@ -82,7 +82,8 @@ class TSDBConnector(ABC):
|
|
|
82
82
|
|
|
83
83
|
@abstractmethod
|
|
84
84
|
def delete_tsdb_records(
|
|
85
|
-
self,
|
|
85
|
+
self,
|
|
86
|
+
endpoint_ids: list[str],
|
|
86
87
|
) -> None:
|
|
87
88
|
"""
|
|
88
89
|
Delete model endpoint records from the TSDB connector.
|
|
@@ -332,6 +333,7 @@ class TSDBConnector(ABC):
|
|
|
332
333
|
model_endpoint_objects: list[mlrun.common.schemas.ModelEndpoint],
|
|
333
334
|
project: str,
|
|
334
335
|
run_in_threadpool: Callable,
|
|
336
|
+
metric_list: Optional[list[str]] = None,
|
|
335
337
|
) -> list[mlrun.common.schemas.ModelEndpoint]:
|
|
336
338
|
raise NotImplementedError()
|
|
337
339
|
|