mlrun 1.9.0rc2__py3-none-any.whl → 1.10.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic. Click here for more details.

Files changed (190)
  1. mlrun/api/schemas/__init__.py +0 -1
  2. mlrun/common/__init__.py +0 -1
  3. mlrun/common/db/__init__.py +0 -1
  4. mlrun/common/db/sql_session.py +0 -1
  5. mlrun/common/formatters/__init__.py +0 -1
  6. mlrun/common/formatters/artifact.py +0 -1
  7. mlrun/common/formatters/base.py +0 -1
  8. mlrun/common/formatters/feature_set.py +0 -1
  9. mlrun/common/formatters/function.py +0 -1
  10. mlrun/common/formatters/model_endpoint.py +0 -1
  11. mlrun/common/formatters/pipeline.py +0 -1
  12. mlrun/common/formatters/project.py +0 -1
  13. mlrun/common/formatters/run.py +0 -2
  14. mlrun/common/runtimes/constants.py +1 -1
  15. mlrun/common/schemas/__init__.py +1 -0
  16. mlrun/common/schemas/alert.py +1 -1
  17. mlrun/common/schemas/api_gateway.py +1 -1
  18. mlrun/common/schemas/artifact.py +1 -1
  19. mlrun/common/schemas/auth.py +1 -1
  20. mlrun/common/schemas/background_task.py +1 -1
  21. mlrun/common/schemas/client_spec.py +1 -1
  22. mlrun/common/schemas/clusterization_spec.py +1 -1
  23. mlrun/common/schemas/constants.py +1 -1
  24. mlrun/common/schemas/datastore_profile.py +0 -1
  25. mlrun/common/schemas/events.py +1 -1
  26. mlrun/common/schemas/feature_store.py +1 -1
  27. mlrun/common/schemas/frontend_spec.py +1 -1
  28. mlrun/common/schemas/function.py +1 -1
  29. mlrun/common/schemas/http.py +1 -1
  30. mlrun/common/schemas/hub.py +1 -1
  31. mlrun/common/schemas/k8s.py +1 -1
  32. mlrun/common/schemas/memory_reports.py +0 -1
  33. mlrun/common/schemas/model_monitoring/model_endpoints.py +32 -8
  34. mlrun/common/schemas/notification.py +4 -0
  35. mlrun/common/schemas/object.py +1 -1
  36. mlrun/common/schemas/partition.py +1 -1
  37. mlrun/common/schemas/pipeline.py +1 -1
  38. mlrun/common/schemas/project.py +1 -1
  39. mlrun/common/schemas/regex.py +1 -1
  40. mlrun/common/schemas/runtime_resource.py +1 -1
  41. mlrun/common/schemas/schedule.py +1 -1
  42. mlrun/common/schemas/secret.py +1 -1
  43. mlrun/common/schemas/tag.py +0 -1
  44. mlrun/common/schemas/workflow.py +1 -1
  45. mlrun/common/secrets.py +0 -1
  46. mlrun/config.py +9 -17
  47. mlrun/data_types/infer.py +1 -1
  48. mlrun/data_types/spark.py +1 -1
  49. mlrun/datastore/datastore.py +1 -1
  50. mlrun/datastore/snowflake_utils.py +0 -1
  51. mlrun/datastore/spark_utils.py +0 -1
  52. mlrun/datastore/utils.py +1 -1
  53. mlrun/db/base.py +2 -0
  54. mlrun/db/httpdb.py +29 -19
  55. mlrun/db/nopdb.py +2 -1
  56. mlrun/errors.py +1 -1
  57. mlrun/execution.py +21 -9
  58. mlrun/feature_store/feature_set.py +0 -12
  59. mlrun/feature_store/retrieval/base.py +1 -1
  60. mlrun/feature_store/retrieval/dask_merger.py +1 -1
  61. mlrun/feature_store/retrieval/job.py +1 -1
  62. mlrun/feature_store/retrieval/spark_merger.py +0 -2
  63. mlrun/feature_store/steps.py +1 -1
  64. mlrun/features.py +1 -1
  65. mlrun/frameworks/_common/artifacts_library.py +1 -1
  66. mlrun/frameworks/_common/mlrun_interface.py +1 -1
  67. mlrun/frameworks/_common/model_handler.py +3 -3
  68. mlrun/frameworks/_common/producer.py +0 -1
  69. mlrun/frameworks/_common/utils.py +1 -1
  70. mlrun/frameworks/_dl_common/loggers/logger.py +0 -1
  71. mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +1 -1
  72. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +1 -1
  73. mlrun/frameworks/_dl_common/model_handler.py +1 -1
  74. mlrun/frameworks/_dl_common/utils.py +1 -1
  75. mlrun/frameworks/_ml_common/artifacts_library.py +1 -1
  76. mlrun/frameworks/_ml_common/loggers/logger.py +0 -1
  77. mlrun/frameworks/_ml_common/loggers/mlrun_logger.py +1 -1
  78. mlrun/frameworks/_ml_common/model_handler.py +1 -1
  79. mlrun/frameworks/_ml_common/pkl_model_server.py +1 -1
  80. mlrun/frameworks/_ml_common/plan.py +1 -1
  81. mlrun/frameworks/_ml_common/plans/calibration_curve_plan.py +0 -1
  82. mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +0 -1
  83. mlrun/frameworks/_ml_common/plans/dataset_plan.py +1 -1
  84. mlrun/frameworks/_ml_common/plans/feature_importance_plan.py +1 -1
  85. mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +1 -1
  86. mlrun/frameworks/_ml_common/producer.py +1 -1
  87. mlrun/frameworks/_ml_common/utils.py +1 -1
  88. mlrun/frameworks/lgbm/callbacks/callback.py +1 -1
  89. mlrun/frameworks/lgbm/callbacks/logging_callback.py +0 -1
  90. mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +0 -1
  91. mlrun/frameworks/lgbm/mlrun_interfaces/booster_mlrun_interface.py +1 -1
  92. mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +1 -1
  93. mlrun/frameworks/lgbm/mlrun_interfaces/model_mlrun_interface.py +1 -1
  94. mlrun/frameworks/lgbm/model_handler.py +1 -1
  95. mlrun/frameworks/lgbm/model_server.py +1 -1
  96. mlrun/frameworks/lgbm/utils.py +1 -1
  97. mlrun/frameworks/onnx/dataset.py +1 -1
  98. mlrun/frameworks/onnx/mlrun_interface.py +1 -1
  99. mlrun/frameworks/onnx/model_handler.py +1 -1
  100. mlrun/frameworks/onnx/model_server.py +1 -1
  101. mlrun/frameworks/pytorch/callbacks/callback.py +1 -1
  102. mlrun/frameworks/pytorch/callbacks/logging_callback.py +1 -1
  103. mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +1 -1
  104. mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +1 -1
  105. mlrun/frameworks/pytorch/callbacks_handler.py +1 -1
  106. mlrun/frameworks/pytorch/mlrun_interface.py +1 -1
  107. mlrun/frameworks/pytorch/model_handler.py +1 -1
  108. mlrun/frameworks/pytorch/model_server.py +1 -1
  109. mlrun/frameworks/pytorch/utils.py +1 -1
  110. mlrun/frameworks/sklearn/__init__.py +0 -14
  111. mlrun/frameworks/sklearn/estimator.py +1 -1
  112. mlrun/frameworks/sklearn/metric.py +1 -1
  113. mlrun/frameworks/sklearn/metrics_library.py +1 -1
  114. mlrun/frameworks/sklearn/mlrun_interface.py +1 -1
  115. mlrun/frameworks/sklearn/model_handler.py +1 -1
  116. mlrun/frameworks/sklearn/utils.py +1 -1
  117. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +1 -1
  118. mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +1 -1
  119. mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +1 -1
  120. mlrun/frameworks/tf_keras/mlrun_interface.py +1 -1
  121. mlrun/frameworks/tf_keras/model_handler.py +1 -1
  122. mlrun/frameworks/tf_keras/model_server.py +1 -1
  123. mlrun/frameworks/tf_keras/utils.py +1 -1
  124. mlrun/frameworks/xgboost/mlrun_interface.py +1 -1
  125. mlrun/frameworks/xgboost/model_handler.py +1 -1
  126. mlrun/frameworks/xgboost/utils.py +1 -1
  127. mlrun/k8s_utils.py +340 -0
  128. mlrun/launcher/base.py +3 -3
  129. mlrun/launcher/local.py +2 -2
  130. mlrun/launcher/remote.py +2 -2
  131. mlrun/model.py +14 -0
  132. mlrun/model_monitoring/applications/__init__.py +0 -1
  133. mlrun/model_monitoring/applications/_application_steps.py +3 -1
  134. mlrun/model_monitoring/controller.py +3 -1
  135. mlrun/model_monitoring/db/tsdb/base.py +3 -1
  136. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connection.py +213 -0
  137. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +27 -49
  138. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +35 -30
  139. mlrun/model_monitoring/stream_processing.py +7 -11
  140. mlrun/package/context_handler.py +1 -1
  141. mlrun/package/errors.py +1 -1
  142. mlrun/package/packager.py +1 -1
  143. mlrun/package/packagers/default_packager.py +1 -1
  144. mlrun/package/packagers/numpy_packagers.py +1 -1
  145. mlrun/package/packagers/pandas_packagers.py +1 -1
  146. mlrun/package/packagers/python_standard_library_packagers.py +1 -1
  147. mlrun/package/packagers_manager.py +1 -1
  148. mlrun/package/utils/_archiver.py +1 -1
  149. mlrun/package/utils/_formatter.py +1 -1
  150. mlrun/package/utils/_pickler.py +1 -1
  151. mlrun/package/utils/_supported_format.py +1 -1
  152. mlrun/package/utils/log_hint_utils.py +1 -1
  153. mlrun/package/utils/type_hint_utils.py +1 -1
  154. mlrun/projects/operations.py +36 -21
  155. mlrun/projects/project.py +82 -74
  156. mlrun/run.py +1 -1
  157. mlrun/runtimes/base.py +16 -6
  158. mlrun/runtimes/daskjob.py +2 -1
  159. mlrun/runtimes/databricks_job/databricks_cancel_task.py +0 -1
  160. mlrun/runtimes/databricks_job/databricks_runtime.py +2 -1
  161. mlrun/runtimes/databricks_job/databricks_wrapper.py +0 -1
  162. mlrun/runtimes/mounts.py +2 -0
  163. mlrun/runtimes/nuclio/function.py +6 -1
  164. mlrun/runtimes/nuclio/serving.py +1 -1
  165. mlrun/runtimes/pod.py +4 -349
  166. mlrun/runtimes/sparkjob/spark3job.py +0 -12
  167. mlrun/serving/merger.py +0 -1
  168. mlrun/serving/remote.py +1 -1
  169. mlrun/serving/serving_wrapper.py +1 -1
  170. mlrun/serving/states.py +6 -3
  171. mlrun/serving/utils.py +1 -1
  172. mlrun/utils/async_http.py +0 -1
  173. mlrun/utils/clones.py +1 -1
  174. mlrun/utils/db.py +1 -1
  175. mlrun/utils/helpers.py +3 -1
  176. mlrun/utils/http.py +0 -1
  177. mlrun/utils/notifications/notification/webhook.py +18 -2
  178. mlrun/utils/regex.py +0 -1
  179. mlrun/utils/singleton.py +1 -1
  180. mlrun/utils/vault.py +1 -1
  181. mlrun/utils/version/__init__.py +1 -1
  182. mlrun/utils/version/version.json +2 -2
  183. mlrun/utils/version/version.py +1 -1
  184. {mlrun-1.9.0rc2.dist-info → mlrun-1.10.0rc1.dist-info}/METADATA +7 -11
  185. mlrun-1.10.0rc1.dist-info/RECORD +351 -0
  186. {mlrun-1.9.0rc2.dist-info → mlrun-1.10.0rc1.dist-info}/WHEEL +1 -1
  187. mlrun-1.9.0rc2.dist-info/RECORD +0 -350
  188. {mlrun-1.9.0rc2.dist-info → mlrun-1.10.0rc1.dist-info}/entry_points.txt +0 -0
  189. {mlrun-1.9.0rc2.dist-info → mlrun-1.10.0rc1.dist-info}/licenses/LICENSE +0 -0
  190. {mlrun-1.9.0rc2.dist-info → mlrun-1.10.0rc1.dist-info}/top_level.txt +0 -0
mlrun/k8s_utils.py CHANGED
@@ -11,7 +11,9 @@
11
11
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
+ import copy
14
15
  import re
16
+ import typing
15
17
  import warnings
16
18
 
17
19
  import kubernetes.client
@@ -228,3 +230,341 @@ def validate_node_selectors(
228
230
  handle_invalid(str(err))
229
231
  return False
230
232
  return True
233
+
234
+
235
+ def enrich_preemption_mode(
236
+ preemption_mode: typing.Optional[str],
237
+ node_selector: dict[str, str],
238
+ tolerations: list[kubernetes.client.V1Toleration],
239
+ affinity: typing.Optional[kubernetes.client.V1Affinity],
240
+ ) -> tuple[
241
+ dict[str, str],
242
+ list[kubernetes.client.V1Toleration],
243
+ typing.Optional[kubernetes.client.V1Affinity],
244
+ ]:
245
+ """
246
+ Enriches a pod spec's scheduling configuration (node selector, tolerations, affinity)
247
+ based on the provided preemption mode.
248
+
249
+ If no preemptible node configuration is defined in the system, or the mode is `none`,
250
+ the original values are returned unchanged.
251
+
252
+ Modes:
253
+ - allow: Adds tolerations, removes preemption constraints.
254
+ - constrain: Requires preemptible node affinity and adds tolerations.
255
+ - prevent: Enforces scheduling on non-preemptible nodes using taints or anti-affinity.
256
+ - none: No enrichment is applied.
257
+ """
258
+ if (
259
+ not mlconfig.is_preemption_nodes_configured()
260
+ or preemption_mode == mlrun.common.schemas.PreemptionModes.none.value
261
+ ):
262
+ return node_selector, tolerations, affinity
263
+
264
+ if not preemption_mode:
265
+ preemption_mode = mlconfig.function_defaults.preemption_mode
266
+ mlrun.utils.logger.debug(
267
+ "No preemption mode provided, using default",
268
+ default_preemption_mode=preemption_mode,
269
+ )
270
+
271
+ enriched_node_selector = copy.deepcopy(node_selector or {})
272
+ enriched_tolerations = copy.deepcopy(tolerations or [])
273
+ enriched_affinity = copy.deepcopy(affinity)
274
+ preemptible_tolerations = generate_preemptible_tolerations()
275
+
276
+ if handler := _get_mode_handler(preemption_mode):
277
+ enriched_node_selector, enriched_tolerations, enriched_affinity = handler(
278
+ enriched_node_selector,
279
+ enriched_tolerations,
280
+ enriched_affinity,
281
+ preemptible_tolerations,
282
+ )
283
+
284
+ return (
285
+ enriched_node_selector,
286
+ enriched_tolerations,
287
+ _prune_empty_affinity(enriched_affinity),
288
+ )
289
+
290
+
291
+ def _get_mode_handler(mode: str):
292
+ return {
293
+ mlrun.common.schemas.PreemptionModes.prevent: _handle_prevent_mode,
294
+ mlrun.common.schemas.PreemptionModes.constrain: _handle_constrain_mode,
295
+ mlrun.common.schemas.PreemptionModes.allow: _handle_allow_mode,
296
+ }.get(mode)
297
+
298
+
299
+ def _handle_prevent_mode(
300
+ node_selector: dict[str, str],
301
+ tolerations: list[kubernetes.client.V1Toleration],
302
+ affinity: typing.Optional[kubernetes.client.V1Affinity],
303
+ preemptible_tolerations: list[kubernetes.client.V1Toleration],
304
+ ) -> tuple[
305
+ dict[str, str],
306
+ list[kubernetes.client.V1Toleration],
307
+ typing.Optional[kubernetes.client.V1Affinity],
308
+ ]:
309
+ # Ensure no preemptible node tolerations
310
+ tolerations = [t for t in tolerations if t not in preemptible_tolerations]
311
+
312
+ # Purge affinity preemption-related configuration
313
+ affinity = _prune_affinity_node_selector_requirement(
314
+ generate_preemptible_node_selector_requirements(
315
+ mlrun.common.schemas.NodeSelectorOperator.node_selector_op_in.value
316
+ ),
317
+ affinity=affinity,
318
+ )
319
+
320
+ # Remove preemptible nodes constraint
321
+ node_selector = _prune_node_selector(
322
+ mlconfig.get_preemptible_node_selector(),
323
+ enriched_node_selector=node_selector,
324
+ )
325
+
326
+ # Use anti-affinity only if no tolerations configured
327
+ if not preemptible_tolerations:
328
+ affinity = _override_required_during_scheduling_ignored_during_execution(
329
+ kubernetes.client.V1NodeSelector(
330
+ node_selector_terms=generate_preemptible_nodes_anti_affinity_terms()
331
+ ),
332
+ affinity,
333
+ )
334
+
335
+ return node_selector, tolerations, affinity
336
+
337
+
338
+ def _handle_constrain_mode(
339
+ node_selector: dict[str, str],
340
+ tolerations: list[kubernetes.client.V1Toleration],
341
+ affinity: typing.Optional[kubernetes.client.V1Affinity],
342
+ preemptible_tolerations: list[kubernetes.client.V1Toleration],
343
+ ) -> tuple[
344
+ dict[str, str],
345
+ list[kubernetes.client.V1Toleration],
346
+ typing.Optional[kubernetes.client.V1Affinity],
347
+ ]:
348
+ tolerations = _merge_tolerations(tolerations, preemptible_tolerations)
349
+
350
+ affinity = _override_required_during_scheduling_ignored_during_execution(
351
+ kubernetes.client.V1NodeSelector(
352
+ node_selector_terms=generate_preemptible_nodes_affinity_terms()
353
+ ),
354
+ affinity=affinity,
355
+ )
356
+
357
+ return node_selector, tolerations, affinity
358
+
359
+
360
+ def _handle_allow_mode(
361
+ node_selector: dict[str, str],
362
+ tolerations: list[kubernetes.client.V1Toleration],
363
+ affinity: typing.Optional[kubernetes.client.V1Affinity],
364
+ preemptible_tolerations: list[kubernetes.client.V1Toleration],
365
+ ) -> tuple[
366
+ dict[str, str],
367
+ list[kubernetes.client.V1Toleration],
368
+ typing.Optional[kubernetes.client.V1Affinity],
369
+ ]:
370
+ for op in [
371
+ mlrun.common.schemas.NodeSelectorOperator.node_selector_op_not_in.value,
372
+ mlrun.common.schemas.NodeSelectorOperator.node_selector_op_in.value,
373
+ ]:
374
+ affinity = _prune_affinity_node_selector_requirement(
375
+ generate_preemptible_node_selector_requirements(op),
376
+ affinity=affinity,
377
+ )
378
+
379
+ node_selector = _prune_node_selector(
380
+ mlconfig.get_preemptible_node_selector(),
381
+ enriched_node_selector=node_selector,
382
+ )
383
+
384
+ tolerations = _merge_tolerations(tolerations, preemptible_tolerations)
385
+ return node_selector, tolerations, affinity
386
+
387
+
388
+ def _merge_tolerations(
389
+ existing: list[kubernetes.client.V1Toleration],
390
+ to_add: list[kubernetes.client.V1Toleration],
391
+ ) -> list[kubernetes.client.V1Toleration]:
392
+ for toleration in to_add:
393
+ if toleration not in existing:
394
+ existing.append(toleration)
395
+ return existing
396
+
397
+
398
+ def _prune_node_selector(
399
+ node_selector: dict[str, str],
400
+ enriched_node_selector: dict[str, str],
401
+ ):
402
+ """
403
+ Prunes given node_selector key from function spec if their key and value are matching
404
+ :param node_selector: node selectors to prune
405
+ """
406
+ # both needs to exists to prune required node_selector from the spec node selector
407
+ if not node_selector or not enriched_node_selector:
408
+ return
409
+
410
+ mlrun.utils.logger.debug("Pruning node selectors", node_selector=node_selector)
411
+ return {
412
+ key: value
413
+ for key, value in enriched_node_selector.items()
414
+ if node_selector.get(key) != value
415
+ }
416
+
417
+
418
+ def _prune_affinity_node_selector_requirement(
419
+ node_selector_requirements: list[kubernetes.client.V1NodeSelectorRequirement],
420
+ affinity: typing.Optional[kubernetes.client.V1Affinity],
421
+ ):
422
+ """
423
+ Prunes given node selector requirements from affinity.
424
+ We are only editing required_during_scheduling_ignored_during_execution because the scheduler can't schedule
425
+ the pod unless the rule is met.
426
+ :param node_selector_requirements:
427
+ :return:
428
+ """
429
+ # both needs to exist to prune required affinity from spec affinity
430
+ if not affinity or not node_selector_requirements:
431
+ return
432
+ if affinity.node_affinity:
433
+ node_affinity: kubernetes.client.V1NodeAffinity = affinity.node_affinity
434
+
435
+ new_required_during_scheduling_ignored_during_execution = None
436
+ if node_affinity.required_during_scheduling_ignored_during_execution:
437
+ node_selector: kubernetes.client.V1NodeSelector = (
438
+ node_affinity.required_during_scheduling_ignored_during_execution
439
+ )
440
+ new_node_selector_terms = (
441
+ _prune_node_selector_requirements_from_node_selector_terms(
442
+ node_selector_terms=node_selector.node_selector_terms,
443
+ requirements_to_prune=node_selector_requirements,
444
+ )
445
+ )
446
+ # check whether there are node selector terms to add to the new list of required terms
447
+ if new_node_selector_terms:
448
+ new_required_during_scheduling_ignored_during_execution = (
449
+ kubernetes.client.V1NodeSelector(
450
+ node_selector_terms=new_node_selector_terms
451
+ )
452
+ )
453
+ # if both preferred and new required are empty, clean node_affinity
454
+ if (
455
+ not node_affinity.preferred_during_scheduling_ignored_during_execution
456
+ and not new_required_during_scheduling_ignored_during_execution
457
+ ):
458
+ affinity.node_affinity = None
459
+ return
460
+
461
+ _initialize_affinity(affinity=affinity)
462
+ _initialize_node_affinity(affinity=affinity)
463
+
464
+ affinity.node_affinity.required_during_scheduling_ignored_during_execution = (
465
+ new_required_during_scheduling_ignored_during_execution
466
+ )
467
+ return affinity
468
+
469
+
470
+ def _prune_node_selector_requirements_from_node_selector_terms(
471
+ node_selector_terms: list[kubernetes.client.V1NodeSelectorTerm],
472
+ requirements_to_prune: list[kubernetes.client.V1NodeSelectorRequirement],
473
+ ) -> list[kubernetes.client.V1NodeSelectorTerm]:
474
+ """
475
+ Removes matching node selector requirements from the given list of node selector terms.
476
+
477
+ Each term may contain multiple match expressions. This function iterates over each expression,
478
+ and removes any that exactly match one of the requirements provided.
479
+
480
+ :param node_selector_terms: List of V1NodeSelectorTerm objects to be processed.
481
+ :param requirements_to_prune: List of V1NodeSelectorRequirement objects to remove.
482
+ :return: A new list of V1NodeSelectorTerm objects with the specified requirements pruned.
483
+ """
484
+ pruned_terms = []
485
+
486
+ for term in node_selector_terms:
487
+ remaining_requirements = [
488
+ expr
489
+ for expr in term.match_expressions or []
490
+ if expr not in requirements_to_prune
491
+ ]
492
+
493
+ # Only add term if there are remaining match expressions or match fields
494
+ if remaining_requirements or term.match_fields:
495
+ pruned_terms.append(
496
+ kubernetes.client.V1NodeSelectorTerm(
497
+ match_expressions=remaining_requirements,
498
+ match_fields=term.match_fields,
499
+ )
500
+ )
501
+
502
+ return pruned_terms
503
+
504
+
505
+ def _override_required_during_scheduling_ignored_during_execution(
506
+ node_selector: kubernetes.client.V1NodeSelector,
507
+ affinity: typing.Optional[kubernetes.client.V1Affinity],
508
+ ):
509
+ affinity = _initialize_affinity(affinity)
510
+ affinity = _initialize_node_affinity(affinity)
511
+ affinity.node_affinity.required_during_scheduling_ignored_during_execution = (
512
+ node_selector
513
+ )
514
+ return affinity
515
+
516
+
517
+ def _initialize_affinity(
518
+ affinity: typing.Optional[kubernetes.client.V1Affinity],
519
+ ) -> kubernetes.client.V1Affinity:
520
+ return affinity or kubernetes.client.V1Affinity()
521
+
522
+
523
+ def _initialize_node_affinity(
524
+ affinity: typing.Optional[kubernetes.client.V1Affinity],
525
+ ) -> kubernetes.client.V1Affinity:
526
+ affinity = affinity or kubernetes.client.V1Affinity()
527
+ affinity.node_affinity = (
528
+ affinity.node_affinity or kubernetes.client.V1NodeAffinity()
529
+ )
530
+ return affinity
531
+
532
+
533
+ def _prune_empty_affinity(
534
+ affinity: typing.Optional[kubernetes.client.V1Affinity],
535
+ ) -> typing.Optional[kubernetes.client.V1Affinity]:
536
+ """
537
+ Return None if the given affinity object has no meaningful constraints.
538
+
539
+ Keeps the affinity object only if it contains:
540
+ - Any pod affinity or pod anti-affinity
541
+ - Preferred node affinity
542
+ - Required node affinity with at least one match expression or match field
543
+ """
544
+ if not affinity:
545
+ return None
546
+
547
+ node_affinity = affinity.node_affinity
548
+ pod_affinity = affinity.pod_affinity
549
+ pod_anti_affinity = affinity.pod_anti_affinity
550
+
551
+ # If any pod affinity exists, keep the object
552
+ if pod_affinity or pod_anti_affinity:
553
+ return affinity
554
+
555
+ # If node affinity exists, check if it has any meaningful content
556
+ if node_affinity:
557
+ required = node_affinity.required_during_scheduling_ignored_during_execution
558
+ preferred = node_affinity.preferred_during_scheduling_ignored_during_execution
559
+
560
+ if preferred:
561
+ return affinity
562
+
563
+ if required and required.node_selector_terms:
564
+ for term in required.node_selector_terms:
565
+ if term.match_expressions or term.match_fields:
566
+ return affinity # at least one term has meaningful constraints
567
+
568
+ # At this point, none of the affinity sections contain meaningful constraints,
569
+ # so the affinity object is effectively empty and can be safely discarded.
570
+ return None
mlrun/launcher/base.py CHANGED
@@ -57,6 +57,7 @@ class BaseLauncher(abc.ABC):
57
57
  out_path: Optional[str] = "",
58
58
  workdir: Optional[str] = "",
59
59
  artifact_path: Optional[str] = "",
60
+ output_path: Optional[str] = "",
60
61
  watch: Optional[bool] = True,
61
62
  schedule: Optional[
62
63
  Union[str, mlrun.common.schemas.schedule.ScheduleCronTrigger]
@@ -234,8 +235,7 @@ class BaseLauncher(abc.ABC):
234
235
  hyper_param_options=None,
235
236
  verbose=None,
236
237
  scrape_metrics=None,
237
- out_path=None,
238
- artifact_path=None,
238
+ output_path=None,
239
239
  workdir=None,
240
240
  notifications: Optional[list[mlrun.model.Notification]] = None,
241
241
  state_thresholds: Optional[dict[str, int]] = None,
@@ -301,7 +301,7 @@ class BaseLauncher(abc.ABC):
301
301
  meta = run.metadata
302
302
  meta.uid = meta.uid or uuid.uuid4().hex
303
303
 
304
- run.spec.output_path = out_path or artifact_path or run.spec.output_path
304
+ run.spec.output_path = output_path or run.spec.output_path
305
305
 
306
306
  if not run.spec.output_path:
307
307
  if run.metadata.project:
mlrun/launcher/local.py CHANGED
@@ -55,6 +55,7 @@ class ClientLocalLauncher(launcher.ClientBaseLauncher):
55
55
  out_path: Optional[str] = "",
56
56
  workdir: Optional[str] = "",
57
57
  artifact_path: Optional[str] = "",
58
+ output_path: Optional[str] = "",
58
59
  watch: Optional[bool] = True,
59
60
  schedule: Optional[
60
61
  Union[str, mlrun.common.schemas.schedule.ScheduleCronTrigger]
@@ -116,8 +117,7 @@ class ClientLocalLauncher(launcher.ClientBaseLauncher):
116
117
  hyper_param_options=hyper_param_options,
117
118
  verbose=verbose,
118
119
  scrape_metrics=scrape_metrics,
119
- out_path=out_path,
120
- artifact_path=artifact_path,
120
+ output_path=output_path,
121
121
  workdir=workdir,
122
122
  notifications=notifications,
123
123
  state_thresholds=state_thresholds,
mlrun/launcher/remote.py CHANGED
@@ -45,6 +45,7 @@ class ClientRemoteLauncher(launcher.ClientBaseLauncher):
45
45
  out_path: Optional[str] = "",
46
46
  workdir: Optional[str] = "",
47
47
  artifact_path: Optional[str] = "",
48
+ output_path: Optional[str] = "",
48
49
  watch: Optional[bool] = True,
49
50
  schedule: Optional[
50
51
  Union[str, mlrun.common.schemas.schedule.ScheduleCronTrigger]
@@ -77,8 +78,7 @@ class ClientRemoteLauncher(launcher.ClientBaseLauncher):
77
78
  hyper_param_options=hyper_param_options,
78
79
  verbose=verbose,
79
80
  scrape_metrics=scrape_metrics,
80
- out_path=out_path,
81
- artifact_path=artifact_path,
81
+ output_path=output_path,
82
82
  workdir=workdir,
83
83
  notifications=notifications,
84
84
  state_thresholds=state_thresholds,
mlrun/model.py CHANGED
@@ -929,6 +929,8 @@ class RunSpec(ModelObj):
929
929
 
930
930
  _fields_to_serialize = ModelObj._fields_to_serialize + [
931
931
  "handler",
932
+ "affinity",
933
+ "tolerations",
932
934
  ]
933
935
 
934
936
  def __init__(
@@ -956,6 +958,8 @@ class RunSpec(ModelObj):
956
958
  state_thresholds=None,
957
959
  reset_on_run=None,
958
960
  node_selector=None,
961
+ tolerations=None,
962
+ affinity=None,
959
963
  ):
960
964
  # A dictionary of parsing configurations that will be read from the inputs the user set. The keys are the inputs
961
965
  # keys (parameter names) and the values are the type hint given in the input keys after the colon.
@@ -994,6 +998,8 @@ class RunSpec(ModelObj):
994
998
  self.state_thresholds = state_thresholds or {}
995
999
  self.reset_on_run = reset_on_run
996
1000
  self.node_selector = node_selector or {}
1001
+ self.tolerations = tolerations or {}
1002
+ self.affinity = affinity or {}
997
1003
 
998
1004
  def _serialize_field(
999
1005
  self, struct: dict, field_name: Optional[str] = None, strip: bool = False
@@ -1003,6 +1009,14 @@ class RunSpec(ModelObj):
1003
1009
  if self.handler and isinstance(self.handler, str):
1004
1010
  return self.handler
1005
1011
  return None
1012
+
1013
+ # Properly serialize known K8s objects
1014
+ if field_name in {"affinity", "tolerations"}:
1015
+ value = getattr(self, field_name, None)
1016
+ if hasattr(value, "to_dict"):
1017
+ return value.to_dict()
1018
+ return value
1019
+
1006
1020
  return super()._serialize_field(struct, field_name, strip)
1007
1021
 
1008
1022
  def is_hyper_job(self):
@@ -11,7 +11,6 @@
11
11
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
- #
15
14
 
16
15
  from .base import ModelMonitoringApplicationBase
17
16
  from .context import MonitoringApplicationContext
@@ -96,7 +96,9 @@ class _PushToMonitoringWriter(StepToDict):
96
96
  logger.debug(
97
97
  "Pushing data to output stream", writer_event=str(writer_event)
98
98
  )
99
- self.output_stream.push([writer_event])
99
+ self.output_stream.push(
100
+ [writer_event], partition_key=application_context.endpoint_id
101
+ )
100
102
  logger.debug("Pushed data to output stream successfully")
101
103
 
102
104
  def _lazy_init(self):
@@ -673,7 +673,9 @@ class MonitoringApplicationController:
673
673
  """
674
674
  logger.info("Starting monitoring controller chief")
675
675
  applications_names = []
676
- endpoints = self.project_obj.list_model_endpoints(tsdb_metrics=True).endpoints
676
+ endpoints = self.project_obj.list_model_endpoints(
677
+ metric_list=["last_request"]
678
+ ).endpoints
677
679
  if not endpoints:
678
680
  logger.info("No model endpoints found", project=self.project)
679
681
  return
@@ -82,7 +82,8 @@ class TSDBConnector(ABC):
82
82
 
83
83
  @abstractmethod
84
84
  def delete_tsdb_records(
85
- self, endpoint_ids: list[str], delete_timeout: Optional[int] = None
85
+ self,
86
+ endpoint_ids: list[str],
86
87
  ) -> None:
87
88
  """
88
89
  Delete model endpoint records from the TSDB connector.
@@ -332,6 +333,7 @@ class TSDBConnector(ABC):
332
333
  model_endpoint_objects: list[mlrun.common.schemas.ModelEndpoint],
333
334
  project: str,
334
335
  run_in_threadpool: Callable,
336
+ metric_list: Optional[list[str]] = None,
335
337
  ) -> list[mlrun.common.schemas.ModelEndpoint]:
336
338
  raise NotImplementedError()
337
339