mlrun 1.9.0rc4__py3-none-any.whl → 1.10.0rc2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic. Click here for more details.

Files changed (193) hide show
  1. mlrun/__main__.py +13 -2
  2. mlrun/api/schemas/__init__.py +0 -1
  3. mlrun/common/__init__.py +0 -1
  4. mlrun/common/constants.py +7 -0
  5. mlrun/common/db/__init__.py +0 -1
  6. mlrun/common/db/sql_session.py +0 -1
  7. mlrun/common/formatters/__init__.py +0 -1
  8. mlrun/common/formatters/artifact.py +0 -1
  9. mlrun/common/formatters/base.py +0 -1
  10. mlrun/common/formatters/feature_set.py +0 -1
  11. mlrun/common/formatters/function.py +0 -1
  12. mlrun/common/formatters/model_endpoint.py +0 -1
  13. mlrun/common/formatters/pipeline.py +0 -1
  14. mlrun/common/formatters/project.py +0 -1
  15. mlrun/common/formatters/run.py +0 -2
  16. mlrun/common/runtimes/constants.py +9 -2
  17. mlrun/common/schemas/__init__.py +2 -0
  18. mlrun/common/schemas/alert.py +1 -1
  19. mlrun/common/schemas/api_gateway.py +1 -1
  20. mlrun/common/schemas/artifact.py +1 -1
  21. mlrun/common/schemas/auth.py +1 -1
  22. mlrun/common/schemas/background_task.py +1 -1
  23. mlrun/common/schemas/client_spec.py +1 -1
  24. mlrun/common/schemas/clusterization_spec.py +1 -1
  25. mlrun/common/schemas/constants.py +1 -1
  26. mlrun/common/schemas/datastore_profile.py +0 -1
  27. mlrun/common/schemas/events.py +1 -1
  28. mlrun/common/schemas/feature_store.py +1 -1
  29. mlrun/common/schemas/frontend_spec.py +1 -1
  30. mlrun/common/schemas/function.py +1 -1
  31. mlrun/common/schemas/http.py +1 -1
  32. mlrun/common/schemas/hub.py +1 -1
  33. mlrun/common/schemas/k8s.py +1 -1
  34. mlrun/common/schemas/memory_reports.py +0 -1
  35. mlrun/common/schemas/notification.py +4 -0
  36. mlrun/common/schemas/object.py +1 -1
  37. mlrun/common/schemas/partition.py +1 -1
  38. mlrun/common/schemas/pipeline.py +1 -1
  39. mlrun/common/schemas/project.py +1 -1
  40. mlrun/common/schemas/regex.py +1 -1
  41. mlrun/common/schemas/runtime_resource.py +1 -1
  42. mlrun/common/schemas/schedule.py +1 -1
  43. mlrun/common/schemas/secret.py +1 -1
  44. mlrun/common/schemas/serving.py +17 -0
  45. mlrun/common/schemas/tag.py +0 -1
  46. mlrun/common/schemas/workflow.py +1 -1
  47. mlrun/common/secrets.py +0 -1
  48. mlrun/config.py +9 -11
  49. mlrun/data_types/infer.py +1 -1
  50. mlrun/data_types/spark.py +1 -1
  51. mlrun/datastore/datastore.py +1 -1
  52. mlrun/datastore/datastore_profile.py +5 -55
  53. mlrun/datastore/snowflake_utils.py +0 -1
  54. mlrun/datastore/sources.py +21 -13
  55. mlrun/datastore/spark_utils.py +0 -1
  56. mlrun/datastore/utils.py +20 -5
  57. mlrun/db/base.py +1 -1
  58. mlrun/db/httpdb.py +17 -12
  59. mlrun/db/nopdb.py +1 -2
  60. mlrun/errors.py +8 -1
  61. mlrun/execution.py +22 -1
  62. mlrun/feature_store/feature_set.py +0 -12
  63. mlrun/feature_store/retrieval/base.py +1 -1
  64. mlrun/feature_store/retrieval/dask_merger.py +1 -1
  65. mlrun/feature_store/retrieval/job.py +1 -1
  66. mlrun/feature_store/retrieval/spark_merger.py +0 -2
  67. mlrun/feature_store/steps.py +1 -1
  68. mlrun/features.py +1 -1
  69. mlrun/frameworks/_common/artifacts_library.py +1 -1
  70. mlrun/frameworks/_common/mlrun_interface.py +1 -1
  71. mlrun/frameworks/_common/model_handler.py +3 -3
  72. mlrun/frameworks/_common/producer.py +0 -1
  73. mlrun/frameworks/_common/utils.py +1 -1
  74. mlrun/frameworks/_dl_common/loggers/logger.py +0 -1
  75. mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +1 -1
  76. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +1 -1
  77. mlrun/frameworks/_dl_common/model_handler.py +1 -1
  78. mlrun/frameworks/_dl_common/utils.py +1 -1
  79. mlrun/frameworks/_ml_common/artifacts_library.py +1 -1
  80. mlrun/frameworks/_ml_common/loggers/logger.py +0 -1
  81. mlrun/frameworks/_ml_common/loggers/mlrun_logger.py +1 -1
  82. mlrun/frameworks/_ml_common/model_handler.py +1 -1
  83. mlrun/frameworks/_ml_common/pkl_model_server.py +1 -1
  84. mlrun/frameworks/_ml_common/plan.py +1 -1
  85. mlrun/frameworks/_ml_common/plans/calibration_curve_plan.py +0 -1
  86. mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +0 -1
  87. mlrun/frameworks/_ml_common/plans/dataset_plan.py +1 -1
  88. mlrun/frameworks/_ml_common/plans/feature_importance_plan.py +1 -1
  89. mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +1 -1
  90. mlrun/frameworks/_ml_common/producer.py +1 -1
  91. mlrun/frameworks/_ml_common/utils.py +1 -1
  92. mlrun/frameworks/lgbm/callbacks/callback.py +1 -1
  93. mlrun/frameworks/lgbm/callbacks/logging_callback.py +0 -1
  94. mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +0 -1
  95. mlrun/frameworks/lgbm/mlrun_interfaces/booster_mlrun_interface.py +1 -1
  96. mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +1 -1
  97. mlrun/frameworks/lgbm/mlrun_interfaces/model_mlrun_interface.py +1 -1
  98. mlrun/frameworks/lgbm/model_handler.py +1 -1
  99. mlrun/frameworks/lgbm/model_server.py +1 -1
  100. mlrun/frameworks/lgbm/utils.py +1 -1
  101. mlrun/frameworks/onnx/dataset.py +1 -1
  102. mlrun/frameworks/onnx/mlrun_interface.py +1 -1
  103. mlrun/frameworks/onnx/model_handler.py +1 -1
  104. mlrun/frameworks/onnx/model_server.py +1 -1
  105. mlrun/frameworks/pytorch/callbacks/callback.py +1 -1
  106. mlrun/frameworks/pytorch/callbacks/logging_callback.py +1 -1
  107. mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +1 -1
  108. mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +1 -1
  109. mlrun/frameworks/pytorch/callbacks_handler.py +1 -1
  110. mlrun/frameworks/pytorch/mlrun_interface.py +1 -1
  111. mlrun/frameworks/pytorch/model_handler.py +1 -1
  112. mlrun/frameworks/pytorch/model_server.py +1 -1
  113. mlrun/frameworks/pytorch/utils.py +1 -1
  114. mlrun/frameworks/sklearn/__init__.py +0 -14
  115. mlrun/frameworks/sklearn/estimator.py +1 -1
  116. mlrun/frameworks/sklearn/metric.py +1 -1
  117. mlrun/frameworks/sklearn/metrics_library.py +1 -1
  118. mlrun/frameworks/sklearn/mlrun_interface.py +1 -1
  119. mlrun/frameworks/sklearn/model_handler.py +1 -1
  120. mlrun/frameworks/sklearn/utils.py +1 -1
  121. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +1 -1
  122. mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +1 -1
  123. mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +1 -1
  124. mlrun/frameworks/tf_keras/mlrun_interface.py +1 -1
  125. mlrun/frameworks/tf_keras/model_handler.py +1 -1
  126. mlrun/frameworks/tf_keras/model_server.py +1 -1
  127. mlrun/frameworks/tf_keras/utils.py +1 -1
  128. mlrun/frameworks/xgboost/mlrun_interface.py +1 -1
  129. mlrun/frameworks/xgboost/model_handler.py +1 -1
  130. mlrun/frameworks/xgboost/utils.py +1 -1
  131. mlrun/k8s_utils.py +340 -0
  132. mlrun/launcher/base.py +3 -3
  133. mlrun/launcher/client.py +1 -1
  134. mlrun/launcher/local.py +2 -2
  135. mlrun/launcher/remote.py +2 -2
  136. mlrun/model.py +14 -0
  137. mlrun/model_monitoring/applications/__init__.py +0 -1
  138. mlrun/model_monitoring/applications/_application_steps.py +3 -1
  139. mlrun/model_monitoring/applications/evidently/base.py +59 -71
  140. mlrun/model_monitoring/controller.py +26 -13
  141. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +13 -5
  142. mlrun/package/context_handler.py +1 -1
  143. mlrun/package/errors.py +1 -1
  144. mlrun/package/packager.py +1 -1
  145. mlrun/package/packagers/default_packager.py +1 -1
  146. mlrun/package/packagers/numpy_packagers.py +1 -1
  147. mlrun/package/packagers/pandas_packagers.py +1 -1
  148. mlrun/package/packagers/python_standard_library_packagers.py +1 -1
  149. mlrun/package/packagers_manager.py +1 -1
  150. mlrun/package/utils/_archiver.py +1 -1
  151. mlrun/package/utils/_formatter.py +1 -1
  152. mlrun/package/utils/_pickler.py +1 -1
  153. mlrun/package/utils/_supported_format.py +1 -1
  154. mlrun/package/utils/log_hint_utils.py +1 -1
  155. mlrun/package/utils/type_hint_utils.py +1 -1
  156. mlrun/projects/operations.py +36 -21
  157. mlrun/projects/project.py +147 -92
  158. mlrun/render.py +5 -9
  159. mlrun/run.py +1 -1
  160. mlrun/runtimes/base.py +17 -7
  161. mlrun/runtimes/daskjob.py +2 -1
  162. mlrun/runtimes/databricks_job/databricks_cancel_task.py +0 -1
  163. mlrun/runtimes/databricks_job/databricks_runtime.py +2 -1
  164. mlrun/runtimes/databricks_job/databricks_wrapper.py +0 -1
  165. mlrun/runtimes/mounts.py +2 -0
  166. mlrun/runtimes/nuclio/function.py +5 -0
  167. mlrun/runtimes/nuclio/serving.py +1 -1
  168. mlrun/runtimes/pod.py +4 -349
  169. mlrun/runtimes/sparkjob/spark3job.py +0 -12
  170. mlrun/runtimes/utils.py +25 -8
  171. mlrun/serving/merger.py +0 -1
  172. mlrun/serving/remote.py +1 -1
  173. mlrun/serving/serving_wrapper.py +1 -1
  174. mlrun/serving/states.py +142 -9
  175. mlrun/serving/utils.py +1 -1
  176. mlrun/utils/async_http.py +0 -1
  177. mlrun/utils/clones.py +1 -1
  178. mlrun/utils/db.py +1 -1
  179. mlrun/utils/helpers.py +3 -1
  180. mlrun/utils/http.py +0 -1
  181. mlrun/utils/regex.py +0 -1
  182. mlrun/utils/singleton.py +1 -1
  183. mlrun/utils/vault.py +1 -1
  184. mlrun/utils/version/__init__.py +1 -1
  185. mlrun/utils/version/version.json +2 -2
  186. mlrun/utils/version/version.py +1 -1
  187. {mlrun-1.9.0rc4.dist-info → mlrun-1.10.0rc2.dist-info}/METADATA +12 -12
  188. mlrun-1.10.0rc2.dist-info/RECORD +351 -0
  189. {mlrun-1.9.0rc4.dist-info → mlrun-1.10.0rc2.dist-info}/WHEEL +1 -1
  190. mlrun-1.9.0rc4.dist-info/RECORD +0 -351
  191. {mlrun-1.9.0rc4.dist-info → mlrun-1.10.0rc2.dist-info}/entry_points.txt +0 -0
  192. {mlrun-1.9.0rc4.dist-info → mlrun-1.10.0rc2.dist-info}/licenses/LICENSE +0 -0
  193. {mlrun-1.9.0rc4.dist-info → mlrun-1.10.0rc2.dist-info}/top_level.txt +0 -0
mlrun/k8s_utils.py CHANGED
@@ -11,7 +11,9 @@
11
11
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
+ import copy
14
15
  import re
16
+ import typing
15
17
  import warnings
16
18
 
17
19
  import kubernetes.client
@@ -228,3 +230,341 @@ def validate_node_selectors(
228
230
  handle_invalid(str(err))
229
231
  return False
230
232
  return True
233
+
234
+
235
def enrich_preemption_mode(
    preemption_mode: typing.Optional[str],
    node_selector: dict[str, str],
    tolerations: list[kubernetes.client.V1Toleration],
    affinity: typing.Optional[kubernetes.client.V1Affinity],
) -> tuple[
    dict[str, str],
    list[kubernetes.client.V1Toleration],
    typing.Optional[kubernetes.client.V1Affinity],
]:
    """
    Derive a pod's scheduling configuration for the requested preemption mode.

    The inputs are never modified: the handlers work on deep copies of the node
    selector, tolerations and affinity.

    The original values are returned as-is when no preemptible nodes are
    configured in the system, or when the mode is `none`. When no mode is given,
    the configured function default is used.

    Modes:
        - allow: Adds tolerations, removes preemption constraints.
        - constrain: Requires preemptible node affinity and adds tolerations.
        - prevent: Enforces scheduling on non-preemptible nodes using taints or anti-affinity.
        - none: No enrichment is applied.

    :param preemption_mode: requested mode, or a falsy value to use the default
    :param node_selector:   node selector of the pod spec
    :param tolerations:     tolerations of the pod spec
    :param affinity:        affinity of the pod spec, if any
    :return: tuple of (node selector, tolerations, affinity); the affinity is None
             when it ends up without any meaningful constraint
    """
    if not mlconfig.is_preemption_nodes_configured():
        return node_selector, tolerations, affinity
    if preemption_mode == mlrun.common.schemas.PreemptionModes.none.value:
        return node_selector, tolerations, affinity

    if not preemption_mode:
        preemption_mode = mlconfig.function_defaults.preemption_mode
        mlrun.utils.logger.debug(
            "No preemption mode provided, using default",
            default_preemption_mode=preemption_mode,
        )

    # Work on copies so the caller's objects stay untouched
    selector_copy = copy.deepcopy(node_selector or {})
    tolerations_copy = copy.deepcopy(tolerations or [])
    affinity_copy = copy.deepcopy(affinity)
    preemptible_tolerations = generate_preemptible_tolerations()

    mode_handler = _get_mode_handler(preemption_mode)
    if mode_handler:
        selector_copy, tolerations_copy, affinity_copy = mode_handler(
            selector_copy,
            tolerations_copy,
            affinity_copy,
            preemptible_tolerations,
        )

    return selector_copy, tolerations_copy, _prune_empty_affinity(affinity_copy)
289
+
290
+
291
def _get_mode_handler(mode: str):
    """
    Look up the enrichment handler for a preemption mode.

    :param mode: preemption mode to resolve
    :return: the handler function, or None when the mode has no handler
    """
    modes = mlrun.common.schemas.PreemptionModes
    handlers_by_mode = {
        modes.prevent: _handle_prevent_mode,
        modes.constrain: _handle_constrain_mode,
        modes.allow: _handle_allow_mode,
    }
    return handlers_by_mode.get(mode)
297
+
298
+
299
def _handle_prevent_mode(
    node_selector: dict[str, str],
    tolerations: list[kubernetes.client.V1Toleration],
    affinity: typing.Optional[kubernetes.client.V1Affinity],
    preemptible_tolerations: list[kubernetes.client.V1Toleration],
) -> tuple[
    dict[str, str],
    list[kubernetes.client.V1Toleration],
    typing.Optional[kubernetes.client.V1Affinity],
]:
    """
    Apply the `prevent` preemption mode to a scheduling configuration.

    Strips the preemptible tolerations, the preemptible `In` affinity requirements
    and the preemptible node selector entries. When no preemptible tolerations are
    configured, the required node affinity is overridden with the anti-affinity terms.

    :param node_selector:           node selector to enrich
    :param tolerations:             tolerations to enrich
    :param affinity:                affinity to enrich, if any
    :param preemptible_tolerations: tolerations configured for preemptible nodes
    :return: tuple of (node selector, tolerations, affinity)
    """
    # Drop every toleration that would allow landing on a preemptible node
    remaining_tolerations = [
        toleration
        for toleration in tolerations
        if toleration not in preemptible_tolerations
    ]

    # Purge affinity preemption-related configuration
    in_operator = mlrun.common.schemas.NodeSelectorOperator.node_selector_op_in.value
    preemptible_requirements = generate_preemptible_node_selector_requirements(
        in_operator
    )
    affinity = _prune_affinity_node_selector_requirement(
        preemptible_requirements,
        affinity=affinity,
    )

    # Remove preemptible nodes constraint
    preemptible_node_selector = mlconfig.get_preemptible_node_selector()
    node_selector = _prune_node_selector(
        preemptible_node_selector,
        enriched_node_selector=node_selector,
    )

    if preemptible_tolerations:
        return node_selector, remaining_tolerations, affinity

    # Use anti-affinity only if no tolerations configured
    anti_affinity_selector = kubernetes.client.V1NodeSelector(
        node_selector_terms=generate_preemptible_nodes_anti_affinity_terms()
    )
    affinity = _override_required_during_scheduling_ignored_during_execution(
        anti_affinity_selector,
        affinity,
    )
    return node_selector, remaining_tolerations, affinity
336
+
337
+
338
def _handle_constrain_mode(
    node_selector: dict[str, str],
    tolerations: list[kubernetes.client.V1Toleration],
    affinity: typing.Optional[kubernetes.client.V1Affinity],
    preemptible_tolerations: list[kubernetes.client.V1Toleration],
) -> tuple[
    dict[str, str],
    list[kubernetes.client.V1Toleration],
    typing.Optional[kubernetes.client.V1Affinity],
]:
    """
    Apply the `constrain` preemption mode to a scheduling configuration.

    Adds the preemptible tolerations and overrides the required node affinity with
    the preemptible nodes affinity terms. The node selector is returned untouched.

    :param node_selector:           node selector to enrich
    :param tolerations:             tolerations to enrich
    :param affinity:                affinity to enrich, if any
    :param preemptible_tolerations: tolerations configured for preemptible nodes
    :return: tuple of (node selector, tolerations, affinity)
    """
    merged_tolerations = _merge_tolerations(tolerations, preemptible_tolerations)

    preemptible_selector = kubernetes.client.V1NodeSelector(
        node_selector_terms=generate_preemptible_nodes_affinity_terms()
    )
    constrained_affinity = (
        _override_required_during_scheduling_ignored_during_execution(
            preemptible_selector,
            affinity=affinity,
        )
    )

    return node_selector, merged_tolerations, constrained_affinity
358
+
359
+
360
def _handle_allow_mode(
    node_selector: dict[str, str],
    tolerations: list[kubernetes.client.V1Toleration],
    affinity: typing.Optional[kubernetes.client.V1Affinity],
    preemptible_tolerations: list[kubernetes.client.V1Toleration],
) -> tuple[
    dict[str, str],
    list[kubernetes.client.V1Toleration],
    typing.Optional[kubernetes.client.V1Affinity],
]:
    """
    Apply the `allow` preemption mode to a scheduling configuration.

    Removes the preemptible `NotIn` and `In` affinity requirements and the preemptible
    node selector entries, then adds the preemptible tolerations.

    :param node_selector:           node selector to enrich
    :param tolerations:             tolerations to enrich
    :param affinity:                affinity to enrich, if any
    :param preemptible_tolerations: tolerations configured for preemptible nodes
    :return: tuple of (node selector, tolerations, affinity)
    """
    operators = mlrun.common.schemas.NodeSelectorOperator
    operators_to_prune = (
        operators.node_selector_op_not_in.value,
        operators.node_selector_op_in.value,
    )
    for operator in operators_to_prune:
        requirements = generate_preemptible_node_selector_requirements(operator)
        affinity = _prune_affinity_node_selector_requirement(
            requirements,
            affinity=affinity,
        )

    preemptible_node_selector = mlconfig.get_preemptible_node_selector()
    node_selector = _prune_node_selector(
        preemptible_node_selector,
        enriched_node_selector=node_selector,
    )

    merged_tolerations = _merge_tolerations(tolerations, preemptible_tolerations)
    return node_selector, merged_tolerations, affinity
386
+
387
+
388
+ def _merge_tolerations(
389
+ existing: list[kubernetes.client.V1Toleration],
390
+ to_add: list[kubernetes.client.V1Toleration],
391
+ ) -> list[kubernetes.client.V1Toleration]:
392
+ for toleration in to_add:
393
+ if toleration not in existing:
394
+ existing.append(toleration)
395
+ return existing
396
+
397
+
398
+ def _prune_node_selector(
399
+ node_selector: dict[str, str],
400
+ enriched_node_selector: dict[str, str],
401
+ ):
402
+ """
403
+ Prunes given node_selector key from function spec if their key and value are matching
404
+ :param node_selector: node selectors to prune
405
+ """
406
+ # both needs to exists to prune required node_selector from the spec node selector
407
+ if not node_selector or not enriched_node_selector:
408
+ return
409
+
410
+ mlrun.utils.logger.debug("Pruning node selectors", node_selector=node_selector)
411
+ return {
412
+ key: value
413
+ for key, value in enriched_node_selector.items()
414
+ if node_selector.get(key) != value
415
+ }
416
+
417
+
418
+ def _prune_affinity_node_selector_requirement(
419
+ node_selector_requirements: list[kubernetes.client.V1NodeSelectorRequirement],
420
+ affinity: typing.Optional[kubernetes.client.V1Affinity],
421
+ ):
422
+ """
423
+ Prunes given node selector requirements from affinity.
424
+ We are only editing required_during_scheduling_ignored_during_execution because the scheduler can't schedule
425
+ the pod unless the rule is met.
426
+ :param node_selector_requirements:
427
+ :return:
428
+ """
429
+ # both needs to exist to prune required affinity from spec affinity
430
+ if not affinity or not node_selector_requirements:
431
+ return
432
+ if affinity.node_affinity:
433
+ node_affinity: kubernetes.client.V1NodeAffinity = affinity.node_affinity
434
+
435
+ new_required_during_scheduling_ignored_during_execution = None
436
+ if node_affinity.required_during_scheduling_ignored_during_execution:
437
+ node_selector: kubernetes.client.V1NodeSelector = (
438
+ node_affinity.required_during_scheduling_ignored_during_execution
439
+ )
440
+ new_node_selector_terms = (
441
+ _prune_node_selector_requirements_from_node_selector_terms(
442
+ node_selector_terms=node_selector.node_selector_terms,
443
+ requirements_to_prune=node_selector_requirements,
444
+ )
445
+ )
446
+ # check whether there are node selector terms to add to the new list of required terms
447
+ if new_node_selector_terms:
448
+ new_required_during_scheduling_ignored_during_execution = (
449
+ kubernetes.client.V1NodeSelector(
450
+ node_selector_terms=new_node_selector_terms
451
+ )
452
+ )
453
+ # if both preferred and new required are empty, clean node_affinity
454
+ if (
455
+ not node_affinity.preferred_during_scheduling_ignored_during_execution
456
+ and not new_required_during_scheduling_ignored_during_execution
457
+ ):
458
+ affinity.node_affinity = None
459
+ return
460
+
461
+ _initialize_affinity(affinity=affinity)
462
+ _initialize_node_affinity(affinity=affinity)
463
+
464
+ affinity.node_affinity.required_during_scheduling_ignored_during_execution = (
465
+ new_required_during_scheduling_ignored_during_execution
466
+ )
467
+ return affinity
468
+
469
+
470
+ def _prune_node_selector_requirements_from_node_selector_terms(
471
+ node_selector_terms: list[kubernetes.client.V1NodeSelectorTerm],
472
+ requirements_to_prune: list[kubernetes.client.V1NodeSelectorRequirement],
473
+ ) -> list[kubernetes.client.V1NodeSelectorTerm]:
474
+ """
475
+ Removes matching node selector requirements from the given list of node selector terms.
476
+
477
+ Each term may contain multiple match expressions. This function iterates over each expression,
478
+ and removes any that exactly match one of the requirements provided.
479
+
480
+ :param node_selector_terms: List of V1NodeSelectorTerm objects to be processed.
481
+ :param requirements_to_prune: List of V1NodeSelectorRequirement objects to remove.
482
+ :return: A new list of V1NodeSelectorTerm objects with the specified requirements pruned.
483
+ """
484
+ pruned_terms = []
485
+
486
+ for term in node_selector_terms:
487
+ remaining_requirements = [
488
+ expr
489
+ for expr in term.match_expressions or []
490
+ if expr not in requirements_to_prune
491
+ ]
492
+
493
+ # Only add term if there are remaining match expressions or match fields
494
+ if remaining_requirements or term.match_fields:
495
+ pruned_terms.append(
496
+ kubernetes.client.V1NodeSelectorTerm(
497
+ match_expressions=remaining_requirements,
498
+ match_fields=term.match_fields,
499
+ )
500
+ )
501
+
502
+ return pruned_terms
503
+
504
+
505
def _override_required_during_scheduling_ignored_during_execution(
    node_selector: kubernetes.client.V1NodeSelector,
    affinity: typing.Optional[kubernetes.client.V1Affinity],
):
    """
    Set the required node affinity of the given affinity to `node_selector`.

    The affinity and its node affinity are created when missing; any existing
    required node selector is replaced.

    :param node_selector: node selector to set as the required node affinity
    :param affinity:      affinity to update, if any
    :return: the updated affinity
    """
    initialized = _initialize_node_affinity(_initialize_affinity(affinity))
    node_affinity = initialized.node_affinity
    node_affinity.required_during_scheduling_ignored_during_execution = node_selector
    return initialized
515
+
516
+
517
+ def _initialize_affinity(
518
+ affinity: typing.Optional[kubernetes.client.V1Affinity],
519
+ ) -> kubernetes.client.V1Affinity:
520
+ return affinity or kubernetes.client.V1Affinity()
521
+
522
+
523
+ def _initialize_node_affinity(
524
+ affinity: typing.Optional[kubernetes.client.V1Affinity],
525
+ ) -> kubernetes.client.V1Affinity:
526
+ affinity = affinity or kubernetes.client.V1Affinity()
527
+ affinity.node_affinity = (
528
+ affinity.node_affinity or kubernetes.client.V1NodeAffinity()
529
+ )
530
+ return affinity
531
+
532
+
533
+ def _prune_empty_affinity(
534
+ affinity: typing.Optional[kubernetes.client.V1Affinity],
535
+ ) -> typing.Optional[kubernetes.client.V1Affinity]:
536
+ """
537
+ Return None if the given affinity object has no meaningful constraints.
538
+
539
+ Keeps the affinity object only if it contains:
540
+ - Any pod affinity or pod anti-affinity
541
+ - Preferred node affinity
542
+ - Required node affinity with at least one match expression or match field
543
+ """
544
+ if not affinity:
545
+ return None
546
+
547
+ node_affinity = affinity.node_affinity
548
+ pod_affinity = affinity.pod_affinity
549
+ pod_anti_affinity = affinity.pod_anti_affinity
550
+
551
+ # If any pod affinity exists, keep the object
552
+ if pod_affinity or pod_anti_affinity:
553
+ return affinity
554
+
555
+ # If node affinity exists, check if it has any meaningful content
556
+ if node_affinity:
557
+ required = node_affinity.required_during_scheduling_ignored_during_execution
558
+ preferred = node_affinity.preferred_during_scheduling_ignored_during_execution
559
+
560
+ if preferred:
561
+ return affinity
562
+
563
+ if required and required.node_selector_terms:
564
+ for term in required.node_selector_terms:
565
+ if term.match_expressions or term.match_fields:
566
+ return affinity # at least one term has meaningful constraints
567
+
568
+ # At this point, none of the affinity sections contain meaningful constraints,
569
+ # so the affinity object is effectively empty and can be safely discarded.
570
+ return None
mlrun/launcher/base.py CHANGED
@@ -57,6 +57,7 @@ class BaseLauncher(abc.ABC):
57
57
  out_path: Optional[str] = "",
58
58
  workdir: Optional[str] = "",
59
59
  artifact_path: Optional[str] = "",
60
+ output_path: Optional[str] = "",
60
61
  watch: Optional[bool] = True,
61
62
  schedule: Optional[
62
63
  Union[str, mlrun.common.schemas.schedule.ScheduleCronTrigger]
@@ -234,8 +235,7 @@ class BaseLauncher(abc.ABC):
234
235
  hyper_param_options=None,
235
236
  verbose=None,
236
237
  scrape_metrics=None,
237
- out_path=None,
238
- artifact_path=None,
238
+ output_path=None,
239
239
  workdir=None,
240
240
  notifications: Optional[list[mlrun.model.Notification]] = None,
241
241
  state_thresholds: Optional[dict[str, int]] = None,
@@ -301,7 +301,7 @@ class BaseLauncher(abc.ABC):
301
301
  meta = run.metadata
302
302
  meta.uid = meta.uid or uuid.uuid4().hex
303
303
 
304
- run.spec.output_path = out_path or artifact_path or run.spec.output_path
304
+ run.spec.output_path = output_path or run.spec.output_path
305
305
 
306
306
  if not run.spec.output_path:
307
307
  if run.metadata.project:
mlrun/launcher/client.py CHANGED
@@ -72,7 +72,7 @@ class ClientBaseLauncher(launcher.BaseLauncher, abc.ABC):
72
72
  ):
73
73
  run.metadata.labels[mlrun_constants.MLRunInternalLabels.kind] = runtime.kind
74
74
  mlrun.runtimes.utils.enrich_run_labels(
75
- run.metadata.labels, [mlrun.common.runtimes.constants.RunLabels.owner]
75
+ run.metadata.labels, [mlrun_constants.MLRunInternalLabels.owner]
76
76
  )
77
77
  if run.spec.output_path:
78
78
  run.spec.output_path = run.spec.output_path.replace(
mlrun/launcher/local.py CHANGED
@@ -55,6 +55,7 @@ class ClientLocalLauncher(launcher.ClientBaseLauncher):
55
55
  out_path: Optional[str] = "",
56
56
  workdir: Optional[str] = "",
57
57
  artifact_path: Optional[str] = "",
58
+ output_path: Optional[str] = "",
58
59
  watch: Optional[bool] = True,
59
60
  schedule: Optional[
60
61
  Union[str, mlrun.common.schemas.schedule.ScheduleCronTrigger]
@@ -116,8 +117,7 @@ class ClientLocalLauncher(launcher.ClientBaseLauncher):
116
117
  hyper_param_options=hyper_param_options,
117
118
  verbose=verbose,
118
119
  scrape_metrics=scrape_metrics,
119
- out_path=out_path,
120
- artifact_path=artifact_path,
120
+ output_path=output_path,
121
121
  workdir=workdir,
122
122
  notifications=notifications,
123
123
  state_thresholds=state_thresholds,
mlrun/launcher/remote.py CHANGED
@@ -45,6 +45,7 @@ class ClientRemoteLauncher(launcher.ClientBaseLauncher):
45
45
  out_path: Optional[str] = "",
46
46
  workdir: Optional[str] = "",
47
47
  artifact_path: Optional[str] = "",
48
+ output_path: Optional[str] = "",
48
49
  watch: Optional[bool] = True,
49
50
  schedule: Optional[
50
51
  Union[str, mlrun.common.schemas.schedule.ScheduleCronTrigger]
@@ -77,8 +78,7 @@ class ClientRemoteLauncher(launcher.ClientBaseLauncher):
77
78
  hyper_param_options=hyper_param_options,
78
79
  verbose=verbose,
79
80
  scrape_metrics=scrape_metrics,
80
- out_path=out_path,
81
- artifact_path=artifact_path,
81
+ output_path=output_path,
82
82
  workdir=workdir,
83
83
  notifications=notifications,
84
84
  state_thresholds=state_thresholds,
mlrun/model.py CHANGED
@@ -929,6 +929,8 @@ class RunSpec(ModelObj):
929
929
 
930
930
  _fields_to_serialize = ModelObj._fields_to_serialize + [
931
931
  "handler",
932
+ "affinity",
933
+ "tolerations",
932
934
  ]
933
935
 
934
936
  def __init__(
@@ -956,6 +958,8 @@ class RunSpec(ModelObj):
956
958
  state_thresholds=None,
957
959
  reset_on_run=None,
958
960
  node_selector=None,
961
+ tolerations=None,
962
+ affinity=None,
959
963
  ):
960
964
  # A dictionary of parsing configurations that will be read from the inputs the user set. The keys are the inputs
961
965
  # keys (parameter names) and the values are the type hint given in the input keys after the colon.
@@ -994,6 +998,8 @@ class RunSpec(ModelObj):
994
998
  self.state_thresholds = state_thresholds or {}
995
999
  self.reset_on_run = reset_on_run
996
1000
  self.node_selector = node_selector or {}
1001
+ self.tolerations = tolerations or {}
1002
+ self.affinity = affinity or {}
997
1003
 
998
1004
  def _serialize_field(
999
1005
  self, struct: dict, field_name: Optional[str] = None, strip: bool = False
@@ -1003,6 +1009,14 @@ class RunSpec(ModelObj):
1003
1009
  if self.handler and isinstance(self.handler, str):
1004
1010
  return self.handler
1005
1011
  return None
1012
+
1013
+ # Properly serialize known K8s objects
1014
+ if field_name in {"affinity", "tolerations"}:
1015
+ value = getattr(self, field_name, None)
1016
+ if hasattr(value, "to_dict"):
1017
+ return value.to_dict()
1018
+ return value
1019
+
1006
1020
  return super()._serialize_field(struct, field_name, strip)
1007
1021
 
1008
1022
  def is_hyper_job(self):
@@ -11,7 +11,6 @@
11
11
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
- #
15
14
 
16
15
  from .base import ModelMonitoringApplicationBase
17
16
  from .context import MonitoringApplicationContext
@@ -96,7 +96,9 @@ class _PushToMonitoringWriter(StepToDict):
96
96
  logger.debug(
97
97
  "Pushing data to output stream", writer_event=str(writer_event)
98
98
  )
99
- self.output_stream.push([writer_event])
99
+ self.output_stream.push(
100
+ [writer_event], partition_key=application_context.endpoint_id
101
+ )
100
102
  logger.debug("Pushed data to output stream successfully")
101
103
 
102
104
  def _lazy_init(self):
@@ -14,19 +14,18 @@
14
14
 
15
15
  import json
16
16
  import posixpath
17
- import uuid
18
17
  import warnings
19
18
  from abc import ABC
19
+ from tempfile import NamedTemporaryFile
20
+ from typing import Optional
20
21
 
21
- import pandas as pd
22
22
  import semver
23
- from evidently.ui.storage.local.base import METADATA_PATH, FSLocation
24
23
 
25
24
  import mlrun.model_monitoring.applications.base as mm_base
26
25
  import mlrun.model_monitoring.applications.context as mm_context
27
- from mlrun.errors import MLRunIncompatibleVersionError
26
+ from mlrun.errors import MLRunIncompatibleVersionError, MLRunValueError
28
27
 
29
- SUPPORTED_EVIDENTLY_VERSION = semver.Version.parse("0.6.0")
28
+ SUPPORTED_EVIDENTLY_VERSION = semver.Version.parse("0.7.5")
30
29
 
31
30
 
32
31
  def _check_evidently_version(*, cur: semver.Version, ref: semver.Version) -> None:
@@ -60,36 +59,66 @@ except ModuleNotFoundError:
60
59
 
61
60
 
62
61
  if _HAS_EVIDENTLY:
63
- from evidently.suite.base_suite import Display
64
- from evidently.ui.type_aliases import STR_UUID
65
- from evidently.ui.workspace import Workspace
66
- from evidently.utils.dashboard import TemplateParams, file_html_template
62
+ from evidently.core.report import Snapshot
63
+ from evidently.legacy.ui.storage.local.base import METADATA_PATH, FSLocation
64
+ from evidently.ui.workspace import (
65
+ STR_UUID,
66
+ CloudWorkspace,
67
+ Project,
68
+ Workspace,
69
+ WorkspaceBase,
70
+ )
67
71
 
68
72
 
69
73
  class EvidentlyModelMonitoringApplicationBase(
70
74
  mm_base.ModelMonitoringApplicationBase, ABC
71
75
  ):
72
76
  def __init__(
73
- self, evidently_workspace_path: str, evidently_project_id: "STR_UUID"
77
+ self,
78
+ evidently_project_id: "STR_UUID",
79
+ evidently_workspace_path: Optional[str] = None,
80
+ cloud_workspace: bool = False,
74
81
  ) -> None:
75
82
  """
76
- A class for integrating Evidently for mlrun model monitoring within a monitoring application.
77
- Note: evidently is not installed by default in the mlrun/mlrun image.
78
- It must be installed separately to use this class.
83
+ A class for integrating Evidently for MLRun model monitoring within a monitoring application.
84
+
85
+ .. note::
86
+
87
+ The ``evidently`` package is not installed by default in the mlrun/mlrun image.
88
+ It must be installed separately to use this class.
79
89
 
80
- :param evidently_workspace_path: (str) The path to the Evidently workspace.
81
90
  :param evidently_project_id: (str) The ID of the Evidently project.
91
+ :param evidently_workspace_path: (str) The path to the Evidently workspace.
92
+ :param cloud_workspace: (bool) Whether the workspace is an Evidently Cloud workspace.
82
93
  """
83
-
84
- # TODO : more then one project (mep -> project)
85
94
  if not _HAS_EVIDENTLY:
86
95
  raise ModuleNotFoundError("Evidently is not installed - the app cannot run")
87
- self._log_location(evidently_workspace_path)
88
- self.evidently_workspace = Workspace.create(evidently_workspace_path)
96
+ self.evidently_workspace_path = evidently_workspace_path
97
+ if cloud_workspace:
98
+ self.get_workspace = self.get_cloud_workspace
99
+ self.evidently_workspace = self.get_workspace()
89
100
  self.evidently_project_id = evidently_project_id
90
- self.evidently_project = self.evidently_workspace.get_project(
91
- evidently_project_id
92
- )
101
+ self.evidently_project = self.load_project()
102
+
103
+ def load_project(self) -> Project:
104
+ """Load the Evidently project."""
105
+ return self.evidently_workspace.get_project(self.evidently_project_id)
106
+
107
+ def get_workspace(self) -> WorkspaceBase:
108
+ """Get the Evidently workspace. Override this method for customize access to the workspace."""
109
+ if self.evidently_workspace_path:
110
+ self._log_location(self.evidently_workspace_path)
111
+ return Workspace.create(self.evidently_workspace_path)
112
+ else:
113
+ raise MLRunValueError(
114
+ "A local workspace could not be created as `evidently_workspace_path` is not set.\n"
115
+ "If you intend to use a cloud workspace, please use `cloud_workspace=True` and set the "
116
+ "`EVIDENTLY_API_KEY` environment variable. In other cases, override this method."
117
+ )
118
+
119
+ def get_cloud_workspace(self) -> CloudWorkspace:
120
+ """Load the Evidently cloud workspace according to the `EVIDENTLY_API_KEY` environment variable."""
121
+ return CloudWorkspace()
93
122
 
94
123
  @staticmethod
95
124
  def _log_location(evidently_workspace_path):
@@ -128,7 +157,7 @@ class EvidentlyModelMonitoringApplicationBase(
128
157
  @staticmethod
129
158
  def log_evidently_object(
130
159
  monitoring_context: mm_context.MonitoringApplicationContext,
131
- evidently_object: "Display",
160
+ evidently_object: "Snapshot",
132
161
  artifact_name: str,
133
162
  unique_per_endpoint: bool = True,
134
163
  ) -> None:
@@ -141,56 +170,15 @@ class EvidentlyModelMonitoringApplicationBase(
141
170
  This method should be called on special occasions only.
142
171
 
143
172
  :param monitoring_context: (MonitoringApplicationContext) The monitoring context to process.
144
- :param evidently_object: (Display) The Evidently display to log, e.g. a report or a test suite object.
145
- :param artifact_name: (str) The name for the logged artifact.
146
- :param unique_per_endpoint: by default ``True``, we will log different artifact for each model endpoint,
147
- set to ``False`` without changing item key will cause artifact override.
148
- """
149
- evidently_object_html = evidently_object.get_html()
150
- monitoring_context.log_artifact(
151
- artifact_name,
152
- body=evidently_object_html.encode("utf-8"),
153
- format="html",
154
- unique_per_endpoint=unique_per_endpoint,
155
- )
156
-
157
- def log_project_dashboard(
158
- self,
159
- monitoring_context: mm_context.MonitoringApplicationContext,
160
- timestamp_start: pd.Timestamp,
161
- timestamp_end: pd.Timestamp,
162
- artifact_name: str = "dashboard",
163
- unique_per_endpoint: bool = True,
164
- ) -> None:
165
- """
166
- Logs an Evidently project dashboard.
167
-
168
- .. caution::
169
-
170
- Logging Evidently dashboards in every model monitoring window may cause scale issues.
171
- This method should be called on special occasions only.
172
-
173
- :param monitoring_context: (MonitoringApplicationContext) The monitoring context to process.
174
- :param timestamp_start: (pd.Timestamp) The start timestamp for the dashboard data.
175
- :param timestamp_end: (pd.Timestamp) The end timestamp for the dashboard data.
173
+ :param evidently_object: (Snapshot) The Evidently run to log, e.g. a report run.
176
174
  :param artifact_name: (str) The name for the logged artifact.
177
175
  :param unique_per_endpoint: by default ``True``, we will log different artifact for each model endpoint,
178
176
  set to ``False`` without changing item key will cause artifact override.
179
177
  """
180
-
181
- dashboard_info = self.evidently_project.build_dashboard_info(
182
- timestamp_start, timestamp_end
183
- )
184
- template_params = TemplateParams(
185
- dashboard_id="pd_" + str(uuid.uuid4()).replace("-", ""),
186
- dashboard_info=dashboard_info,
187
- additional_graphs={},
188
- )
189
-
190
- dashboard_html = file_html_template(params=template_params)
191
- monitoring_context.log_artifact(
192
- artifact_name,
193
- body=dashboard_html.encode("utf-8"),
194
- format="html",
195
- unique_per_endpoint=unique_per_endpoint,
196
- )
178
+ with NamedTemporaryFile(suffix=".html") as file:
179
+ evidently_object.save_html(filename=file.name)
180
+ monitoring_context.log_artifact(
181
+ artifact_name,
182
+ local_path=file.name,
183
+ unique_per_endpoint=unique_per_endpoint,
184
+ )