mlrun 1.5.0rc1__py3-none-any.whl → 1.5.0rc2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic. Click here for more details.

Files changed (119)
  1. mlrun/__init__.py +2 -35
  2. mlrun/__main__.py +1 -40
  3. mlrun/api/api/api.py +6 -0
  4. mlrun/api/api/endpoints/feature_store.py +0 -4
  5. mlrun/api/api/endpoints/files.py +14 -2
  6. mlrun/api/api/endpoints/functions.py +6 -1
  7. mlrun/api/api/endpoints/logs.py +17 -3
  8. mlrun/api/api/endpoints/pipelines.py +1 -5
  9. mlrun/api/api/endpoints/projects.py +88 -0
  10. mlrun/api/api/endpoints/runs.py +48 -6
  11. mlrun/api/api/endpoints/workflows.py +355 -0
  12. mlrun/api/api/utils.py +1 -1
  13. mlrun/api/crud/__init__.py +1 -0
  14. mlrun/api/crud/client_spec.py +3 -0
  15. mlrun/api/crud/model_monitoring/deployment.py +36 -7
  16. mlrun/api/crud/model_monitoring/grafana.py +1 -1
  17. mlrun/api/crud/model_monitoring/helpers.py +32 -2
  18. mlrun/api/crud/model_monitoring/model_endpoints.py +27 -5
  19. mlrun/api/crud/notifications.py +9 -4
  20. mlrun/api/crud/pipelines.py +4 -9
  21. mlrun/api/crud/runtime_resources.py +4 -3
  22. mlrun/api/crud/secrets.py +21 -0
  23. mlrun/api/crud/workflows.py +352 -0
  24. mlrun/api/db/base.py +16 -1
  25. mlrun/api/db/sqldb/db.py +97 -16
  26. mlrun/api/launcher.py +26 -7
  27. mlrun/api/main.py +3 -4
  28. mlrun/{mlutils → api/rundb}/__init__.py +2 -6
  29. mlrun/{db → api/rundb}/sqldb.py +35 -83
  30. mlrun/api/runtime_handlers/__init__.py +56 -0
  31. mlrun/api/runtime_handlers/base.py +1247 -0
  32. mlrun/api/runtime_handlers/daskjob.py +209 -0
  33. mlrun/api/runtime_handlers/kubejob.py +37 -0
  34. mlrun/api/runtime_handlers/mpijob.py +147 -0
  35. mlrun/api/runtime_handlers/remotesparkjob.py +29 -0
  36. mlrun/api/runtime_handlers/sparkjob.py +148 -0
  37. mlrun/api/utils/builder.py +1 -4
  38. mlrun/api/utils/clients/chief.py +14 -0
  39. mlrun/api/utils/scheduler.py +98 -15
  40. mlrun/api/utils/singletons/db.py +4 -0
  41. mlrun/artifacts/manager.py +1 -2
  42. mlrun/common/schemas/__init__.py +6 -0
  43. mlrun/common/schemas/auth.py +4 -1
  44. mlrun/common/schemas/client_spec.py +1 -1
  45. mlrun/common/schemas/model_monitoring/__init__.py +1 -0
  46. mlrun/common/schemas/model_monitoring/constants.py +11 -0
  47. mlrun/common/schemas/project.py +1 -0
  48. mlrun/common/schemas/runs.py +1 -8
  49. mlrun/common/schemas/schedule.py +1 -8
  50. mlrun/common/schemas/workflow.py +54 -0
  51. mlrun/config.py +42 -40
  52. mlrun/datastore/sources.py +1 -1
  53. mlrun/db/__init__.py +4 -68
  54. mlrun/db/base.py +12 -0
  55. mlrun/db/factory.py +65 -0
  56. mlrun/db/httpdb.py +175 -19
  57. mlrun/db/nopdb.py +4 -2
  58. mlrun/execution.py +4 -2
  59. mlrun/feature_store/__init__.py +1 -0
  60. mlrun/feature_store/api.py +1 -2
  61. mlrun/feature_store/feature_set.py +0 -10
  62. mlrun/feature_store/feature_vector.py +340 -2
  63. mlrun/feature_store/ingestion.py +5 -10
  64. mlrun/feature_store/retrieval/base.py +118 -104
  65. mlrun/feature_store/retrieval/dask_merger.py +17 -10
  66. mlrun/feature_store/retrieval/job.py +4 -1
  67. mlrun/feature_store/retrieval/local_merger.py +18 -18
  68. mlrun/feature_store/retrieval/spark_merger.py +21 -14
  69. mlrun/feature_store/retrieval/storey_merger.py +21 -15
  70. mlrun/kfpops.py +3 -9
  71. mlrun/launcher/base.py +3 -3
  72. mlrun/launcher/client.py +3 -2
  73. mlrun/launcher/factory.py +16 -13
  74. mlrun/lists.py +0 -11
  75. mlrun/model.py +9 -15
  76. mlrun/model_monitoring/helpers.py +15 -25
  77. mlrun/model_monitoring/model_monitoring_batch.py +72 -4
  78. mlrun/model_monitoring/prometheus.py +219 -0
  79. mlrun/model_monitoring/stores/__init__.py +15 -9
  80. mlrun/model_monitoring/stores/sql_model_endpoint_store.py +3 -1
  81. mlrun/model_monitoring/stream_processing.py +181 -29
  82. mlrun/package/packager.py +6 -8
  83. mlrun/package/packagers/default_packager.py +121 -10
  84. mlrun/platforms/__init__.py +0 -2
  85. mlrun/platforms/iguazio.py +0 -56
  86. mlrun/projects/pipelines.py +57 -158
  87. mlrun/projects/project.py +6 -32
  88. mlrun/render.py +1 -1
  89. mlrun/run.py +2 -124
  90. mlrun/runtimes/__init__.py +6 -42
  91. mlrun/runtimes/base.py +26 -1241
  92. mlrun/runtimes/daskjob.py +2 -198
  93. mlrun/runtimes/function.py +16 -5
  94. mlrun/runtimes/kubejob.py +5 -29
  95. mlrun/runtimes/mpijob/__init__.py +2 -2
  96. mlrun/runtimes/mpijob/abstract.py +10 -1
  97. mlrun/runtimes/mpijob/v1.py +0 -76
  98. mlrun/runtimes/mpijob/v1alpha1.py +1 -74
  99. mlrun/runtimes/nuclio.py +3 -2
  100. mlrun/runtimes/pod.py +0 -10
  101. mlrun/runtimes/remotesparkjob.py +1 -15
  102. mlrun/runtimes/serving.py +1 -1
  103. mlrun/runtimes/sparkjob/__init__.py +0 -1
  104. mlrun/runtimes/sparkjob/abstract.py +4 -131
  105. mlrun/serving/states.py +1 -1
  106. mlrun/utils/db.py +0 -2
  107. mlrun/utils/helpers.py +19 -13
  108. mlrun/utils/notifications/notification_pusher.py +5 -25
  109. mlrun/utils/regex.py +7 -2
  110. mlrun/utils/version/version.json +2 -2
  111. {mlrun-1.5.0rc1.dist-info → mlrun-1.5.0rc2.dist-info}/METADATA +24 -23
  112. {mlrun-1.5.0rc1.dist-info → mlrun-1.5.0rc2.dist-info}/RECORD +116 -107
  113. {mlrun-1.5.0rc1.dist-info → mlrun-1.5.0rc2.dist-info}/WHEEL +1 -1
  114. mlrun/mlutils/data.py +0 -160
  115. mlrun/mlutils/models.py +0 -78
  116. mlrun/mlutils/plots.py +0 -902
  117. {mlrun-1.5.0rc1.dist-info → mlrun-1.5.0rc2.dist-info}/LICENSE +0 -0
  118. {mlrun-1.5.0rc1.dist-info → mlrun-1.5.0rc2.dist-info}/entry_points.txt +0 -0
  119. {mlrun-1.5.0rc1.dist-info → mlrun-1.5.0rc2.dist-info}/top_level.txt +0 -0
mlrun/runtimes/daskjob.py CHANGED
@@ -15,33 +15,25 @@ import datetime
15
15
  import inspect
16
16
  import socket
17
17
  import time
18
- import typing
19
18
  from os import environ
20
- from typing import Dict, List, Optional, Union
21
-
22
- from deprecated import deprecated
23
- from kubernetes.client.rest import ApiException
24
- from sqlalchemy.orm import Session
25
19
 
26
20
  import mlrun.common.schemas
27
21
  import mlrun.errors
28
22
  import mlrun.k8s_utils
29
23
  import mlrun.utils
30
24
  import mlrun.utils.regex
31
- from mlrun.api.db.base import DBInterface
32
25
  from mlrun.errors import err_to_str
33
- from mlrun.runtimes.base import BaseRuntimeHandler
34
26
 
35
27
  from ..config import config
36
28
  from ..execution import MLClientCtx
37
29
  from ..model import RunObject
38
30
  from ..render import ipython_display
39
31
  from ..utils import logger, normalize_name, update_in
40
- from .base import FunctionStatus, RuntimeClassMode
32
+ from .base import FunctionStatus
41
33
  from .kubejob import KubejobRuntime
42
34
  from .local import exec_from_params, load_module
43
35
  from .pod import KubeResourceSpec, kube_resource_spec_to_pod_spec
44
- from .utils import RunError, get_func_selector, get_k8s, get_resource_labels, log_std
36
+ from .utils import RunError, get_func_selector, get_resource_labels, log_std
45
37
 
46
38
 
47
39
  def get_dask_resource():
@@ -406,16 +398,6 @@ class DaskCluster(KubejobRuntime):
406
398
  show_on_failure=show_on_failure,
407
399
  )
408
400
 
409
- # TODO: Remove in 1.5.0
410
- @deprecated(
411
- version="1.3.0",
412
- reason="'Dask gpus' will be removed in 1.5.0, use 'with_scheduler_limits' / 'with_worker_limits' instead",
413
- category=FutureWarning,
414
- )
415
- def gpus(self, gpus, gpu_type="nvidia.com/gpu"):
416
- update_in(self.spec.scheduler_resources, ["limits", gpu_type], gpus)
417
- update_in(self.spec.worker_resources, ["limits", gpu_type], gpus)
418
-
419
401
  def with_limits(
420
402
  self,
421
403
  mem=None,
@@ -686,181 +668,3 @@ def get_obj_status(selector=None, namespace=None):
686
668
  f"found dask function {pod.metadata.name} in non ready state ({status})"
687
669
  )
688
670
  return status
689
-
690
-
691
- class DaskRuntimeHandler(BaseRuntimeHandler):
692
- kind = "dask"
693
- class_modes = {RuntimeClassMode.run: "dask"}
694
-
695
- # Dask runtime resources are per function (and not per run).
696
- # It means that monitoring runtime resources state doesn't say anything about the run state.
697
- # Therefore dask run monitoring is done completely by the SDK, so overriding the monitoring method with no logic
698
- def monitor_runs(
699
- self, db: DBInterface, db_session: Session, leader_session: Optional[str] = None
700
- ):
701
- return
702
-
703
- @staticmethod
704
- def _get_object_label_selector(object_id: str) -> str:
705
- return f"mlrun/function={object_id}"
706
-
707
- @staticmethod
708
- def resolve_object_id(
709
- run: dict,
710
- ) -> typing.Optional[str]:
711
- """
712
- Resolves the object ID from the run object.
713
- In dask runtime, the object ID is the function name.
714
- :param run: run object
715
- :return: function name
716
- """
717
-
718
- function = run.get("spec", {}).get("function", None)
719
- if function:
720
-
721
- # a dask run's function field is in the format <project-name>/<function-name>@<run-uid>
722
- # we only want the function name
723
- project_and_function = function.split("@")[0]
724
- return project_and_function.split("/")[-1]
725
-
726
- return None
727
-
728
- def _enrich_list_resources_response(
729
- self,
730
- response: Union[
731
- mlrun.common.schemas.RuntimeResources,
732
- mlrun.common.schemas.GroupedByJobRuntimeResourcesOutput,
733
- mlrun.common.schemas.GroupedByProjectRuntimeResourcesOutput,
734
- ],
735
- namespace: str,
736
- label_selector: str = None,
737
- group_by: Optional[
738
- mlrun.common.schemas.ListRuntimeResourcesGroupByField
739
- ] = None,
740
- ) -> Union[
741
- mlrun.common.schemas.RuntimeResources,
742
- mlrun.common.schemas.GroupedByJobRuntimeResourcesOutput,
743
- mlrun.common.schemas.GroupedByProjectRuntimeResourcesOutput,
744
- ]:
745
- """
746
- Handling listing service resources
747
- """
748
- enrich_needed = self._validate_if_enrich_is_needed_by_group_by(group_by)
749
- if not enrich_needed:
750
- return response
751
- services = get_k8s().v1api.list_namespaced_service(
752
- namespace, label_selector=label_selector
753
- )
754
- service_resources = []
755
- for service in services.items:
756
- service_resources.append(
757
- mlrun.common.schemas.RuntimeResource(
758
- name=service.metadata.name, labels=service.metadata.labels
759
- )
760
- )
761
- return self._enrich_service_resources_in_response(
762
- response, service_resources, group_by
763
- )
764
-
765
- def _build_output_from_runtime_resources(
766
- self,
767
- response: Union[
768
- mlrun.common.schemas.RuntimeResources,
769
- mlrun.common.schemas.GroupedByJobRuntimeResourcesOutput,
770
- mlrun.common.schemas.GroupedByProjectRuntimeResourcesOutput,
771
- ],
772
- runtime_resources_list: List[mlrun.common.schemas.RuntimeResources],
773
- group_by: Optional[
774
- mlrun.common.schemas.ListRuntimeResourcesGroupByField
775
- ] = None,
776
- ):
777
- enrich_needed = self._validate_if_enrich_is_needed_by_group_by(group_by)
778
- if not enrich_needed:
779
- return response
780
- service_resources = []
781
- for runtime_resources in runtime_resources_list:
782
- if runtime_resources.service_resources:
783
- service_resources += runtime_resources.service_resources
784
- return self._enrich_service_resources_in_response(
785
- response, service_resources, group_by
786
- )
787
-
788
- def _validate_if_enrich_is_needed_by_group_by(
789
- self,
790
- group_by: Optional[
791
- mlrun.common.schemas.ListRuntimeResourcesGroupByField
792
- ] = None,
793
- ) -> bool:
794
- # Dask runtime resources are per function (and not per job) therefore, when grouping by job we're simply
795
- # omitting the dask runtime resources
796
- if group_by == mlrun.common.schemas.ListRuntimeResourcesGroupByField.job:
797
- return False
798
- elif group_by == mlrun.common.schemas.ListRuntimeResourcesGroupByField.project:
799
- return True
800
- elif group_by is not None:
801
- raise NotImplementedError(
802
- f"Provided group by field is not supported. group_by={group_by}"
803
- )
804
- return True
805
-
806
- def _enrich_service_resources_in_response(
807
- self,
808
- response: Union[
809
- mlrun.common.schemas.RuntimeResources,
810
- mlrun.common.schemas.GroupedByJobRuntimeResourcesOutput,
811
- mlrun.common.schemas.GroupedByProjectRuntimeResourcesOutput,
812
- ],
813
- service_resources: List[mlrun.common.schemas.RuntimeResource],
814
- group_by: Optional[
815
- mlrun.common.schemas.ListRuntimeResourcesGroupByField
816
- ] = None,
817
- ):
818
- if group_by == mlrun.common.schemas.ListRuntimeResourcesGroupByField.project:
819
- for service_resource in service_resources:
820
- self._add_resource_to_grouped_by_project_resources_response(
821
- response, "service_resources", service_resource
822
- )
823
- else:
824
- response.service_resources = service_resources
825
- return response
826
-
827
- def _delete_extra_resources(
828
- self,
829
- db: DBInterface,
830
- db_session: Session,
831
- namespace: str,
832
- deleted_resources: List[Dict],
833
- label_selector: str = None,
834
- force: bool = False,
835
- grace_period: int = None,
836
- ):
837
- """
838
- Handling services deletion
839
- """
840
- if grace_period is None:
841
- grace_period = config.runtime_resources_deletion_grace_period
842
- service_names = []
843
- for pod_dict in deleted_resources:
844
- dask_component = (
845
- pod_dict["metadata"].get("labels", {}).get("dask.org/component")
846
- )
847
- cluster_name = (
848
- pod_dict["metadata"].get("labels", {}).get("dask.org/cluster-name")
849
- )
850
- if dask_component == "scheduler" and cluster_name:
851
- service_names.append(cluster_name)
852
-
853
- services = get_k8s().v1api.list_namespaced_service(
854
- namespace, label_selector=label_selector
855
- )
856
- for service in services.items:
857
- try:
858
- if force or service.metadata.name in service_names:
859
- get_k8s().v1api.delete_namespaced_service(
860
- service.metadata.name, namespace
861
- )
862
- logger.info(f"Deleted service: {service.metadata.name}")
863
- except ApiException as exc:
864
- # ignore error if service is already removed
865
- if exc.status != 404:
866
- raise
@@ -28,11 +28,12 @@ from kubernetes import client
28
28
  from nuclio.deploy import find_dashboard_url, get_deploy_status
29
29
  from nuclio.triggers import V3IOStreamTrigger
30
30
 
31
+ import mlrun.db
31
32
  import mlrun.errors
32
33
  import mlrun.k8s_utils
33
34
  import mlrun.utils
35
+ import mlrun.utils.helpers
34
36
  from mlrun.common.schemas import AuthInfo
35
- from mlrun.db import RunDBError
36
37
 
37
38
  from ..config import config as mlconf
38
39
  from ..errors import err_to_str
@@ -488,6 +489,16 @@ class RemoteRuntime(KubeResource):
488
489
  endpoint = None
489
490
  if "://" in stream_path:
490
491
  endpoint, stream_path = parse_path(stream_path, suffix="")
492
+
493
+ # verify v3io stream trigger name is valid
494
+ mlrun.utils.helpers.validate_v3io_stream_consumer_group(group)
495
+
496
+ consumer_group = kwargs.pop("consumerGroup", None)
497
+ if consumer_group:
498
+ logger.warning(
499
+ "consumerGroup kwargs value is ignored. use group argument instead"
500
+ )
501
+
491
502
  container, path = split_path(stream_path)
492
503
  shards = shards or 1
493
504
  extra_attributes = extra_attributes or {}
@@ -603,7 +614,7 @@ class RemoteRuntime(KubeResource):
603
614
  text, last_log_timestamp = db.get_builder_status(
604
615
  self, last_log_timestamp=last_log_timestamp, verbose=verbose
605
616
  )
606
- except RunDBError:
617
+ except mlrun.db.RunDBError:
607
618
  raise ValueError("function or deploy process not found")
608
619
  state = self.status.state
609
620
  if text:
@@ -714,7 +725,7 @@ class RemoteRuntime(KubeResource):
714
725
  text, last_log_timestamp = self._get_db().get_builder_status(
715
726
  self, last_log_timestamp=last_log_timestamp, verbose=verbose
716
727
  )
717
- except RunDBError:
728
+ except mlrun.db.RunDBError:
718
729
  if raise_on_exception:
719
730
  return "", "", None
720
731
  raise ValueError("function or deploy process not found")
@@ -725,8 +736,8 @@ class RemoteRuntime(KubeResource):
725
736
  runtime_env = {
726
737
  "MLRUN_DEFAULT_PROJECT": self.metadata.project or mlconf.default_project,
727
738
  }
728
- if self.spec.rundb or mlconf.httpdb.api_url:
729
- runtime_env["MLRUN_DBPATH"] = self.spec.rundb or mlconf.httpdb.api_url
739
+ if mlconf.httpdb.api_url:
740
+ runtime_env["MLRUN_DBPATH"] = mlconf.httpdb.api_url
730
741
  if mlconf.namespace:
731
742
  runtime_env["MLRUN_NAMESPACE"] = mlconf.namespace
732
743
  if self.metadata.credentials.access_key:
mlrun/runtimes/kubejob.py CHANGED
@@ -20,15 +20,14 @@ from kubernetes import client
20
20
  from kubernetes.client.rest import ApiException
21
21
 
22
22
  import mlrun.common.schemas
23
+ import mlrun.db
23
24
  import mlrun.errors
24
- from mlrun.runtimes.base import BaseRuntimeHandler
25
25
 
26
- from ..db import RunDBError
27
26
  from ..errors import err_to_str
28
27
  from ..kfpops import build_op
29
28
  from ..model import RunObject
30
29
  from ..utils import get_in, logger
31
- from .base import RunError, RuntimeClassMode
30
+ from .base import RunError
32
31
  from .pod import KubeResource, kube_resource_spec_to_pod_spec
33
32
  from .utils import get_k8s
34
33
 
@@ -257,7 +256,7 @@ class KubejobRuntime(KubeResource):
257
256
  offset = 0
258
257
  try:
259
258
  text, _ = db.get_builder_status(self, 0, logs=logs)
260
- except RunDBError:
259
+ except mlrun.db.RunDBError:
261
260
  raise ValueError("function or build process not found")
262
261
 
263
262
  def print_log(text):
@@ -360,10 +359,8 @@ class KubejobRuntime(KubeResource):
360
359
 
361
360
  if self.spec.clone_target_dir:
362
361
  workdir = workdir or ""
363
- if workdir.startswith("./"):
364
- # TODO: use 'removeprefix' when we drop python 3.7 support
365
- # workdir.removeprefix("./")
366
- workdir = workdir[2:]
362
+ workdir = workdir.removeprefix("./")
363
+
367
364
  return os.path.join(self.spec.clone_target_dir, workdir)
368
365
 
369
366
  return workdir
@@ -390,24 +387,3 @@ def func_to_pod(image, runtime, extra_env, command, args, workdir):
390
387
  ]
391
388
 
392
389
  return pod_spec
393
-
394
-
395
- class KubeRuntimeHandler(BaseRuntimeHandler):
396
- kind = "job"
397
- class_modes = {RuntimeClassMode.run: "job", RuntimeClassMode.build: "build"}
398
-
399
- @staticmethod
400
- def _expect_pods_without_uid() -> bool:
401
- """
402
- builder pods are handled as part of this runtime handler - they are not coupled to run object, therefore they
403
- don't have the uid in their labels
404
- """
405
- return True
406
-
407
- @staticmethod
408
- def _are_resources_coupled_to_run_object() -> bool:
409
- return True
410
-
411
- @staticmethod
412
- def _get_object_label_selector(object_id: str) -> str:
413
- return f"mlrun/uid={object_id}"
@@ -14,5 +14,5 @@
14
14
 
15
15
  # flake8: noqa - this is until we take care of the F401 violations with respect to __all__ & sphinx
16
16
 
17
- from .v1 import MpiRuntimeV1, MpiV1RuntimeHandler
18
- from .v1alpha1 import MpiRuntimeV1Alpha1, MpiV1Alpha1RuntimeHandler
17
+ from .v1 import MpiRuntimeV1
18
+ from .v1alpha1 import MpiRuntimeV1Alpha1
@@ -25,7 +25,7 @@ from mlrun.model import RunObject
25
25
  from mlrun.runtimes.kubejob import KubejobRuntime
26
26
  from mlrun.runtimes.pod import KubeResourceSpec
27
27
  from mlrun.runtimes.utils import RunError, get_k8s
28
- from mlrun.utils import get_in, logger
28
+ from mlrun.utils import get_in, logger, update_in
29
29
 
30
30
 
31
31
  class MPIResourceSpec(KubeResourceSpec):
@@ -138,6 +138,15 @@ class AbstractMPIJobRuntime(KubejobRuntime, abc.ABC):
138
138
  def _get_crd_info() -> typing.Tuple[str, str, str]:
139
139
  pass
140
140
 
141
+ @staticmethod
142
+ def _get_run_completion_updates(run: dict) -> dict:
143
+
144
+ # TODO: add a 'workers' section in run objects state, each worker will update its state while
145
+ # the run state will be resolved by the server.
146
+ # update the run object state if empty so that it won't default to 'created' state
147
+ update_in(run, "status.state", "running", append=False, replace=False)
148
+ return {}
149
+
141
150
  def _pretty_print_jobs(self, items: typing.List):
142
151
  print(f"{'status':10} {'name':20} {'start':21} end")
143
152
  for i in items:
@@ -13,17 +13,13 @@
13
13
  # limitations under the License.
14
14
  import typing
15
15
  from copy import deepcopy
16
- from datetime import datetime
17
16
 
18
17
  from kubernetes import client
19
- from sqlalchemy.orm import Session
20
18
 
21
19
  import mlrun.runtimes.pod
22
- from mlrun.api.db.base import DBInterface
23
20
  from mlrun.config import config as mlconf
24
21
  from mlrun.execution import MLClientCtx
25
22
  from mlrun.model import RunObject
26
- from mlrun.runtimes.base import BaseRuntimeHandler, RunStates, RuntimeClassMode
27
23
  from mlrun.runtimes.constants import MPIJobCRDVersions, MPIJobV1CleanPodPolicies
28
24
  from mlrun.runtimes.mpijob.abstract import AbstractMPIJobRuntime, MPIResourceSpec
29
25
  from mlrun.utils import get_in, update_in
@@ -318,75 +314,3 @@ class MpiRuntimeV1(AbstractMPIJobRuntime):
318
314
  MpiRuntimeV1.crd_version,
319
315
  MpiRuntimeV1.crd_plural,
320
316
  )
321
-
322
-
323
- class MpiV1RuntimeHandler(BaseRuntimeHandler):
324
- kind = "mpijob"
325
- class_modes = {
326
- RuntimeClassMode.run: "mpijob",
327
- }
328
-
329
- def _resolve_crd_object_status_info(
330
- self, db: DBInterface, db_session: Session, crd_object
331
- ) -> typing.Tuple[bool, typing.Optional[datetime], typing.Optional[str]]:
332
- """
333
- https://github.com/kubeflow/mpi-operator/blob/master/pkg/apis/kubeflow/v1/types.go#L29
334
- https://github.com/kubeflow/common/blob/master/pkg/apis/common/v1/types.go#L55
335
- """
336
- launcher_status = (
337
- crd_object.get("status", {}).get("replicaStatuses", {}).get("Launcher", {})
338
- )
339
- # the launcher status also has running property, but it's empty for
340
- # short period after the creation, so we're
341
- # checking terminal state by the completion time existence
342
- in_terminal_state = (
343
- crd_object.get("status", {}).get("completionTime", None) is not None
344
- )
345
- desired_run_state = RunStates.running
346
- completion_time = None
347
- if in_terminal_state:
348
- completion_time = datetime.fromisoformat(
349
- crd_object.get("status", {})
350
- .get("completionTime")
351
- .replace("Z", "+00:00")
352
- )
353
- desired_run_state = (
354
- RunStates.error
355
- if launcher_status.get("failed", 0) > 0
356
- else RunStates.completed
357
- )
358
- return in_terminal_state, completion_time, desired_run_state
359
-
360
- @staticmethod
361
- def _are_resources_coupled_to_run_object() -> bool:
362
- return True
363
-
364
- @staticmethod
365
- def _get_object_label_selector(object_id: str) -> str:
366
- return f"mlrun/uid={object_id}"
367
-
368
- @staticmethod
369
- def _get_main_runtime_resource_label_selector() -> str:
370
- """
371
- There are some runtimes which might have multiple k8s resources attached to a one runtime, in this case
372
- we don't want to pull logs from all but rather only for the "driver"/"launcher" etc
373
- :return: the label selector
374
- """
375
- return "mpi-job-role=launcher"
376
-
377
- @staticmethod
378
- def _get_run_completion_updates(run: dict) -> dict:
379
-
380
- # TODO: add a 'workers' section in run objects state, each worker will update its state while
381
- # the run state will be resolved by the server.
382
- # update the run object state if empty so that it won't default to 'created' state
383
- update_in(run, "status.state", "running", append=False, replace=False)
384
- return {}
385
-
386
- @staticmethod
387
- def _get_crd_info() -> typing.Tuple[str, str, str]:
388
- return (
389
- MpiRuntimeV1.crd_group,
390
- MpiRuntimeV1.crd_version,
391
- MpiRuntimeV1.crd_plural,
392
- )
@@ -13,18 +13,14 @@
13
13
  # limitations under the License.
14
14
  import typing
15
15
  from copy import deepcopy
16
- from datetime import datetime
17
16
 
18
17
  from kubernetes import client
19
- from sqlalchemy.orm import Session
20
18
 
21
19
  import mlrun.runtimes.pod
22
- from mlrun.api.db.base import DBInterface
23
20
  from mlrun.config import config as mlconf
24
21
  from mlrun.execution import MLClientCtx
25
22
  from mlrun.model import RunObject
26
- from mlrun.runtimes.base import BaseRuntimeHandler, RunStates, RuntimeClassMode
27
- from mlrun.runtimes.constants import MPIJobCRDVersions, MPIJobV1Alpha1States
23
+ from mlrun.runtimes.constants import MPIJobCRDVersions
28
24
  from mlrun.runtimes.mpijob.abstract import AbstractMPIJobRuntime
29
25
  from mlrun.utils import get_in, update_in
30
26
 
@@ -156,72 +152,3 @@ class MpiRuntimeV1Alpha1(AbstractMPIJobRuntime):
156
152
  MpiRuntimeV1Alpha1.crd_version,
157
153
  MpiRuntimeV1Alpha1.crd_plural,
158
154
  )
159
-
160
-
161
- class MpiV1Alpha1RuntimeHandler(BaseRuntimeHandler):
162
- kind = "mpijob"
163
- class_modes = {
164
- RuntimeClassMode.run: "mpijob",
165
- }
166
-
167
- def _resolve_crd_object_status_info(
168
- self, db: DBInterface, db_session: Session, crd_object
169
- ) -> typing.Tuple[bool, typing.Optional[datetime], typing.Optional[str]]:
170
- """
171
- https://github.com/kubeflow/mpi-operator/blob/master/pkg/apis/kubeflow/v1alpha1/types.go#L115
172
- """
173
- launcher_status = crd_object.get("status", {}).get("launcherStatus", "")
174
- in_terminal_state = launcher_status in MPIJobV1Alpha1States.terminal_states()
175
- desired_run_state = MPIJobV1Alpha1States.mpijob_state_to_run_state(
176
- launcher_status
177
- )
178
- completion_time = None
179
- if in_terminal_state:
180
- completion_time = datetime.fromisoformat(
181
- crd_object.get("status", {})
182
- .get("completionTime")
183
- .replace("Z", "+00:00")
184
- )
185
- desired_run_state = {
186
- "Succeeded": RunStates.completed,
187
- "Failed": RunStates.error,
188
- }[launcher_status]
189
- return in_terminal_state, completion_time, desired_run_state
190
-
191
- @staticmethod
192
- def _are_resources_coupled_to_run_object() -> bool:
193
- return True
194
-
195
- @staticmethod
196
- def _get_object_label_selector(object_id: str) -> str:
197
- return f"mlrun/uid={object_id}"
198
-
199
- @staticmethod
200
- def _get_main_runtime_resource_label_selector() -> str:
201
- """
202
- There are some runtimes which might have multiple k8s resources attached to a one runtime, in this case
203
- we don't want to pull logs from all but rather only for the "driver"/"launcher" etc
204
- :return: the label selector
205
- """
206
- return "mpi_role_type=launcher"
207
-
208
- @staticmethod
209
- def _get_run_completion_updates(run: dict) -> dict:
210
-
211
- # TODO: add a 'workers' section in run objects state, each worker will update its state while
212
- # the run state will be resolved by the server.
213
- # update the run object state if empty so that it won't default to 'created' state
214
- update_in(run, "status.state", "running", append=False, replace=False)
215
- return {}
216
-
217
- @staticmethod
218
- def _get_crd_info() -> typing.Tuple[str, str, str]:
219
- return (
220
- MpiRuntimeV1Alpha1.crd_group,
221
- MpiRuntimeV1Alpha1.crd_version,
222
- MpiRuntimeV1Alpha1.crd_plural,
223
- )
224
-
225
- @staticmethod
226
- def _get_crd_object_status(crd_object) -> str:
227
- return crd_object.get("status", {}).get("launcherStatus", "")
mlrun/runtimes/nuclio.py CHANGED
@@ -16,7 +16,8 @@ import json
16
16
  import os
17
17
  import socket
18
18
 
19
- from ..db import get_or_set_dburl
19
+ import mlrun.db
20
+
20
21
  from ..errors import err_to_str
21
22
  from ..execution import MLClientCtx
22
23
  from ..model import RunTemplate
@@ -60,7 +61,7 @@ def nuclio_jobs_handler(context, event):
60
61
  status_code=400,
61
62
  )
62
63
 
63
- out = get_or_set_dburl()
64
+ out = mlrun.db.get_or_set_dburl()
64
65
  if out:
65
66
  context.logger.info(f"logging run results to: {out}")
66
67
 
mlrun/runtimes/pod.py CHANGED
@@ -20,7 +20,6 @@ from enum import Enum
20
20
  import dotenv
21
21
  import kfp.dsl
22
22
  import kubernetes.client as k8s_client
23
- from deprecated import deprecated
24
23
 
25
24
  import mlrun.errors
26
25
  import mlrun.utils.regex
@@ -1014,15 +1013,6 @@ class KubeResource(BaseRuntime):
1014
1013
  self.set_env(name, value)
1015
1014
  return self
1016
1015
 
1017
- # TODO: Remove in 1.5.0
1018
- @deprecated(
1019
- version="1.3.0",
1020
- reason="'Job gpus' will be removed in 1.5.0, use 'with_limits' instead",
1021
- category=FutureWarning,
1022
- )
1023
- def gpus(self, gpus, gpu_type="nvidia.com/gpu"):
1024
- update_in(self.spec.resources, ["limits", gpu_type], gpus)
1025
-
1026
1016
  def set_image_pull_configuration(
1027
1017
  self, image_pull_policy: str = None, image_pull_secret_name: str = None
1028
1018
  ):
@@ -21,8 +21,7 @@ from mlrun.config import config
21
21
 
22
22
  from ..model import RunObject
23
23
  from ..platforms.iguazio import mount_v3io, mount_v3iod
24
- from .base import RuntimeClassMode
25
- from .kubejob import KubejobRuntime, KubeRuntimeHandler
24
+ from .kubejob import KubejobRuntime
26
25
  from .pod import KubeResourceSpec
27
26
 
28
27
 
@@ -210,18 +209,5 @@ class RemoteSparkRuntime(KubejobRuntime):
210
209
  )
211
210
 
212
211
 
213
- class RemoteSparkRuntimeHandler(KubeRuntimeHandler):
214
- kind = "remote-spark"
215
- class_modes = {RuntimeClassMode.run: "remote-spark"}
216
-
217
- @staticmethod
218
- def _are_resources_coupled_to_run_object() -> bool:
219
- return True
220
-
221
- @staticmethod
222
- def _get_object_label_selector(object_id: str) -> str:
223
- return f"mlrun/uid={object_id}"
224
-
225
-
226
212
  def igz_spark_pre_hook():
227
213
  run(["/bin/bash", "/etc/config/v3io/spark-job-init.sh"])
mlrun/runtimes/serving.py CHANGED
@@ -474,7 +474,7 @@ class ServingRuntime(RemoteRuntime):
474
474
  child_function = self._spec.function_refs[function_name]
475
475
  trigger_args = stream.trigger_args or {}
476
476
 
477
- if mlrun.mlconf.httpdb.nuclio.explicit_ack == "enabled":
477
+ if mlrun.mlconf.is_explicit_ack():
478
478
  trigger_args["explicit_ack_mode"] = trigger_args.get(
479
479
  "explicit_ack_mode", "explicitOnly"
480
480
  )
@@ -14,5 +14,4 @@
14
14
 
15
15
  # flake8: noqa - this is until we take care of the F401 violations with respect to __all__ & sphinx
16
16
 
17
- from .abstract import SparkRuntimeHandler
18
17
  from .spark3job import Spark3Runtime