mlrun 1.4.0rc25__py3-none-any.whl → 1.5.0rc2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic. Click here for more details.

Files changed (184) hide show
  1. mlrun/__init__.py +2 -35
  2. mlrun/__main__.py +3 -41
  3. mlrun/api/api/api.py +6 -0
  4. mlrun/api/api/endpoints/feature_store.py +0 -4
  5. mlrun/api/api/endpoints/files.py +14 -2
  6. mlrun/api/api/endpoints/frontend_spec.py +2 -1
  7. mlrun/api/api/endpoints/functions.py +95 -59
  8. mlrun/api/api/endpoints/grafana_proxy.py +9 -9
  9. mlrun/api/api/endpoints/logs.py +17 -3
  10. mlrun/api/api/endpoints/model_endpoints.py +3 -2
  11. mlrun/api/api/endpoints/pipelines.py +1 -5
  12. mlrun/api/api/endpoints/projects.py +88 -0
  13. mlrun/api/api/endpoints/runs.py +48 -6
  14. mlrun/api/api/endpoints/submit.py +2 -1
  15. mlrun/api/api/endpoints/workflows.py +355 -0
  16. mlrun/api/api/utils.py +3 -4
  17. mlrun/api/crud/__init__.py +1 -0
  18. mlrun/api/crud/client_spec.py +6 -2
  19. mlrun/api/crud/feature_store.py +5 -0
  20. mlrun/api/crud/model_monitoring/__init__.py +1 -0
  21. mlrun/api/crud/model_monitoring/deployment.py +497 -0
  22. mlrun/api/crud/model_monitoring/grafana.py +96 -42
  23. mlrun/api/crud/model_monitoring/helpers.py +159 -0
  24. mlrun/api/crud/model_monitoring/model_endpoints.py +202 -476
  25. mlrun/api/crud/notifications.py +9 -4
  26. mlrun/api/crud/pipelines.py +6 -11
  27. mlrun/api/crud/projects.py +2 -2
  28. mlrun/api/crud/runtime_resources.py +4 -3
  29. mlrun/api/crud/runtimes/nuclio/helpers.py +5 -1
  30. mlrun/api/crud/secrets.py +21 -0
  31. mlrun/api/crud/workflows.py +352 -0
  32. mlrun/api/db/base.py +16 -1
  33. mlrun/api/db/init_db.py +2 -4
  34. mlrun/api/db/session.py +1 -1
  35. mlrun/api/db/sqldb/db.py +129 -31
  36. mlrun/api/db/sqldb/models/models_mysql.py +15 -1
  37. mlrun/api/db/sqldb/models/models_sqlite.py +16 -2
  38. mlrun/api/launcher.py +38 -6
  39. mlrun/api/main.py +3 -2
  40. mlrun/api/rundb/__init__.py +13 -0
  41. mlrun/{db → api/rundb}/sqldb.py +36 -84
  42. mlrun/api/runtime_handlers/__init__.py +56 -0
  43. mlrun/api/runtime_handlers/base.py +1247 -0
  44. mlrun/api/runtime_handlers/daskjob.py +209 -0
  45. mlrun/api/runtime_handlers/kubejob.py +37 -0
  46. mlrun/api/runtime_handlers/mpijob.py +147 -0
  47. mlrun/api/runtime_handlers/remotesparkjob.py +29 -0
  48. mlrun/api/runtime_handlers/sparkjob.py +148 -0
  49. mlrun/api/schemas/__init__.py +17 -6
  50. mlrun/api/utils/builder.py +1 -4
  51. mlrun/api/utils/clients/chief.py +14 -0
  52. mlrun/api/utils/clients/iguazio.py +33 -33
  53. mlrun/api/utils/clients/nuclio.py +2 -2
  54. mlrun/api/utils/periodic.py +9 -2
  55. mlrun/api/utils/projects/follower.py +14 -7
  56. mlrun/api/utils/projects/leader.py +2 -1
  57. mlrun/api/utils/projects/remotes/nop_follower.py +2 -2
  58. mlrun/api/utils/projects/remotes/nop_leader.py +2 -2
  59. mlrun/api/utils/runtimes/__init__.py +14 -0
  60. mlrun/api/utils/runtimes/nuclio.py +43 -0
  61. mlrun/api/utils/scheduler.py +98 -15
  62. mlrun/api/utils/singletons/db.py +5 -1
  63. mlrun/api/utils/singletons/project_member.py +4 -1
  64. mlrun/api/utils/singletons/scheduler.py +1 -1
  65. mlrun/artifacts/base.py +6 -6
  66. mlrun/artifacts/dataset.py +4 -4
  67. mlrun/artifacts/manager.py +2 -3
  68. mlrun/artifacts/model.py +2 -2
  69. mlrun/artifacts/plots.py +8 -8
  70. mlrun/common/db/__init__.py +14 -0
  71. mlrun/common/helpers.py +37 -0
  72. mlrun/{mlutils → common/model_monitoring}/__init__.py +3 -2
  73. mlrun/common/model_monitoring/helpers.py +69 -0
  74. mlrun/common/schemas/__init__.py +13 -1
  75. mlrun/common/schemas/auth.py +4 -1
  76. mlrun/common/schemas/client_spec.py +1 -1
  77. mlrun/common/schemas/function.py +17 -0
  78. mlrun/common/schemas/model_monitoring/__init__.py +48 -0
  79. mlrun/common/{model_monitoring.py → schemas/model_monitoring/constants.py} +11 -23
  80. mlrun/common/schemas/model_monitoring/grafana.py +55 -0
  81. mlrun/common/schemas/{model_endpoints.py → model_monitoring/model_endpoints.py} +32 -65
  82. mlrun/common/schemas/notification.py +1 -0
  83. mlrun/common/schemas/object.py +4 -0
  84. mlrun/common/schemas/project.py +1 -0
  85. mlrun/common/schemas/regex.py +1 -1
  86. mlrun/common/schemas/runs.py +1 -8
  87. mlrun/common/schemas/schedule.py +1 -8
  88. mlrun/common/schemas/workflow.py +54 -0
  89. mlrun/config.py +45 -42
  90. mlrun/datastore/__init__.py +21 -0
  91. mlrun/datastore/base.py +1 -1
  92. mlrun/datastore/datastore.py +9 -0
  93. mlrun/datastore/dbfs_store.py +168 -0
  94. mlrun/datastore/helpers.py +18 -0
  95. mlrun/datastore/sources.py +1 -0
  96. mlrun/datastore/store_resources.py +2 -5
  97. mlrun/datastore/v3io.py +1 -2
  98. mlrun/db/__init__.py +4 -68
  99. mlrun/db/base.py +12 -0
  100. mlrun/db/factory.py +65 -0
  101. mlrun/db/httpdb.py +175 -20
  102. mlrun/db/nopdb.py +4 -2
  103. mlrun/execution.py +4 -2
  104. mlrun/feature_store/__init__.py +1 -0
  105. mlrun/feature_store/api.py +1 -2
  106. mlrun/feature_store/common.py +2 -1
  107. mlrun/feature_store/feature_set.py +1 -11
  108. mlrun/feature_store/feature_vector.py +340 -2
  109. mlrun/feature_store/ingestion.py +5 -10
  110. mlrun/feature_store/retrieval/base.py +118 -104
  111. mlrun/feature_store/retrieval/dask_merger.py +17 -10
  112. mlrun/feature_store/retrieval/job.py +4 -1
  113. mlrun/feature_store/retrieval/local_merger.py +18 -18
  114. mlrun/feature_store/retrieval/spark_merger.py +21 -14
  115. mlrun/feature_store/retrieval/storey_merger.py +22 -16
  116. mlrun/kfpops.py +3 -9
  117. mlrun/launcher/base.py +57 -53
  118. mlrun/launcher/client.py +5 -4
  119. mlrun/launcher/factory.py +24 -13
  120. mlrun/launcher/local.py +6 -6
  121. mlrun/launcher/remote.py +4 -4
  122. mlrun/lists.py +0 -11
  123. mlrun/model.py +11 -17
  124. mlrun/model_monitoring/__init__.py +2 -22
  125. mlrun/model_monitoring/features_drift_table.py +1 -1
  126. mlrun/model_monitoring/helpers.py +22 -210
  127. mlrun/model_monitoring/model_endpoint.py +1 -1
  128. mlrun/model_monitoring/model_monitoring_batch.py +127 -50
  129. mlrun/model_monitoring/prometheus.py +219 -0
  130. mlrun/model_monitoring/stores/__init__.py +16 -11
  131. mlrun/model_monitoring/stores/kv_model_endpoint_store.py +95 -23
  132. mlrun/model_monitoring/stores/models/mysql.py +47 -29
  133. mlrun/model_monitoring/stores/models/sqlite.py +47 -29
  134. mlrun/model_monitoring/stores/sql_model_endpoint_store.py +31 -19
  135. mlrun/model_monitoring/{stream_processing_fs.py → stream_processing.py} +206 -64
  136. mlrun/model_monitoring/tracking_policy.py +104 -0
  137. mlrun/package/packager.py +6 -8
  138. mlrun/package/packagers/default_packager.py +121 -10
  139. mlrun/package/packagers/numpy_packagers.py +1 -1
  140. mlrun/platforms/__init__.py +0 -2
  141. mlrun/platforms/iguazio.py +0 -56
  142. mlrun/projects/pipelines.py +53 -159
  143. mlrun/projects/project.py +10 -37
  144. mlrun/render.py +1 -1
  145. mlrun/run.py +8 -124
  146. mlrun/runtimes/__init__.py +6 -42
  147. mlrun/runtimes/base.py +29 -1249
  148. mlrun/runtimes/daskjob.py +2 -198
  149. mlrun/runtimes/funcdoc.py +0 -9
  150. mlrun/runtimes/function.py +25 -29
  151. mlrun/runtimes/kubejob.py +5 -29
  152. mlrun/runtimes/local.py +1 -1
  153. mlrun/runtimes/mpijob/__init__.py +2 -2
  154. mlrun/runtimes/mpijob/abstract.py +10 -1
  155. mlrun/runtimes/mpijob/v1.py +0 -76
  156. mlrun/runtimes/mpijob/v1alpha1.py +1 -74
  157. mlrun/runtimes/nuclio.py +3 -2
  158. mlrun/runtimes/pod.py +28 -18
  159. mlrun/runtimes/remotesparkjob.py +1 -15
  160. mlrun/runtimes/serving.py +14 -6
  161. mlrun/runtimes/sparkjob/__init__.py +0 -1
  162. mlrun/runtimes/sparkjob/abstract.py +4 -131
  163. mlrun/runtimes/utils.py +0 -26
  164. mlrun/serving/routers.py +7 -7
  165. mlrun/serving/server.py +11 -8
  166. mlrun/serving/states.py +7 -1
  167. mlrun/serving/v2_serving.py +6 -6
  168. mlrun/utils/helpers.py +23 -42
  169. mlrun/utils/notifications/notification/__init__.py +4 -0
  170. mlrun/utils/notifications/notification/webhook.py +61 -0
  171. mlrun/utils/notifications/notification_pusher.py +5 -25
  172. mlrun/utils/regex.py +7 -2
  173. mlrun/utils/version/version.json +2 -2
  174. {mlrun-1.4.0rc25.dist-info → mlrun-1.5.0rc2.dist-info}/METADATA +26 -25
  175. {mlrun-1.4.0rc25.dist-info → mlrun-1.5.0rc2.dist-info}/RECORD +180 -158
  176. {mlrun-1.4.0rc25.dist-info → mlrun-1.5.0rc2.dist-info}/WHEEL +1 -1
  177. mlrun/mlutils/data.py +0 -160
  178. mlrun/mlutils/models.py +0 -78
  179. mlrun/mlutils/plots.py +0 -902
  180. mlrun/utils/model_monitoring.py +0 -249
  181. /mlrun/{api/db/sqldb/session.py → common/db/sql_session.py} +0 -0
  182. {mlrun-1.4.0rc25.dist-info → mlrun-1.5.0rc2.dist-info}/LICENSE +0 -0
  183. {mlrun-1.4.0rc25.dist-info → mlrun-1.5.0rc2.dist-info}/entry_points.txt +0 -0
  184. {mlrun-1.4.0rc25.dist-info → mlrun-1.5.0rc2.dist-info}/top_level.txt +0 -0
mlrun/runtimes/daskjob.py CHANGED
@@ -15,33 +15,25 @@ import datetime
15
15
  import inspect
16
16
  import socket
17
17
  import time
18
- import typing
19
18
  from os import environ
20
- from typing import Dict, List, Optional, Union
21
-
22
- from deprecated import deprecated
23
- from kubernetes.client.rest import ApiException
24
- from sqlalchemy.orm import Session
25
19
 
26
20
  import mlrun.common.schemas
27
21
  import mlrun.errors
28
22
  import mlrun.k8s_utils
29
23
  import mlrun.utils
30
24
  import mlrun.utils.regex
31
- from mlrun.api.db.base import DBInterface
32
25
  from mlrun.errors import err_to_str
33
- from mlrun.runtimes.base import BaseRuntimeHandler
34
26
 
35
27
  from ..config import config
36
28
  from ..execution import MLClientCtx
37
29
  from ..model import RunObject
38
30
  from ..render import ipython_display
39
31
  from ..utils import logger, normalize_name, update_in
40
- from .base import FunctionStatus, RuntimeClassMode
32
+ from .base import FunctionStatus
41
33
  from .kubejob import KubejobRuntime
42
34
  from .local import exec_from_params, load_module
43
35
  from .pod import KubeResourceSpec, kube_resource_spec_to_pod_spec
44
- from .utils import RunError, get_func_selector, get_k8s, get_resource_labels, log_std
36
+ from .utils import RunError, get_func_selector, get_resource_labels, log_std
45
37
 
46
38
 
47
39
  def get_dask_resource():
@@ -406,16 +398,6 @@ class DaskCluster(KubejobRuntime):
406
398
  show_on_failure=show_on_failure,
407
399
  )
408
400
 
409
- # TODO: Remove in 1.5.0
410
- @deprecated(
411
- version="1.3.0",
412
- reason="'Dask gpus' will be removed in 1.5.0, use 'with_scheduler_limits' / 'with_worker_limits' instead",
413
- category=FutureWarning,
414
- )
415
- def gpus(self, gpus, gpu_type="nvidia.com/gpu"):
416
- update_in(self.spec.scheduler_resources, ["limits", gpu_type], gpus)
417
- update_in(self.spec.worker_resources, ["limits", gpu_type], gpus)
418
-
419
401
  def with_limits(
420
402
  self,
421
403
  mem=None,
@@ -686,181 +668,3 @@ def get_obj_status(selector=None, namespace=None):
686
668
  f"found dask function {pod.metadata.name} in non ready state ({status})"
687
669
  )
688
670
  return status
689
-
690
-
691
- class DaskRuntimeHandler(BaseRuntimeHandler):
692
- kind = "dask"
693
- class_modes = {RuntimeClassMode.run: "dask"}
694
-
695
- # Dask runtime resources are per function (and not per run).
696
- # It means that monitoring runtime resources state doesn't say anything about the run state.
697
- # Therefore dask run monitoring is done completely by the SDK, so overriding the monitoring method with no logic
698
- def monitor_runs(
699
- self, db: DBInterface, db_session: Session, leader_session: Optional[str] = None
700
- ):
701
- return
702
-
703
- @staticmethod
704
- def _get_object_label_selector(object_id: str) -> str:
705
- return f"mlrun/function={object_id}"
706
-
707
- @staticmethod
708
- def resolve_object_id(
709
- run: dict,
710
- ) -> typing.Optional[str]:
711
- """
712
- Resolves the object ID from the run object.
713
- In dask runtime, the object ID is the function name.
714
- :param run: run object
715
- :return: function name
716
- """
717
-
718
- function = run.get("spec", {}).get("function", None)
719
- if function:
720
-
721
- # a dask run's function field is in the format <project-name>/<function-name>@<run-uid>
722
- # we only want the function name
723
- project_and_function = function.split("@")[0]
724
- return project_and_function.split("/")[-1]
725
-
726
- return None
727
-
728
- def _enrich_list_resources_response(
729
- self,
730
- response: Union[
731
- mlrun.common.schemas.RuntimeResources,
732
- mlrun.common.schemas.GroupedByJobRuntimeResourcesOutput,
733
- mlrun.common.schemas.GroupedByProjectRuntimeResourcesOutput,
734
- ],
735
- namespace: str,
736
- label_selector: str = None,
737
- group_by: Optional[
738
- mlrun.common.schemas.ListRuntimeResourcesGroupByField
739
- ] = None,
740
- ) -> Union[
741
- mlrun.common.schemas.RuntimeResources,
742
- mlrun.common.schemas.GroupedByJobRuntimeResourcesOutput,
743
- mlrun.common.schemas.GroupedByProjectRuntimeResourcesOutput,
744
- ]:
745
- """
746
- Handling listing service resources
747
- """
748
- enrich_needed = self._validate_if_enrich_is_needed_by_group_by(group_by)
749
- if not enrich_needed:
750
- return response
751
- services = get_k8s().v1api.list_namespaced_service(
752
- namespace, label_selector=label_selector
753
- )
754
- service_resources = []
755
- for service in services.items:
756
- service_resources.append(
757
- mlrun.common.schemas.RuntimeResource(
758
- name=service.metadata.name, labels=service.metadata.labels
759
- )
760
- )
761
- return self._enrich_service_resources_in_response(
762
- response, service_resources, group_by
763
- )
764
-
765
- def _build_output_from_runtime_resources(
766
- self,
767
- response: Union[
768
- mlrun.common.schemas.RuntimeResources,
769
- mlrun.common.schemas.GroupedByJobRuntimeResourcesOutput,
770
- mlrun.common.schemas.GroupedByProjectRuntimeResourcesOutput,
771
- ],
772
- runtime_resources_list: List[mlrun.common.schemas.RuntimeResources],
773
- group_by: Optional[
774
- mlrun.common.schemas.ListRuntimeResourcesGroupByField
775
- ] = None,
776
- ):
777
- enrich_needed = self._validate_if_enrich_is_needed_by_group_by(group_by)
778
- if not enrich_needed:
779
- return response
780
- service_resources = []
781
- for runtime_resources in runtime_resources_list:
782
- if runtime_resources.service_resources:
783
- service_resources += runtime_resources.service_resources
784
- return self._enrich_service_resources_in_response(
785
- response, service_resources, group_by
786
- )
787
-
788
- def _validate_if_enrich_is_needed_by_group_by(
789
- self,
790
- group_by: Optional[
791
- mlrun.common.schemas.ListRuntimeResourcesGroupByField
792
- ] = None,
793
- ) -> bool:
794
- # Dask runtime resources are per function (and not per job) therefore, when grouping by job we're simply
795
- # omitting the dask runtime resources
796
- if group_by == mlrun.common.schemas.ListRuntimeResourcesGroupByField.job:
797
- return False
798
- elif group_by == mlrun.common.schemas.ListRuntimeResourcesGroupByField.project:
799
- return True
800
- elif group_by is not None:
801
- raise NotImplementedError(
802
- f"Provided group by field is not supported. group_by={group_by}"
803
- )
804
- return True
805
-
806
- def _enrich_service_resources_in_response(
807
- self,
808
- response: Union[
809
- mlrun.common.schemas.RuntimeResources,
810
- mlrun.common.schemas.GroupedByJobRuntimeResourcesOutput,
811
- mlrun.common.schemas.GroupedByProjectRuntimeResourcesOutput,
812
- ],
813
- service_resources: List[mlrun.common.schemas.RuntimeResource],
814
- group_by: Optional[
815
- mlrun.common.schemas.ListRuntimeResourcesGroupByField
816
- ] = None,
817
- ):
818
- if group_by == mlrun.common.schemas.ListRuntimeResourcesGroupByField.project:
819
- for service_resource in service_resources:
820
- self._add_resource_to_grouped_by_project_resources_response(
821
- response, "service_resources", service_resource
822
- )
823
- else:
824
- response.service_resources = service_resources
825
- return response
826
-
827
- def _delete_extra_resources(
828
- self,
829
- db: DBInterface,
830
- db_session: Session,
831
- namespace: str,
832
- deleted_resources: List[Dict],
833
- label_selector: str = None,
834
- force: bool = False,
835
- grace_period: int = None,
836
- ):
837
- """
838
- Handling services deletion
839
- """
840
- if grace_period is None:
841
- grace_period = config.runtime_resources_deletion_grace_period
842
- service_names = []
843
- for pod_dict in deleted_resources:
844
- dask_component = (
845
- pod_dict["metadata"].get("labels", {}).get("dask.org/component")
846
- )
847
- cluster_name = (
848
- pod_dict["metadata"].get("labels", {}).get("dask.org/cluster-name")
849
- )
850
- if dask_component == "scheduler" and cluster_name:
851
- service_names.append(cluster_name)
852
-
853
- services = get_k8s().v1api.list_namespaced_service(
854
- namespace, label_selector=label_selector
855
- )
856
- for service in services.items:
857
- try:
858
- if force or service.metadata.name in service_names:
859
- get_k8s().v1api.delete_namespaced_service(
860
- service.metadata.name, namespace
861
- )
862
- logger.info(f"Deleted service: {service.metadata.name}")
863
- except ApiException as exc:
864
- # ignore error if service is already removed
865
- if exc.status != 404:
866
- raise
mlrun/runtimes/funcdoc.py CHANGED
@@ -15,7 +15,6 @@
15
15
  import ast
16
16
  import inspect
17
17
  import re
18
- import sys
19
18
 
20
19
  from mlrun.model import FunctionEntrypoint
21
20
 
@@ -251,14 +250,6 @@ def iter_elems(ann):
251
250
  if not hasattr(ann, "slice"):
252
251
  return [ann.value]
253
252
 
254
- # From python 3.9, slice is an expr and we should evaluate it recursively. Left this for backward compatibility.
255
- # TODO: Remove this in 1.5.0 when we drop support for python 3.7
256
- if sys.version_info < (3, 9):
257
- if hasattr(ann.slice, "elts"):
258
- return ann.slice.elts
259
- elif hasattr(ann.slice, "value"):
260
- return [ann.slice.value]
261
-
262
253
  return [ann]
263
254
 
264
255
 
mlrun/runtimes/function.py CHANGED
@@ -28,11 +28,12 @@ from kubernetes import client
28
28
  from nuclio.deploy import find_dashboard_url, get_deploy_status
29
29
  from nuclio.triggers import V3IOStreamTrigger
30
30
 
31
+ import mlrun.db
31
32
  import mlrun.errors
32
33
  import mlrun.k8s_utils
33
34
  import mlrun.utils
35
+ import mlrun.utils.helpers
34
36
  from mlrun.common.schemas import AuthInfo
35
- from mlrun.db import RunDBError
36
37
 
37
38
  from ..config import config as mlconf
38
39
  from ..errors import err_to_str
@@ -86,26 +87,6 @@ def validate_nuclio_version_compatibility(*min_versions):
86
87
  return False
87
88
 
88
89
 
89
- def is_nuclio_version_in_range(min_version: str, max_version: str) -> bool:
90
- """
91
- Return whether the Nuclio version is in the range, inclusive for min, exclusive for max - [min, max)
92
- """
93
- try:
94
- parsed_min_version = semver.VersionInfo.parse(min_version)
95
- parsed_max_version = semver.VersionInfo.parse(max_version)
96
- nuclio_version = mlrun.runtimes.utils.resolve_nuclio_version()
97
- parsed_current_version = semver.VersionInfo.parse(nuclio_version)
98
- except ValueError:
99
- logger.warning(
100
- "Unable to parse nuclio version, assuming in range",
101
- nuclio_version=nuclio_version,
102
- min_version=min_version,
103
- max_version=max_version,
104
- )
105
- return True
106
- return parsed_min_version <= parsed_current_version < parsed_max_version
107
-
108
-
109
90
  def min_nuclio_versions(*versions):
110
91
  def decorator(function):
111
92
  def wrapper(*args, **kwargs):
@@ -508,6 +489,16 @@ class RemoteRuntime(KubeResource):
508
489
  endpoint = None
509
490
  if "://" in stream_path:
510
491
  endpoint, stream_path = parse_path(stream_path, suffix="")
492
+
493
+ # verify v3io stream trigger name is valid
494
+ mlrun.utils.helpers.validate_v3io_stream_consumer_group(group)
495
+
496
+ consumer_group = kwargs.pop("consumerGroup", None)
497
+ if consumer_group:
498
+ logger.warning(
499
+ "consumerGroup kwargs value is ignored. use group argument instead"
500
+ )
501
+
511
502
  container, path = split_path(stream_path)
512
503
  shards = shards or 1
513
504
  extra_attributes = extra_attributes or {}
@@ -623,7 +614,7 @@ class RemoteRuntime(KubeResource):
623
614
  text, last_log_timestamp = db.get_builder_status(
624
615
  self, last_log_timestamp=last_log_timestamp, verbose=verbose
625
616
  )
626
- except RunDBError:
617
+ except mlrun.db.RunDBError:
627
618
  raise ValueError("function or deploy process not found")
628
619
  state = self.status.state
629
620
  if text:
@@ -734,7 +725,7 @@ class RemoteRuntime(KubeResource):
734
725
  text, last_log_timestamp = self._get_db().get_builder_status(
735
726
  self, last_log_timestamp=last_log_timestamp, verbose=verbose
736
727
  )
737
- except RunDBError:
728
+ except mlrun.db.RunDBError:
738
729
  if raise_on_exception:
739
730
  return "", "", None
740
731
  raise ValueError("function or deploy process not found")
@@ -745,8 +736,8 @@ class RemoteRuntime(KubeResource):
745
736
  runtime_env = {
746
737
  "MLRUN_DEFAULT_PROJECT": self.metadata.project or mlconf.default_project,
747
738
  }
748
- if self.spec.rundb or mlconf.httpdb.api_url:
749
- runtime_env["MLRUN_DBPATH"] = self.spec.rundb or mlconf.httpdb.api_url
739
+ if mlconf.httpdb.api_url:
740
+ runtime_env["MLRUN_DBPATH"] = mlconf.httpdb.api_url
750
741
  if mlconf.namespace:
751
742
  runtime_env["MLRUN_NAMESPACE"] = mlconf.namespace
752
743
  if self.metadata.credentials.access_key:
@@ -844,6 +835,7 @@ class RemoteRuntime(KubeResource):
844
835
  force_external_address: bool = False,
845
836
  auth_info: AuthInfo = None,
846
837
  mock: bool = None,
838
+ **http_client_kwargs,
847
839
  ):
848
840
  """Invoke the remote (live) function and return the results
849
841
 
@@ -859,6 +851,9 @@ class RemoteRuntime(KubeResource):
859
851
  :param force_external_address: use the external ingress URL
860
852
  :param auth_info: service AuthInfo
861
853
  :param mock: use mock server vs a real Nuclio function (for local simulations)
854
+ :param http_client_kwargs: allow the user to pass any parameter supported in requests.request method
855
+ see this link for more information:
856
+ https://requests.readthedocs.io/en/latest/api/#requests.request
862
857
  """
863
858
  if not method:
864
859
  method = "POST" if body else "GET"
@@ -890,15 +885,16 @@ class RemoteRuntime(KubeResource):
890
885
  self.metadata.name, self.metadata.project, self.metadata.tag
891
886
  )
892
887
  headers.setdefault("x-nuclio-target", full_function_name)
893
- kwargs = {}
888
+ if not http_client_kwargs:
889
+ http_client_kwargs = {}
894
890
  if body:
895
891
  if isinstance(body, (str, bytes)):
896
- kwargs["data"] = body
892
+ http_client_kwargs["data"] = body
897
893
  else:
898
- kwargs["json"] = body
894
+ http_client_kwargs["json"] = body
899
895
  try:
900
896
  logger.info("invoking function", method=method, path=path)
901
- resp = requests.request(method, path, headers=headers, **kwargs)
897
+ resp = requests.request(method, path, headers=headers, **http_client_kwargs)
902
898
  except OSError as err:
903
899
  raise OSError(
904
900
  f"error: cannot run function at url {path}, {err_to_str(err)}"
mlrun/runtimes/kubejob.py CHANGED
@@ -20,15 +20,14 @@ from kubernetes import client
20
20
  from kubernetes.client.rest import ApiException
21
21
 
22
22
  import mlrun.common.schemas
23
+ import mlrun.db
23
24
  import mlrun.errors
24
- from mlrun.runtimes.base import BaseRuntimeHandler
25
25
 
26
- from ..db import RunDBError
27
26
  from ..errors import err_to_str
28
27
  from ..kfpops import build_op
29
28
  from ..model import RunObject
30
29
  from ..utils import get_in, logger
31
- from .base import RunError, RuntimeClassMode
30
+ from .base import RunError
32
31
  from .pod import KubeResource, kube_resource_spec_to_pod_spec
33
32
  from .utils import get_k8s
34
33
 
@@ -257,7 +256,7 @@ class KubejobRuntime(KubeResource):
257
256
  offset = 0
258
257
  try:
259
258
  text, _ = db.get_builder_status(self, 0, logs=logs)
260
- except RunDBError:
259
+ except mlrun.db.RunDBError:
261
260
  raise ValueError("function or build process not found")
262
261
 
263
262
  def print_log(text):
@@ -360,10 +359,8 @@ class KubejobRuntime(KubeResource):
360
359
 
361
360
  if self.spec.clone_target_dir:
362
361
  workdir = workdir or ""
363
- if workdir.startswith("./"):
364
- # TODO: use 'removeprefix' when we drop python 3.7 support
365
- # workdir.removeprefix("./")
366
- workdir = workdir[2:]
362
+ workdir = workdir.removeprefix("./")
363
+
367
364
  return os.path.join(self.spec.clone_target_dir, workdir)
368
365
 
369
366
  return workdir
@@ -390,24 +387,3 @@ def func_to_pod(image, runtime, extra_env, command, args, workdir):
390
387
  ]
391
388
 
392
389
  return pod_spec
393
-
394
-
395
- class KubeRuntimeHandler(BaseRuntimeHandler):
396
- kind = "job"
397
- class_modes = {RuntimeClassMode.run: "job", RuntimeClassMode.build: "build"}
398
-
399
- @staticmethod
400
- def _expect_pods_without_uid() -> bool:
401
- """
402
- builder pods are handled as part of this runtime handler - they are not coupled to run object, therefore they
403
- don't have the uid in their labels
404
- """
405
- return True
406
-
407
- @staticmethod
408
- def _are_resources_coupled_to_run_object() -> bool:
409
- return True
410
-
411
- @staticmethod
412
- def _get_object_label_selector(object_id: str) -> str:
413
- return f"mlrun/uid={object_id}"
mlrun/runtimes/local.py CHANGED
@@ -420,7 +420,7 @@ def exec_from_params(handler, runobj: RunObject, context: MLClientCtx, cwd=None)
420
420
  if runobj.spec.verbose:
421
421
  logger.set_logger_level("DEBUG")
422
422
 
423
- # Prepare the inputs type hints (user may pass type hints as part of the inputs keys):
423
+ # Prepare the inputs type hints (user may pass type hints as part of the input keys):
424
424
  runobj.spec.extract_type_hints_from_inputs()
425
425
  # Read the keyword arguments to pass to the function (combining params and inputs from the run spec):
426
426
  kwargs = get_func_arg(handler, runobj, context)
mlrun/runtimes/mpijob/__init__.py CHANGED
@@ -14,5 +14,5 @@
14
14
 
15
15
  # flake8: noqa - this is until we take care of the F401 violations with respect to __all__ & sphinx
16
16
 
17
- from .v1 import MpiRuntimeV1, MpiV1RuntimeHandler
18
- from .v1alpha1 import MpiRuntimeV1Alpha1, MpiV1Alpha1RuntimeHandler
17
+ from .v1 import MpiRuntimeV1
18
+ from .v1alpha1 import MpiRuntimeV1Alpha1
mlrun/runtimes/mpijob/abstract.py CHANGED
@@ -25,7 +25,7 @@ from mlrun.model import RunObject
25
25
  from mlrun.runtimes.kubejob import KubejobRuntime
26
26
  from mlrun.runtimes.pod import KubeResourceSpec
27
27
  from mlrun.runtimes.utils import RunError, get_k8s
28
- from mlrun.utils import get_in, logger
28
+ from mlrun.utils import get_in, logger, update_in
29
29
 
30
30
 
31
31
  class MPIResourceSpec(KubeResourceSpec):
@@ -138,6 +138,15 @@ class AbstractMPIJobRuntime(KubejobRuntime, abc.ABC):
138
138
  def _get_crd_info() -> typing.Tuple[str, str, str]:
139
139
  pass
140
140
 
141
+ @staticmethod
142
+ def _get_run_completion_updates(run: dict) -> dict:
143
+
144
+ # TODO: add a 'workers' section in run objects state, each worker will update its state while
145
+ # the run state will be resolved by the server.
146
+ # update the run object state if empty so that it won't default to 'created' state
147
+ update_in(run, "status.state", "running", append=False, replace=False)
148
+ return {}
149
+
141
150
  def _pretty_print_jobs(self, items: typing.List):
142
151
  print(f"{'status':10} {'name':20} {'start':21} end")
143
152
  for i in items:
mlrun/runtimes/mpijob/v1.py CHANGED
@@ -13,17 +13,13 @@
13
13
  # limitations under the License.
14
14
  import typing
15
15
  from copy import deepcopy
16
- from datetime import datetime
17
16
 
18
17
  from kubernetes import client
19
- from sqlalchemy.orm import Session
20
18
 
21
19
  import mlrun.runtimes.pod
22
- from mlrun.api.db.base import DBInterface
23
20
  from mlrun.config import config as mlconf
24
21
  from mlrun.execution import MLClientCtx
25
22
  from mlrun.model import RunObject
26
- from mlrun.runtimes.base import BaseRuntimeHandler, RunStates, RuntimeClassMode
27
23
  from mlrun.runtimes.constants import MPIJobCRDVersions, MPIJobV1CleanPodPolicies
28
24
  from mlrun.runtimes.mpijob.abstract import AbstractMPIJobRuntime, MPIResourceSpec
29
25
  from mlrun.utils import get_in, update_in
@@ -318,75 +314,3 @@ class MpiRuntimeV1(AbstractMPIJobRuntime):
318
314
  MpiRuntimeV1.crd_version,
319
315
  MpiRuntimeV1.crd_plural,
320
316
  )
321
-
322
-
323
- class MpiV1RuntimeHandler(BaseRuntimeHandler):
324
- kind = "mpijob"
325
- class_modes = {
326
- RuntimeClassMode.run: "mpijob",
327
- }
328
-
329
- def _resolve_crd_object_status_info(
330
- self, db: DBInterface, db_session: Session, crd_object
331
- ) -> typing.Tuple[bool, typing.Optional[datetime], typing.Optional[str]]:
332
- """
333
- https://github.com/kubeflow/mpi-operator/blob/master/pkg/apis/kubeflow/v1/types.go#L29
334
- https://github.com/kubeflow/common/blob/master/pkg/apis/common/v1/types.go#L55
335
- """
336
- launcher_status = (
337
- crd_object.get("status", {}).get("replicaStatuses", {}).get("Launcher", {})
338
- )
339
- # the launcher status also has running property, but it's empty for
340
- # short period after the creation, so we're
341
- # checking terminal state by the completion time existence
342
- in_terminal_state = (
343
- crd_object.get("status", {}).get("completionTime", None) is not None
344
- )
345
- desired_run_state = RunStates.running
346
- completion_time = None
347
- if in_terminal_state:
348
- completion_time = datetime.fromisoformat(
349
- crd_object.get("status", {})
350
- .get("completionTime")
351
- .replace("Z", "+00:00")
352
- )
353
- desired_run_state = (
354
- RunStates.error
355
- if launcher_status.get("failed", 0) > 0
356
- else RunStates.completed
357
- )
358
- return in_terminal_state, completion_time, desired_run_state
359
-
360
- @staticmethod
361
- def _are_resources_coupled_to_run_object() -> bool:
362
- return True
363
-
364
- @staticmethod
365
- def _get_object_label_selector(object_id: str) -> str:
366
- return f"mlrun/uid={object_id}"
367
-
368
- @staticmethod
369
- def _get_main_runtime_resource_label_selector() -> str:
370
- """
371
- There are some runtimes which might have multiple k8s resources attached to a one runtime, in this case
372
- we don't want to pull logs from all but rather only for the "driver"/"launcher" etc
373
- :return: the label selector
374
- """
375
- return "mpi-job-role=launcher"
376
-
377
- @staticmethod
378
- def _get_run_completion_updates(run: dict) -> dict:
379
-
380
- # TODO: add a 'workers' section in run objects state, each worker will update its state while
381
- # the run state will be resolved by the server.
382
- # update the run object state if empty so that it won't default to 'created' state
383
- update_in(run, "status.state", "running", append=False, replace=False)
384
- return {}
385
-
386
- @staticmethod
387
- def _get_crd_info() -> typing.Tuple[str, str, str]:
388
- return (
389
- MpiRuntimeV1.crd_group,
390
- MpiRuntimeV1.crd_version,
391
- MpiRuntimeV1.crd_plural,
392
- )
mlrun/runtimes/mpijob/v1alpha1.py CHANGED
@@ -13,18 +13,14 @@
13
13
  # limitations under the License.
14
14
  import typing
15
15
  from copy import deepcopy
16
- from datetime import datetime
17
16
 
18
17
  from kubernetes import client
19
- from sqlalchemy.orm import Session
20
18
 
21
19
  import mlrun.runtimes.pod
22
- from mlrun.api.db.base import DBInterface
23
20
  from mlrun.config import config as mlconf
24
21
  from mlrun.execution import MLClientCtx
25
22
  from mlrun.model import RunObject
26
- from mlrun.runtimes.base import BaseRuntimeHandler, RunStates, RuntimeClassMode
27
- from mlrun.runtimes.constants import MPIJobCRDVersions, MPIJobV1Alpha1States
23
+ from mlrun.runtimes.constants import MPIJobCRDVersions
28
24
  from mlrun.runtimes.mpijob.abstract import AbstractMPIJobRuntime
29
25
  from mlrun.utils import get_in, update_in
30
26
 
@@ -156,72 +152,3 @@ class MpiRuntimeV1Alpha1(AbstractMPIJobRuntime):
156
152
  MpiRuntimeV1Alpha1.crd_version,
157
153
  MpiRuntimeV1Alpha1.crd_plural,
158
154
  )
159
-
160
-
161
- class MpiV1Alpha1RuntimeHandler(BaseRuntimeHandler):
162
- kind = "mpijob"
163
- class_modes = {
164
- RuntimeClassMode.run: "mpijob",
165
- }
166
-
167
- def _resolve_crd_object_status_info(
168
- self, db: DBInterface, db_session: Session, crd_object
169
- ) -> typing.Tuple[bool, typing.Optional[datetime], typing.Optional[str]]:
170
- """
171
- https://github.com/kubeflow/mpi-operator/blob/master/pkg/apis/kubeflow/v1alpha1/types.go#L115
172
- """
173
- launcher_status = crd_object.get("status", {}).get("launcherStatus", "")
174
- in_terminal_state = launcher_status in MPIJobV1Alpha1States.terminal_states()
175
- desired_run_state = MPIJobV1Alpha1States.mpijob_state_to_run_state(
176
- launcher_status
177
- )
178
- completion_time = None
179
- if in_terminal_state:
180
- completion_time = datetime.fromisoformat(
181
- crd_object.get("status", {})
182
- .get("completionTime")
183
- .replace("Z", "+00:00")
184
- )
185
- desired_run_state = {
186
- "Succeeded": RunStates.completed,
187
- "Failed": RunStates.error,
188
- }[launcher_status]
189
- return in_terminal_state, completion_time, desired_run_state
190
-
191
- @staticmethod
192
- def _are_resources_coupled_to_run_object() -> bool:
193
- return True
194
-
195
- @staticmethod
196
- def _get_object_label_selector(object_id: str) -> str:
197
- return f"mlrun/uid={object_id}"
198
-
199
- @staticmethod
200
- def _get_main_runtime_resource_label_selector() -> str:
201
- """
202
- There are some runtimes which might have multiple k8s resources attached to a one runtime, in this case
203
- we don't want to pull logs from all but rather only for the "driver"/"launcher" etc
204
- :return: the label selector
205
- """
206
- return "mpi_role_type=launcher"
207
-
208
- @staticmethod
209
- def _get_run_completion_updates(run: dict) -> dict:
210
-
211
- # TODO: add a 'workers' section in run objects state, each worker will update its state while
212
- # the run state will be resolved by the server.
213
- # update the run object state if empty so that it won't default to 'created' state
214
- update_in(run, "status.state", "running", append=False, replace=False)
215
- return {}
216
-
217
- @staticmethod
218
- def _get_crd_info() -> typing.Tuple[str, str, str]:
219
- return (
220
- MpiRuntimeV1Alpha1.crd_group,
221
- MpiRuntimeV1Alpha1.crd_version,
222
- MpiRuntimeV1Alpha1.crd_plural,
223
- )
224
-
225
- @staticmethod
226
- def _get_crd_object_status(crd_object) -> str:
227
- return crd_object.get("status", {}).get("launcherStatus", "")