mlrun 1.4.0rc25__py3-none-any.whl → 1.5.0rc2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mlrun might be problematic.

Files changed (184)
  1. mlrun/__init__.py +2 -35
  2. mlrun/__main__.py +3 -41
  3. mlrun/api/api/api.py +6 -0
  4. mlrun/api/api/endpoints/feature_store.py +0 -4
  5. mlrun/api/api/endpoints/files.py +14 -2
  6. mlrun/api/api/endpoints/frontend_spec.py +2 -1
  7. mlrun/api/api/endpoints/functions.py +95 -59
  8. mlrun/api/api/endpoints/grafana_proxy.py +9 -9
  9. mlrun/api/api/endpoints/logs.py +17 -3
  10. mlrun/api/api/endpoints/model_endpoints.py +3 -2
  11. mlrun/api/api/endpoints/pipelines.py +1 -5
  12. mlrun/api/api/endpoints/projects.py +88 -0
  13. mlrun/api/api/endpoints/runs.py +48 -6
  14. mlrun/api/api/endpoints/submit.py +2 -1
  15. mlrun/api/api/endpoints/workflows.py +355 -0
  16. mlrun/api/api/utils.py +3 -4
  17. mlrun/api/crud/__init__.py +1 -0
  18. mlrun/api/crud/client_spec.py +6 -2
  19. mlrun/api/crud/feature_store.py +5 -0
  20. mlrun/api/crud/model_monitoring/__init__.py +1 -0
  21. mlrun/api/crud/model_monitoring/deployment.py +497 -0
  22. mlrun/api/crud/model_monitoring/grafana.py +96 -42
  23. mlrun/api/crud/model_monitoring/helpers.py +159 -0
  24. mlrun/api/crud/model_monitoring/model_endpoints.py +202 -476
  25. mlrun/api/crud/notifications.py +9 -4
  26. mlrun/api/crud/pipelines.py +6 -11
  27. mlrun/api/crud/projects.py +2 -2
  28. mlrun/api/crud/runtime_resources.py +4 -3
  29. mlrun/api/crud/runtimes/nuclio/helpers.py +5 -1
  30. mlrun/api/crud/secrets.py +21 -0
  31. mlrun/api/crud/workflows.py +352 -0
  32. mlrun/api/db/base.py +16 -1
  33. mlrun/api/db/init_db.py +2 -4
  34. mlrun/api/db/session.py +1 -1
  35. mlrun/api/db/sqldb/db.py +129 -31
  36. mlrun/api/db/sqldb/models/models_mysql.py +15 -1
  37. mlrun/api/db/sqldb/models/models_sqlite.py +16 -2
  38. mlrun/api/launcher.py +38 -6
  39. mlrun/api/main.py +3 -2
  40. mlrun/api/rundb/__init__.py +13 -0
  41. mlrun/{db → api/rundb}/sqldb.py +36 -84
  42. mlrun/api/runtime_handlers/__init__.py +56 -0
  43. mlrun/api/runtime_handlers/base.py +1247 -0
  44. mlrun/api/runtime_handlers/daskjob.py +209 -0
  45. mlrun/api/runtime_handlers/kubejob.py +37 -0
  46. mlrun/api/runtime_handlers/mpijob.py +147 -0
  47. mlrun/api/runtime_handlers/remotesparkjob.py +29 -0
  48. mlrun/api/runtime_handlers/sparkjob.py +148 -0
  49. mlrun/api/schemas/__init__.py +17 -6
  50. mlrun/api/utils/builder.py +1 -4
  51. mlrun/api/utils/clients/chief.py +14 -0
  52. mlrun/api/utils/clients/iguazio.py +33 -33
  53. mlrun/api/utils/clients/nuclio.py +2 -2
  54. mlrun/api/utils/periodic.py +9 -2
  55. mlrun/api/utils/projects/follower.py +14 -7
  56. mlrun/api/utils/projects/leader.py +2 -1
  57. mlrun/api/utils/projects/remotes/nop_follower.py +2 -2
  58. mlrun/api/utils/projects/remotes/nop_leader.py +2 -2
  59. mlrun/api/utils/runtimes/__init__.py +14 -0
  60. mlrun/api/utils/runtimes/nuclio.py +43 -0
  61. mlrun/api/utils/scheduler.py +98 -15
  62. mlrun/api/utils/singletons/db.py +5 -1
  63. mlrun/api/utils/singletons/project_member.py +4 -1
  64. mlrun/api/utils/singletons/scheduler.py +1 -1
  65. mlrun/artifacts/base.py +6 -6
  66. mlrun/artifacts/dataset.py +4 -4
  67. mlrun/artifacts/manager.py +2 -3
  68. mlrun/artifacts/model.py +2 -2
  69. mlrun/artifacts/plots.py +8 -8
  70. mlrun/common/db/__init__.py +14 -0
  71. mlrun/common/helpers.py +37 -0
  72. mlrun/{mlutils → common/model_monitoring}/__init__.py +3 -2
  73. mlrun/common/model_monitoring/helpers.py +69 -0
  74. mlrun/common/schemas/__init__.py +13 -1
  75. mlrun/common/schemas/auth.py +4 -1
  76. mlrun/common/schemas/client_spec.py +1 -1
  77. mlrun/common/schemas/function.py +17 -0
  78. mlrun/common/schemas/model_monitoring/__init__.py +48 -0
  79. mlrun/common/{model_monitoring.py → schemas/model_monitoring/constants.py} +11 -23
  80. mlrun/common/schemas/model_monitoring/grafana.py +55 -0
  81. mlrun/common/schemas/{model_endpoints.py → model_monitoring/model_endpoints.py} +32 -65
  82. mlrun/common/schemas/notification.py +1 -0
  83. mlrun/common/schemas/object.py +4 -0
  84. mlrun/common/schemas/project.py +1 -0
  85. mlrun/common/schemas/regex.py +1 -1
  86. mlrun/common/schemas/runs.py +1 -8
  87. mlrun/common/schemas/schedule.py +1 -8
  88. mlrun/common/schemas/workflow.py +54 -0
  89. mlrun/config.py +45 -42
  90. mlrun/datastore/__init__.py +21 -0
  91. mlrun/datastore/base.py +1 -1
  92. mlrun/datastore/datastore.py +9 -0
  93. mlrun/datastore/dbfs_store.py +168 -0
  94. mlrun/datastore/helpers.py +18 -0
  95. mlrun/datastore/sources.py +1 -0
  96. mlrun/datastore/store_resources.py +2 -5
  97. mlrun/datastore/v3io.py +1 -2
  98. mlrun/db/__init__.py +4 -68
  99. mlrun/db/base.py +12 -0
  100. mlrun/db/factory.py +65 -0
  101. mlrun/db/httpdb.py +175 -20
  102. mlrun/db/nopdb.py +4 -2
  103. mlrun/execution.py +4 -2
  104. mlrun/feature_store/__init__.py +1 -0
  105. mlrun/feature_store/api.py +1 -2
  106. mlrun/feature_store/common.py +2 -1
  107. mlrun/feature_store/feature_set.py +1 -11
  108. mlrun/feature_store/feature_vector.py +340 -2
  109. mlrun/feature_store/ingestion.py +5 -10
  110. mlrun/feature_store/retrieval/base.py +118 -104
  111. mlrun/feature_store/retrieval/dask_merger.py +17 -10
  112. mlrun/feature_store/retrieval/job.py +4 -1
  113. mlrun/feature_store/retrieval/local_merger.py +18 -18
  114. mlrun/feature_store/retrieval/spark_merger.py +21 -14
  115. mlrun/feature_store/retrieval/storey_merger.py +22 -16
  116. mlrun/kfpops.py +3 -9
  117. mlrun/launcher/base.py +57 -53
  118. mlrun/launcher/client.py +5 -4
  119. mlrun/launcher/factory.py +24 -13
  120. mlrun/launcher/local.py +6 -6
  121. mlrun/launcher/remote.py +4 -4
  122. mlrun/lists.py +0 -11
  123. mlrun/model.py +11 -17
  124. mlrun/model_monitoring/__init__.py +2 -22
  125. mlrun/model_monitoring/features_drift_table.py +1 -1
  126. mlrun/model_monitoring/helpers.py +22 -210
  127. mlrun/model_monitoring/model_endpoint.py +1 -1
  128. mlrun/model_monitoring/model_monitoring_batch.py +127 -50
  129. mlrun/model_monitoring/prometheus.py +219 -0
  130. mlrun/model_monitoring/stores/__init__.py +16 -11
  131. mlrun/model_monitoring/stores/kv_model_endpoint_store.py +95 -23
  132. mlrun/model_monitoring/stores/models/mysql.py +47 -29
  133. mlrun/model_monitoring/stores/models/sqlite.py +47 -29
  134. mlrun/model_monitoring/stores/sql_model_endpoint_store.py +31 -19
  135. mlrun/model_monitoring/{stream_processing_fs.py → stream_processing.py} +206 -64
  136. mlrun/model_monitoring/tracking_policy.py +104 -0
  137. mlrun/package/packager.py +6 -8
  138. mlrun/package/packagers/default_packager.py +121 -10
  139. mlrun/package/packagers/numpy_packagers.py +1 -1
  140. mlrun/platforms/__init__.py +0 -2
  141. mlrun/platforms/iguazio.py +0 -56
  142. mlrun/projects/pipelines.py +53 -159
  143. mlrun/projects/project.py +10 -37
  144. mlrun/render.py +1 -1
  145. mlrun/run.py +8 -124
  146. mlrun/runtimes/__init__.py +6 -42
  147. mlrun/runtimes/base.py +29 -1249
  148. mlrun/runtimes/daskjob.py +2 -198
  149. mlrun/runtimes/funcdoc.py +0 -9
  150. mlrun/runtimes/function.py +25 -29
  151. mlrun/runtimes/kubejob.py +5 -29
  152. mlrun/runtimes/local.py +1 -1
  153. mlrun/runtimes/mpijob/__init__.py +2 -2
  154. mlrun/runtimes/mpijob/abstract.py +10 -1
  155. mlrun/runtimes/mpijob/v1.py +0 -76
  156. mlrun/runtimes/mpijob/v1alpha1.py +1 -74
  157. mlrun/runtimes/nuclio.py +3 -2
  158. mlrun/runtimes/pod.py +28 -18
  159. mlrun/runtimes/remotesparkjob.py +1 -15
  160. mlrun/runtimes/serving.py +14 -6
  161. mlrun/runtimes/sparkjob/__init__.py +0 -1
  162. mlrun/runtimes/sparkjob/abstract.py +4 -131
  163. mlrun/runtimes/utils.py +0 -26
  164. mlrun/serving/routers.py +7 -7
  165. mlrun/serving/server.py +11 -8
  166. mlrun/serving/states.py +7 -1
  167. mlrun/serving/v2_serving.py +6 -6
  168. mlrun/utils/helpers.py +23 -42
  169. mlrun/utils/notifications/notification/__init__.py +4 -0
  170. mlrun/utils/notifications/notification/webhook.py +61 -0
  171. mlrun/utils/notifications/notification_pusher.py +5 -25
  172. mlrun/utils/regex.py +7 -2
  173. mlrun/utils/version/version.json +2 -2
  174. {mlrun-1.4.0rc25.dist-info → mlrun-1.5.0rc2.dist-info}/METADATA +26 -25
  175. {mlrun-1.4.0rc25.dist-info → mlrun-1.5.0rc2.dist-info}/RECORD +180 -158
  176. {mlrun-1.4.0rc25.dist-info → mlrun-1.5.0rc2.dist-info}/WHEEL +1 -1
  177. mlrun/mlutils/data.py +0 -160
  178. mlrun/mlutils/models.py +0 -78
  179. mlrun/mlutils/plots.py +0 -902
  180. mlrun/utils/model_monitoring.py +0 -249
  181. /mlrun/{api/db/sqldb/session.py → common/db/sql_session.py} +0 -0
  182. {mlrun-1.4.0rc25.dist-info → mlrun-1.5.0rc2.dist-info}/LICENSE +0 -0
  183. {mlrun-1.4.0rc25.dist-info → mlrun-1.5.0rc2.dist-info}/entry_points.txt +0 -0
  184. {mlrun-1.4.0rc25.dist-info → mlrun-1.5.0rc2.dist-info}/top_level.txt +0 -0
mlrun/runtimes/nuclio.py CHANGED
@@ -16,7 +16,8 @@ import json
 import os
 import socket
 
-from ..db import get_or_set_dburl
+import mlrun.db
+
 from ..errors import err_to_str
 from ..execution import MLClientCtx
 from ..model import RunTemplate
@@ -60,7 +61,7 @@ def nuclio_jobs_handler(context, event):
             status_code=400,
         )
 
-    out = get_or_set_dburl()
+    out = mlrun.db.get_or_set_dburl()
     if out:
         context.logger.info(f"logging run results to: {out}")
 
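Aside: the switch from `from ..db import get_or_set_dburl` to the module-qualified `import mlrun.db` changes when the name is bound. A minimal sketch of the difference (the `handler` wrapper is illustrative):

    # a `from` import binds the function object once, at import time;
    # replacing mlrun.db.get_or_set_dburl afterwards won't be seen here
    from mlrun.db import get_or_set_dburl

    # a module import binds only the module object; the attribute is
    # looked up on every call, so tests can monkeypatch it and circular
    # imports during package initialization are easier to avoid
    import mlrun.db

    def handler():
        return mlrun.db.get_or_set_dburl()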
mlrun/runtimes/pod.py CHANGED
@@ -20,7 +20,6 @@ from enum import Enum
 import dotenv
 import kfp.dsl
 import kubernetes.client as k8s_client
-from deprecated import deprecated
 
 import mlrun.errors
 import mlrun.utils.regex
@@ -360,15 +359,25 @@ class KubeResourceSpec(FunctionSpec):
         patch: bool = False,
     ):
         """
-        set pod cpu/memory/gpu limits
-        by default it overrides the whole limits section, if you wish to patch specific resources use `patch=True`.
+        Set pod cpu/memory/gpu limits (max values)
+
+        :param mem:      set limit for memory e.g. '500M', '2G', etc.
+        :param cpu:      set limit for cpu e.g. '0.5', '2', etc.
+        :param gpus:     set limit for gpu
+        :param gpu_type: set gpu type e.g. "nvidia.com/gpu"
+        :param patch:    by default it overrides the whole limits section,
+                         if you wish to patch specific resources use `patch=True`
         """
         self._verify_and_set_limits("resources", mem, cpu, gpus, gpu_type, patch=patch)
 
     def with_requests(self, mem: str = None, cpu: str = None, patch: bool = False):
         """
-        set requested (desired) pod cpu/memory resources
-        by default it overrides the whole requests section, if you wish to patch specific resources use `patch=True`.
+        Set requested (desired) pod cpu/memory resources
+
+        :param mem:   set request for memory e.g. '200M', '1G', etc.
+        :param cpu:   set request for cpu e.g. '0.1', '1', etc.
+        :param patch: by default it overrides the whole requests section,
+                      if you wish to patch specific resources use `patch=True`
         """
         self._verify_and_set_requests("resources", mem, cpu, patch)
 
@@ -1004,15 +1013,6 @@ class KubeResource(BaseRuntime):
             self.set_env(name, value)
         return self
 
-    # TODO: Remove in 1.5.0
-    @deprecated(
-        version="1.3.0",
-        reason="'Job gpus' will be removed in 1.5.0, use 'with_limits' instead",
-        category=FutureWarning,
-    )
-    def gpus(self, gpus, gpu_type="nvidia.com/gpu"):
-        update_in(self.spec.resources, ["limits", gpu_type], gpus)
-
     def set_image_pull_configuration(
         self, image_pull_policy: str = None, image_pull_secret_name: str = None
     ):
@@ -1041,15 +1041,25 @@ class KubeResource(BaseRuntime):
         patch: bool = False,
     ):
         """
-        set pod cpu/memory/gpu limits
-        by default it overrides the whole limits section, if you wish to patch specific resources use `patch=True`.
+        Set pod cpu/memory/gpu limits (max values)
+
+        :param mem:      set limit for memory e.g. '500M', '2G', etc.
+        :param cpu:      set limit for cpu e.g. '0.5', '2', etc.
+        :param gpus:     set limit for gpu
+        :param gpu_type: set gpu type e.g. "nvidia.com/gpu"
+        :param patch:    by default it overrides the whole limits section,
+                         if you wish to patch specific resources use `patch=True`
         """
         self.spec.with_limits(mem, cpu, gpus, gpu_type, patch=patch)
 
     def with_requests(self, mem: str = None, cpu: str = None, patch: bool = False):
         """
-        set requested (desired) pod cpu/memory resources
-        by default it overrides the whole requests section, if you wish to patch specific resources use `patch=True`.
+        Set requested (desired) pod cpu/memory resources
+
+        :param mem:   set request for memory e.g. '200M', '1G', etc.
+        :param cpu:   set request for cpu e.g. '0.1', '1', etc.
+        :param patch: by default it overrides the whole requests section,
+                      if you wish to patch specific resources use `patch=True`
         """
        self.spec.with_requests(mem, cpu, patch=patch)
 
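With the deprecated `gpus()` helper gone, GPU limits go through `with_limits`. A usage sketch against the docstrings above (the function name and values are illustrative):

    import mlrun

    fn = mlrun.new_function("trainer", kind="job", image="mlrun/mlrun")

    # override the whole limits/requests sections
    fn.with_limits(mem="2G", cpu="2", gpus=1, gpu_type="nvidia.com/gpu")
    fn.with_requests(mem="1G", cpu="0.5")

    # patch only the memory limit, keeping the cpu/gpu limits set above
    fn.with_limits(mem="4G", patch=True)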
mlrun/runtimes/remotesparkjob.py CHANGED
@@ -21,8 +21,7 @@ from mlrun.config import config
 
 from ..model import RunObject
 from ..platforms.iguazio import mount_v3io, mount_v3iod
-from .base import RuntimeClassMode
-from .kubejob import KubejobRuntime, KubeRuntimeHandler
+from .kubejob import KubejobRuntime
 from .pod import KubeResourceSpec
 
 
@@ -210,18 +209,5 @@ class RemoteSparkRuntime(KubejobRuntime):
         )
 
 
-class RemoteSparkRuntimeHandler(KubeRuntimeHandler):
-    kind = "remote-spark"
-    class_modes = {RuntimeClassMode.run: "remote-spark"}
-
-    @staticmethod
-    def _are_resources_coupled_to_run_object() -> bool:
-        return True
-
-    @staticmethod
-    def _get_object_label_selector(object_id: str) -> str:
-        return f"mlrun/uid={object_id}"
-
-
 def igz_spark_pre_hook():
     run(["/bin/bash", "/etc/config/v3io/spark-job-init.sh"])
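The handler logic removed here now lives server-side (see `mlrun/api/runtime_handlers/remotesparkjob.py`, file #47 above). For context, the `mlrun/uid={object_id}` selector it builds is what gets handed to the Kubernetes API to locate a run's resources; a hedged sketch with the official client (the namespace default is an assumption):

    from kubernetes import client, config

    def list_run_pods(run_uid: str, namespace: str = "default-tenant"):
        # "default-tenant" is the Iguazio default; adjust for your cluster
        config.load_kube_config()
        v1 = client.CoreV1Api()
        selector = f"mlrun/uid={run_uid}"  # same form the handler returns
        return v1.list_namespaced_pod(namespace, label_selector=selector).items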
mlrun/runtimes/serving.py CHANGED
@@ -22,6 +22,7 @@ from nuclio import KafkaTrigger
 
 import mlrun
 import mlrun.common.schemas
+from mlrun.model_monitoring.tracking_policy import TrackingPolicy
 
 from ..datastore import parse_kafka_url
 from ..model import ObjectList
@@ -36,7 +37,7 @@ from ..serving.states import (
     new_remote_endpoint,
     params_to_step,
 )
-from ..utils import get_caller_globals, logger, model_monitoring, set_paths
+from ..utils import get_caller_globals, logger, set_paths
 from .function import NuclioSpec, RemoteRuntime
 from .function_reference import FunctionReference
 
@@ -146,7 +147,6 @@ class ServingSpec(NuclioSpec):
         add_templated_ingress_host_mode=None,
         clone_target_dir=None,
     ):
-
         super().__init__(
             command=command,
             args=args,
@@ -304,7 +304,7 @@ class ServingRuntime(RemoteRuntime):
         batch: int = None,
         sample: int = None,
         stream_args: dict = None,
-        tracking_policy: Union[model_monitoring.TrackingPolicy, dict] = None,
+        tracking_policy: Union[TrackingPolicy, dict] = None,
     ):
         """set tracking parameters:
 
@@ -334,9 +334,7 @@
         if tracking_policy:
             if isinstance(tracking_policy, dict):
                 # Convert tracking policy dictionary into `model_monitoring.TrackingPolicy` object
-                self.spec.tracking_policy = model_monitoring.TrackingPolicy.from_dict(
-                    tracking_policy
-                )
+                self.spec.tracking_policy = TrackingPolicy.from_dict(tracking_policy)
             else:
                 # Tracking_policy is already a `model_monitoring.TrackingPolicy` object
                 self.spec.tracking_policy = tracking_policy
@@ -476,6 +474,16 @@
             child_function = self._spec.function_refs[function_name]
             trigger_args = stream.trigger_args or {}
 
+            if mlrun.mlconf.is_explicit_ack():
+                trigger_args["explicit_ack_mode"] = trigger_args.get(
+                    "explicit_ack_mode", "explicitOnly"
+                )
+                extra_attributes = trigger_args.get("extra_attributes", {})
+                trigger_args["extra_attributes"] = extra_attributes
+                extra_attributes["workerAllocationMode"] = extra_attributes.get(
+                    "workerAllocationMode", "static"
+                )
+
             if (
                 stream.path.startswith("kafka://")
                 or "kafka_bootstrap_servers" in stream.options
mlrun/runtimes/sparkjob/__init__.py CHANGED
@@ -14,5 +14,4 @@
 
 # flake8: noqa - this is until we take care of the F401 violations with respect to __all__ & sphinx
 
-from .abstract import SparkRuntimeHandler
 from .spark3job import Spark3Runtime
mlrun/runtimes/sparkjob/abstract.py CHANGED
@@ -14,21 +14,15 @@
 import os.path
 import typing
 from copy import deepcopy
-from datetime import datetime
-from typing import Dict, Optional, Tuple
 
 from kubernetes import client
 from kubernetes.client.rest import ApiException
-from sqlalchemy.orm import Session
 
+import mlrun.db
 import mlrun.errors
 import mlrun.utils.regex
-from mlrun.api.db.base import DBInterface
 from mlrun.config import config
-from mlrun.db import get_run_db
 from mlrun.errors import err_to_str
-from mlrun.runtimes.base import BaseRuntimeHandler
-from mlrun.runtimes.constants import RunStates, SparkApplicationStates
 
 from ...execution import MLClientCtx
 from ...model import RunObject
@@ -41,7 +35,7 @@ from ...utils import (
     verify_field_regex,
     verify_list_and_update_in,
 )
-from ..base import RunError, RuntimeClassMode
+from ..base import RunError
 from ..kubejob import KubejobRuntime
 from ..pod import KubeResourceSpec
 from ..utils import get_item_name, get_k8s
@@ -242,7 +236,7 @@ class AbstractSparkRuntime(KubejobRuntime):
         sj.with_driver_requests(cpu=1, mem="512m")
 
         sj.deploy()
-        get_run_db().delete_function(name=sj.metadata.name)
+        mlrun.db.get_run_db().delete_function(name=sj.metadata.name)
 
     def _is_using_gpu(self):
         driver_limits = self.spec.driver_resources.get("limits")
@@ -292,7 +286,7 @@ class AbstractSparkRuntime(KubejobRuntime):
        :return True if the function is ready (deployed)
        """
        # connect will populate the config from the server config
-       get_run_db()
+       mlrun.db.get_run_db()
        if not self.spec.build.base_image:
            self.spec.build.base_image = self._default_image
        return super().deploy(
@@ -841,124 +835,3 @@ with ctx:
     @spec.setter
     def spec(self, spec):
         raise NotImplementedError()
-
-
-class SparkRuntimeHandler(BaseRuntimeHandler):
-    kind = "spark"
-    class_modes = {
-        RuntimeClassMode.run: "spark",
-    }
-
-    def _resolve_crd_object_status_info(
-        self, db: DBInterface, db_session: Session, crd_object
-    ) -> Tuple[bool, Optional[datetime], Optional[str]]:
-        state = crd_object.get("status", {}).get("applicationState", {}).get("state")
-        in_terminal_state = state in SparkApplicationStates.terminal_states()
-        desired_run_state = SparkApplicationStates.spark_application_state_to_run_state(
-            state
-        )
-        completion_time = None
-        if in_terminal_state:
-            if crd_object.get("status", {}).get("terminationTime"):
-                completion_time = datetime.fromisoformat(
-                    crd_object.get("status", {})
-                    .get("terminationTime")
-                    .replace("Z", "+00:00")
-                )
-            else:
-                last_submission_attempt_time = crd_object.get("status", {}).get(
-                    "lastSubmissionAttemptTime"
-                )
-                if last_submission_attempt_time:
-                    last_submission_attempt_time = last_submission_attempt_time.replace(
-                        "Z", "+00:00"
-                    )
-                    completion_time = datetime.fromisoformat(
-                        last_submission_attempt_time
-                    )
-        return in_terminal_state, completion_time, desired_run_state
-
-    def _update_ui_url(
-        self,
-        db: DBInterface,
-        db_session: Session,
-        project: str,
-        uid: str,
-        crd_object,
-        run: Dict = None,
-    ):
-        app_state = (
-            crd_object.get("status", {}).get("applicationState", {}).get("state")
-        )
-        state = SparkApplicationStates.spark_application_state_to_run_state(app_state)
-        ui_url = None
-        if state == RunStates.running:
-            ui_url = (
-                crd_object.get("status", {})
-                .get("driverInfo", {})
-                .get("webUIIngressAddress")
-            )
-        db_ui_url = run.get("status", {}).get("ui_url")
-        if db_ui_url == ui_url:
-            return
-        run.setdefault("status", {})["ui_url"] = ui_url
-        db.store_run(db_session, run, uid, project)
-
-    @staticmethod
-    def _are_resources_coupled_to_run_object() -> bool:
-        return True
-
-    @staticmethod
-    def _get_object_label_selector(object_id: str) -> str:
-        return f"mlrun/uid={object_id}"
-
-    @staticmethod
-    def _get_main_runtime_resource_label_selector() -> str:
-        """
-        There are some runtimes which might have multiple k8s resources attached to a one runtime, in this case
-        we don't want to pull logs from all but rather only for the "driver"/"launcher" etc
-        :return: the label selector
-        """
-        return "spark-role=driver"
-
-    @staticmethod
-    def _get_crd_info() -> Tuple[str, str, str]:
-        return (
-            AbstractSparkRuntime.group,
-            AbstractSparkRuntime.version,
-            AbstractSparkRuntime.plural,
-        )
-
-    def _delete_extra_resources(
-        self,
-        db: DBInterface,
-        db_session: Session,
-        namespace: str,
-        deleted_resources: typing.List[Dict],
-        label_selector: str = None,
-        force: bool = False,
-        grace_period: int = None,
-    ):
-        """
-        Handling config maps deletion
-        """
-        uids = []
-        for crd_dict in deleted_resources:
-            uid = crd_dict["metadata"].get("labels", {}).get("mlrun/uid", None)
-            uids.append(uid)
-
-        config_maps = get_k8s().v1api.list_namespaced_config_map(
-            namespace, label_selector=label_selector
-        )
-        for config_map in config_maps.items:
-            try:
-                uid = config_map.metadata.labels.get("mlrun/uid", None)
-                if force or uid in uids:
-                    get_k8s().v1api.delete_namespaced_config_map(
-                        config_map.metadata.name, namespace
-                    )
-                    logger.info(f"Deleted config map: {config_map.metadata.name}")
-            except ApiException as exc:
-                # ignore error if config map is already removed
-                if exc.status != 404:
-                    raise
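One detail preserved in the relocated handler: Kubernetes CRD timestamps end with `Z`, which `datetime.fromisoformat` only accepts from Python 3.11 on, hence the `replace("Z", "+00:00")` step before parsing. A standalone illustration (the timestamp value is made up):

    from datetime import datetime

    k8s_timestamp = "2023-06-01T12:30:45Z"  # typical CRD status timestamp

    # rewrite the trailing "Z" as an explicit UTC offset so that
    # fromisoformat() also works on Python < 3.11
    completion_time = datetime.fromisoformat(k8s_timestamp.replace("Z", "+00:00"))
    print(completion_time)  # 2023-06-01 12:30:45+00:00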
mlrun/runtimes/utils.py CHANGED
@@ -24,11 +24,9 @@ import pandas as pd
 from kubernetes import client
 
 import mlrun
-import mlrun.api.utils.builder
 import mlrun.common.constants
 import mlrun.common.schemas
 import mlrun.utils.regex
-from mlrun.api.utils.clients import nuclio
 from mlrun.errors import err_to_str
 from mlrun.frameworks.parallel_coordinates import gen_pcp_plot
 from mlrun.runtimes.constants import MPIJobCRDVersions
@@ -62,7 +60,6 @@ global_context = _ContextStore()
 
 
 cached_mpijob_crd_version = None
-cached_nuclio_version = None
 
 
 # resolve mpijob runtime according to the mpi-operator's supported crd-version
@@ -119,29 +116,6 @@ def resolve_spark_operator_version():
     raise ValueError("Failed to resolve spark operator's version")
 
 
-# if nuclio version specified on mlrun config set it likewise,
-# if not specified, get it from nuclio api client
-# since this is a heavy operation (sending requests to API), and it's unlikely that the version
-# will change - cache it (this means if we upgrade nuclio, we need to restart mlrun to re-fetch the new version)
-def resolve_nuclio_version():
-    global cached_nuclio_version
-
-    if not cached_nuclio_version:
-
-        # config override everything
-        nuclio_version = config.nuclio_version
-        if not nuclio_version and config.nuclio_dashboard_url:
-            try:
-                nuclio_client = nuclio.Client()
-                nuclio_version = nuclio_client.get_dashboard_version()
-            except Exception as exc:
-                logger.warning("Failed to resolve nuclio version", exc=err_to_str(exc))
-
-        cached_nuclio_version = nuclio_version
-
-    return cached_nuclio_version
-
-
 def calc_hash(func, tag=""):
     # remove tag, hash, date from calculation
     tag = tag or func.metadata.tag
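`resolve_nuclio_version` moved to the server package (`mlrun/api/utils/runtimes/nuclio.py`, file #60 above); the pattern itself is a plain module-level cache, compute once and keep for the process lifetime. A generic sketch with illustrative names:

    _cached_version = None

    def resolve_version(fetch) -> str:
        # compute once per process; restarting is the only invalidation,
        # matching the comment on the removed function above
        global _cached_version
        if _cached_version is None:
            _cached_version = fetch()
        return _cached_version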
mlrun/serving/routers.py CHANGED
@@ -25,10 +25,10 @@ import numpy as np
 
 import mlrun
 import mlrun.common.model_monitoring
-import mlrun.common.schemas
-import mlrun.utils.model_monitoring
-from mlrun.utils import logger, now_date, parse_versioned_object_uri
+import mlrun.common.schemas.model_monitoring
+from mlrun.utils import logger, now_date
 
+from ..common.helpers import parse_versioned_object_uri
 from ..config import config
 from .server import GraphServer
 from .utils import RouterToDict, _extract_input_data, _update_result_body
@@ -1066,13 +1066,13 @@ def _init_endpoint_record(
                 project=project, kind="stream"
             ),
             active=True,
-            monitoring_mode=mlrun.common.model_monitoring.ModelMonitoringMode.enabled
+            monitoring_mode=mlrun.common.schemas.model_monitoring.ModelMonitoringMode.enabled
             if voting_ensemble.context.server.track_models
-            else mlrun.common.model_monitoring.ModelMonitoringMode.disabled,
+            else mlrun.common.schemas.model_monitoring.ModelMonitoringMode.disabled,
         ),
         status=mlrun.common.schemas.ModelEndpointStatus(
             children=list(voting_ensemble.routes.keys()),
-            endpoint_type=mlrun.common.model_monitoring.EndpointType.ROUTER,
+            endpoint_type=mlrun.common.schemas.model_monitoring.EndpointType.ROUTER,
             children_uids=children_uids,
         ),
     )
@@ -1091,7 +1091,7 @@
             project=project, endpoint_id=model_endpoint
         )
         current_endpoint.status.endpoint_type = (
-            mlrun.common.model_monitoring.EndpointType.LEAF_EP
+            mlrun.common.schemas.model_monitoring.EndpointType.LEAF_EP
         )
         db.create_model_endpoint(
             project=project,
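For downstream code the rename means the monitoring enums now resolve under the schemas package; a minimal sketch of the new spelling (the alias is illustrative):

    import mlrun.common.schemas.model_monitoring as mm_schemas

    # previously mlrun.common.model_monitoring.ModelMonitoringMode / EndpointType
    mode = mm_schemas.ModelMonitoringMode.enabled
    endpoint_type = mm_schemas.EndpointType.ROUTER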
mlrun/serving/server.py CHANGED
@@ -23,17 +23,19 @@ import uuid
 from typing import Optional, Union
 
 import mlrun
-import mlrun.utils.model_monitoring
-from mlrun.common.model_monitoring import FileTargetKind
+import mlrun.common.helpers
+import mlrun.model_monitoring
 from mlrun.config import config
 from mlrun.errors import err_to_str
 from mlrun.secrets import SecretsStore
 
+from ..common.helpers import parse_versioned_object_uri
+from ..common.schemas.model_monitoring.constants import FileTargetKind
 from ..datastore import get_stream_pusher
 from ..datastore.store_resources import ResourceCache
 from ..errors import MLRunInvalidArgumentError
 from ..model import ModelObj
-from ..utils import get_caller_globals, parse_versioned_object_uri
+from ..utils import get_caller_globals
 from .states import RootFlowStep, RouterStep, get_function, graph_root_setter
 from .utils import (
     event_id_key,
@@ -48,7 +50,6 @@ class _StreamContext:
     that will be used for pushing the events from the nuclio model serving function"""
 
     def __init__(self, enabled: bool, parameters: dict, function_uri: str):
-
         """
         Initialize _StreamContext object.
         :param enabled: A boolean indication for applying the stream context
@@ -71,7 +72,7 @@
             function_uri, config.default_project
         )
 
-        stream_uri = mlrun.utils.model_monitoring.get_stream_path(project=project)
+        stream_uri = mlrun.model_monitoring.get_stream_path(project=project)
 
         if log_stream:
             # Update the stream path to the log stream value
@@ -467,6 +468,8 @@ class GraphContext:
             self.logger = nuclio_context.logger
             self.Response = nuclio_context.Response
             self.worker_id = nuclio_context.worker_id
+            if hasattr(nuclio_context, "platform"):
+                self.platform = nuclio_context.platform
         elif not logger:
             self.logger = mlrun.utils.helpers.logger
 
@@ -483,7 +486,7 @@
     @property
     def project(self):
         """current project name (for the current function)"""
-        project, _, _, _ = mlrun.utils.parse_versioned_object_uri(
+        project, _, _, _ = mlrun.common.helpers.parse_versioned_object_uri(
            self._server.function_uri
        )
        return project
@@ -521,13 +524,13 @@
         """
         if "://" in name:
             return name
-        project, uri, tag, _ = mlrun.utils.parse_versioned_object_uri(
+        project, uri, tag, _ = mlrun.common.helpers.parse_versioned_object_uri(
             self._server.function_uri
         )
         if name.startswith("."):
             name = f"{uri}-{name[1:]}"
         else:
-            project, name, tag, _ = mlrun.utils.parse_versioned_object_uri(
+            project, name, tag, _ = mlrun.common.helpers.parse_versioned_object_uri(
                 name, project
             )
         (
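`parse_versioned_object_uri` itself is unchanged, only re-homed under `mlrun.common.helpers`. To illustrate the contract it implements, it splits a `project/name:tag@uid` URI into four parts; this is a hedged re-implementation sketch, not mlrun's source:

    def parse_versioned_object_uri(uri: str, default_project: str = ""):
        # every part except the name is optional; absent parts come back
        # empty, and a missing project falls back to the default
        project, name, tag, uid = default_project, uri, "", ""
        if "/" in name:
            project, name = name.split("/", 1)
        if "@" in name:
            name, uid = name.split("@", 1)
        if ":" in name:
            name, tag = name.split(":", 1)
        return project, name, tag, uid

    assert parse_versioned_object_uri("proj/fn:latest") == ("proj", "fn", "latest", "")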
mlrun/serving/states.py CHANGED
@@ -21,6 +21,8 @@ from copy import copy, deepcopy
 from inspect import getfullargspec, signature
 from typing import Union
 
+import mlrun
+
 from ..config import config
 from ..datastore import get_stream_pusher
 from ..datastore.utils import parse_kafka_url
@@ -1512,5 +1514,9 @@ def _init_async_objects(context, steps):
         wait_for_result = True
 
     source_args = context.get_param("source_args", {})
-    default_source = storey.SyncEmitSource(context=context, **source_args)
+    default_source = storey.SyncEmitSource(
+        context=context,
+        explicit_ack=mlrun.mlconf.is_explicit_ack(),
+        **source_args,
+    )
     return default_source, wait_for_result
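This change and the serving.py trigger change above gate on the same `mlrun.mlconf.is_explicit_ack()` switch, and the trigger change uses the re-assign-with-`dict.get` idiom to fill defaults without clobbering user values. A tiny illustration (the user-supplied mode value is hypothetical):

    trigger_args = {"explicit_ack_mode": "enable"}  # hypothetical user setting

    # fill in a default only when the key is absent...
    trigger_args["explicit_ack_mode"] = trigger_args.get("explicit_ack_mode", "explicitOnly")
    assert trigger_args["explicit_ack_mode"] == "enable"  # user value preserved

    # ...which dict.setdefault expresses in one call
    trigger_args.setdefault("explicit_ack_mode", "explicitOnly")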
mlrun/serving/v2_serving.py CHANGED
@@ -17,13 +17,13 @@ import time
 import traceback
 from typing import Dict, Union
 
-import mlrun
 import mlrun.common.model_monitoring
-import mlrun.common.schemas
+import mlrun.common.schemas.model_monitoring
 from mlrun.artifacts import ModelArtifact  # noqa: F401
 from mlrun.config import config
-from mlrun.utils import logger, now_date, parse_versioned_object_uri
+from mlrun.utils import logger, now_date
 
+from ..common.helpers import parse_versioned_object_uri
 from .server import GraphServer
 from .utils import StepToDict, _extract_input_data, _update_result_body
 
@@ -516,12 +516,12 @@ def _init_endpoint_record(
             project=project, kind="stream"
         ),
         active=True,
-        monitoring_mode=mlrun.common.model_monitoring.ModelMonitoringMode.enabled
+        monitoring_mode=mlrun.common.schemas.model_monitoring.ModelMonitoringMode.enabled
         if model.context.server.track_models
-        else mlrun.common.model_monitoring.ModelMonitoringMode.disabled,
+        else mlrun.common.schemas.model_monitoring.ModelMonitoringMode.disabled,
         ),
         status=mlrun.common.schemas.ModelEndpointStatus(
-            endpoint_type=mlrun.common.model_monitoring.EndpointType.NODE_EP
+            endpoint_type=mlrun.common.schemas.model_monitoring.EndpointType.NODE_EP
         ),
     )
 