mlrun 1.7.0rc4__py3-none-any.whl → 1.7.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic. Click here for more details.

Files changed (235)
  1. mlrun/__init__.py +11 -1
  2. mlrun/__main__.py +39 -121
  3. mlrun/{datastore/helpers.py → alerts/__init__.py} +2 -5
  4. mlrun/alerts/alert.py +248 -0
  5. mlrun/api/schemas/__init__.py +4 -3
  6. mlrun/artifacts/__init__.py +8 -3
  7. mlrun/artifacts/base.py +39 -254
  8. mlrun/artifacts/dataset.py +9 -190
  9. mlrun/artifacts/manager.py +73 -46
  10. mlrun/artifacts/model.py +30 -158
  11. mlrun/artifacts/plots.py +23 -380
  12. mlrun/common/constants.py +73 -1
  13. mlrun/common/db/sql_session.py +3 -2
  14. mlrun/common/formatters/__init__.py +21 -0
  15. mlrun/common/formatters/artifact.py +46 -0
  16. mlrun/common/formatters/base.py +113 -0
  17. mlrun/common/formatters/feature_set.py +44 -0
  18. mlrun/common/formatters/function.py +46 -0
  19. mlrun/common/formatters/pipeline.py +53 -0
  20. mlrun/common/formatters/project.py +51 -0
  21. mlrun/common/formatters/run.py +29 -0
  22. mlrun/common/helpers.py +11 -1
  23. mlrun/{runtimes → common/runtimes}/constants.py +32 -4
  24. mlrun/common/schemas/__init__.py +31 -4
  25. mlrun/common/schemas/alert.py +202 -0
  26. mlrun/common/schemas/api_gateway.py +196 -0
  27. mlrun/common/schemas/artifact.py +28 -1
  28. mlrun/common/schemas/auth.py +13 -2
  29. mlrun/common/schemas/client_spec.py +2 -1
  30. mlrun/common/schemas/common.py +7 -4
  31. mlrun/common/schemas/constants.py +3 -0
  32. mlrun/common/schemas/feature_store.py +58 -28
  33. mlrun/common/schemas/frontend_spec.py +8 -0
  34. mlrun/common/schemas/function.py +11 -0
  35. mlrun/common/schemas/hub.py +7 -9
  36. mlrun/common/schemas/model_monitoring/__init__.py +21 -4
  37. mlrun/common/schemas/model_monitoring/constants.py +136 -42
  38. mlrun/common/schemas/model_monitoring/grafana.py +9 -5
  39. mlrun/common/schemas/model_monitoring/model_endpoints.py +89 -41
  40. mlrun/common/schemas/notification.py +69 -12
  41. mlrun/{runtimes/mpijob/v1alpha1.py → common/schemas/pagination.py} +10 -13
  42. mlrun/common/schemas/pipeline.py +7 -0
  43. mlrun/common/schemas/project.py +67 -16
  44. mlrun/common/schemas/runs.py +17 -0
  45. mlrun/common/schemas/schedule.py +1 -1
  46. mlrun/common/schemas/workflow.py +10 -2
  47. mlrun/common/types.py +14 -1
  48. mlrun/config.py +233 -58
  49. mlrun/data_types/data_types.py +11 -1
  50. mlrun/data_types/spark.py +5 -4
  51. mlrun/data_types/to_pandas.py +75 -34
  52. mlrun/datastore/__init__.py +8 -10
  53. mlrun/datastore/alibaba_oss.py +131 -0
  54. mlrun/datastore/azure_blob.py +131 -43
  55. mlrun/datastore/base.py +107 -47
  56. mlrun/datastore/datastore.py +17 -7
  57. mlrun/datastore/datastore_profile.py +91 -7
  58. mlrun/datastore/dbfs_store.py +3 -7
  59. mlrun/datastore/filestore.py +1 -3
  60. mlrun/datastore/google_cloud_storage.py +92 -32
  61. mlrun/datastore/hdfs.py +5 -0
  62. mlrun/datastore/inmem.py +6 -3
  63. mlrun/datastore/redis.py +3 -2
  64. mlrun/datastore/s3.py +30 -12
  65. mlrun/datastore/snowflake_utils.py +45 -0
  66. mlrun/datastore/sources.py +274 -59
  67. mlrun/datastore/spark_utils.py +30 -0
  68. mlrun/datastore/store_resources.py +9 -7
  69. mlrun/datastore/storeytargets.py +151 -0
  70. mlrun/datastore/targets.py +387 -119
  71. mlrun/datastore/utils.py +68 -5
  72. mlrun/datastore/v3io.py +28 -50
  73. mlrun/db/auth_utils.py +152 -0
  74. mlrun/db/base.py +245 -20
  75. mlrun/db/factory.py +1 -4
  76. mlrun/db/httpdb.py +909 -231
  77. mlrun/db/nopdb.py +279 -14
  78. mlrun/errors.py +35 -5
  79. mlrun/execution.py +111 -38
  80. mlrun/feature_store/__init__.py +0 -2
  81. mlrun/feature_store/api.py +46 -53
  82. mlrun/feature_store/common.py +6 -11
  83. mlrun/feature_store/feature_set.py +48 -23
  84. mlrun/feature_store/feature_vector.py +13 -2
  85. mlrun/feature_store/ingestion.py +7 -6
  86. mlrun/feature_store/retrieval/base.py +9 -4
  87. mlrun/feature_store/retrieval/dask_merger.py +2 -0
  88. mlrun/feature_store/retrieval/job.py +13 -4
  89. mlrun/feature_store/retrieval/local_merger.py +2 -0
  90. mlrun/feature_store/retrieval/spark_merger.py +24 -32
  91. mlrun/feature_store/steps.py +38 -19
  92. mlrun/features.py +6 -14
  93. mlrun/frameworks/_common/plan.py +3 -3
  94. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +7 -12
  95. mlrun/frameworks/_ml_common/plan.py +1 -1
  96. mlrun/frameworks/auto_mlrun/auto_mlrun.py +2 -2
  97. mlrun/frameworks/lgbm/__init__.py +1 -1
  98. mlrun/frameworks/lgbm/callbacks/callback.py +2 -4
  99. mlrun/frameworks/lgbm/model_handler.py +1 -1
  100. mlrun/frameworks/parallel_coordinates.py +4 -4
  101. mlrun/frameworks/pytorch/__init__.py +2 -2
  102. mlrun/frameworks/sklearn/__init__.py +1 -1
  103. mlrun/frameworks/sklearn/mlrun_interface.py +13 -3
  104. mlrun/frameworks/tf_keras/__init__.py +5 -2
  105. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +1 -1
  106. mlrun/frameworks/tf_keras/mlrun_interface.py +2 -2
  107. mlrun/frameworks/xgboost/__init__.py +1 -1
  108. mlrun/k8s_utils.py +57 -12
  109. mlrun/launcher/__init__.py +1 -1
  110. mlrun/launcher/base.py +6 -5
  111. mlrun/launcher/client.py +13 -11
  112. mlrun/launcher/factory.py +1 -1
  113. mlrun/launcher/local.py +15 -5
  114. mlrun/launcher/remote.py +10 -3
  115. mlrun/lists.py +6 -2
  116. mlrun/model.py +297 -48
  117. mlrun/model_monitoring/__init__.py +1 -1
  118. mlrun/model_monitoring/api.py +152 -357
  119. mlrun/model_monitoring/applications/__init__.py +10 -0
  120. mlrun/model_monitoring/applications/_application_steps.py +190 -0
  121. mlrun/model_monitoring/applications/base.py +108 -0
  122. mlrun/model_monitoring/applications/context.py +341 -0
  123. mlrun/model_monitoring/{evidently_application.py → applications/evidently_base.py} +27 -22
  124. mlrun/model_monitoring/applications/histogram_data_drift.py +227 -91
  125. mlrun/model_monitoring/applications/results.py +99 -0
  126. mlrun/model_monitoring/controller.py +130 -303
  127. mlrun/model_monitoring/{stores/models/sqlite.py → db/__init__.py} +5 -10
  128. mlrun/model_monitoring/db/stores/__init__.py +136 -0
  129. mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
  130. mlrun/model_monitoring/db/stores/base/store.py +213 -0
  131. mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
  132. mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +71 -0
  133. mlrun/model_monitoring/db/stores/sqldb/models/base.py +190 -0
  134. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +103 -0
  135. mlrun/model_monitoring/{stores/models/mysql.py → db/stores/sqldb/models/sqlite.py} +19 -13
  136. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +659 -0
  137. mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
  138. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +726 -0
  139. mlrun/model_monitoring/db/tsdb/__init__.py +105 -0
  140. mlrun/model_monitoring/db/tsdb/base.py +448 -0
  141. mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
  142. mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
  143. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +298 -0
  144. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +42 -0
  145. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +522 -0
  146. mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
  147. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +158 -0
  148. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +849 -0
  149. mlrun/model_monitoring/features_drift_table.py +34 -22
  150. mlrun/model_monitoring/helpers.py +177 -39
  151. mlrun/model_monitoring/model_endpoint.py +3 -2
  152. mlrun/model_monitoring/stream_processing.py +165 -398
  153. mlrun/model_monitoring/tracking_policy.py +7 -1
  154. mlrun/model_monitoring/writer.py +161 -125
  155. mlrun/package/packagers/default_packager.py +2 -2
  156. mlrun/package/packagers_manager.py +1 -0
  157. mlrun/package/utils/_formatter.py +2 -2
  158. mlrun/platforms/__init__.py +11 -10
  159. mlrun/platforms/iguazio.py +67 -228
  160. mlrun/projects/__init__.py +6 -1
  161. mlrun/projects/operations.py +47 -20
  162. mlrun/projects/pipelines.py +396 -249
  163. mlrun/projects/project.py +1176 -406
  164. mlrun/render.py +28 -22
  165. mlrun/run.py +208 -181
  166. mlrun/runtimes/__init__.py +76 -11
  167. mlrun/runtimes/base.py +54 -24
  168. mlrun/runtimes/daskjob.py +9 -2
  169. mlrun/runtimes/databricks_job/databricks_runtime.py +1 -0
  170. mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
  171. mlrun/runtimes/funcdoc.py +1 -29
  172. mlrun/runtimes/kubejob.py +34 -128
  173. mlrun/runtimes/local.py +39 -10
  174. mlrun/runtimes/mpijob/__init__.py +0 -20
  175. mlrun/runtimes/mpijob/abstract.py +8 -8
  176. mlrun/runtimes/mpijob/v1.py +1 -1
  177. mlrun/runtimes/nuclio/__init__.py +1 -0
  178. mlrun/runtimes/nuclio/api_gateway.py +769 -0
  179. mlrun/runtimes/nuclio/application/__init__.py +15 -0
  180. mlrun/runtimes/nuclio/application/application.py +758 -0
  181. mlrun/runtimes/nuclio/application/reverse_proxy.go +95 -0
  182. mlrun/runtimes/nuclio/function.py +188 -68
  183. mlrun/runtimes/nuclio/serving.py +57 -60
  184. mlrun/runtimes/pod.py +191 -58
  185. mlrun/runtimes/remotesparkjob.py +11 -8
  186. mlrun/runtimes/sparkjob/spark3job.py +17 -18
  187. mlrun/runtimes/utils.py +40 -73
  188. mlrun/secrets.py +6 -2
  189. mlrun/serving/__init__.py +8 -1
  190. mlrun/serving/remote.py +2 -3
  191. mlrun/serving/routers.py +89 -64
  192. mlrun/serving/server.py +54 -26
  193. mlrun/serving/states.py +187 -56
  194. mlrun/serving/utils.py +19 -11
  195. mlrun/serving/v2_serving.py +136 -63
  196. mlrun/track/tracker.py +2 -1
  197. mlrun/track/trackers/mlflow_tracker.py +5 -0
  198. mlrun/utils/async_http.py +26 -6
  199. mlrun/utils/db.py +18 -0
  200. mlrun/utils/helpers.py +375 -105
  201. mlrun/utils/http.py +2 -2
  202. mlrun/utils/logger.py +75 -9
  203. mlrun/utils/notifications/notification/__init__.py +14 -10
  204. mlrun/utils/notifications/notification/base.py +48 -0
  205. mlrun/utils/notifications/notification/console.py +2 -0
  206. mlrun/utils/notifications/notification/git.py +24 -1
  207. mlrun/utils/notifications/notification/ipython.py +2 -0
  208. mlrun/utils/notifications/notification/slack.py +96 -21
  209. mlrun/utils/notifications/notification/webhook.py +63 -2
  210. mlrun/utils/notifications/notification_pusher.py +146 -16
  211. mlrun/utils/regex.py +9 -0
  212. mlrun/utils/retryer.py +3 -2
  213. mlrun/utils/v3io_clients.py +2 -3
  214. mlrun/utils/version/version.json +2 -2
  215. mlrun-1.7.2.dist-info/METADATA +390 -0
  216. mlrun-1.7.2.dist-info/RECORD +351 -0
  217. {mlrun-1.7.0rc4.dist-info → mlrun-1.7.2.dist-info}/WHEEL +1 -1
  218. mlrun/feature_store/retrieval/conversion.py +0 -271
  219. mlrun/kfpops.py +0 -868
  220. mlrun/model_monitoring/application.py +0 -310
  221. mlrun/model_monitoring/batch.py +0 -974
  222. mlrun/model_monitoring/controller_handler.py +0 -37
  223. mlrun/model_monitoring/prometheus.py +0 -216
  224. mlrun/model_monitoring/stores/__init__.py +0 -111
  225. mlrun/model_monitoring/stores/kv_model_endpoint_store.py +0 -574
  226. mlrun/model_monitoring/stores/model_endpoint_store.py +0 -145
  227. mlrun/model_monitoring/stores/models/__init__.py +0 -27
  228. mlrun/model_monitoring/stores/models/base.py +0 -84
  229. mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -382
  230. mlrun/platforms/other.py +0 -305
  231. mlrun-1.7.0rc4.dist-info/METADATA +0 -269
  232. mlrun-1.7.0rc4.dist-info/RECORD +0 -321
  233. {mlrun-1.7.0rc4.dist-info → mlrun-1.7.2.dist-info}/LICENSE +0 -0
  234. {mlrun-1.7.0rc4.dist-info → mlrun-1.7.2.dist-info}/entry_points.txt +0 -0
  235. {mlrun-1.7.0rc4.dist-info → mlrun-1.7.2.dist-info}/top_level.txt +0 -0
mlrun/runtimes/nuclio/serving.py CHANGED
@@ -14,17 +14,17 @@
14
14
 
15
15
  import json
16
16
  import os
17
+ import warnings
17
18
  from copy import deepcopy
18
- from typing import Union
19
+ from typing import TYPE_CHECKING, Optional, Union
19
20
 
20
21
  import nuclio
21
22
  from nuclio import KafkaTrigger
22
23
 
23
24
  import mlrun
24
25
  import mlrun.common.schemas
25
- from mlrun.datastore import parse_kafka_url
26
+ from mlrun.datastore import get_kafka_brokers_from_dict, parse_kafka_url
26
27
  from mlrun.model import ObjectList
27
- from mlrun.model_monitoring.tracking_policy import TrackingPolicy
28
28
  from mlrun.runtimes.function_reference import FunctionReference
29
29
  from mlrun.secrets import SecretsStore
30
30
  from mlrun.serving.server import GraphServer, create_graph_server
@@ -43,6 +43,10 @@ from .function import NuclioSpec, RemoteRuntime
43
43
 
44
44
  serving_subkind = "serving_v2"
45
45
 
46
+ if TYPE_CHECKING:
47
+ # remove this block in 1.9.0
48
+ from mlrun.model_monitoring import TrackingPolicy
49
+
46
50
 
47
51
  def new_v2_model_server(
48
52
  name,
@@ -303,45 +307,34 @@ class ServingRuntime(RemoteRuntime):
303
307
 
304
308
  def set_tracking(
305
309
  self,
306
- stream_path: str = None,
307
- batch: int = None,
308
- sample: int = None,
309
- stream_args: dict = None,
310
- tracking_policy: Union[TrackingPolicy, dict] = None,
311
- ):
312
- """apply on your serving function to monitor a deployed model, including real-time dashboards to detect drift
313
- and analyze performance.
314
-
315
- :param stream_path: Path/url of the tracking stream e.g. v3io:///users/mike/mystream
316
- you can use the "dummy://" path for test/simulation.
317
- :param batch: Micro batch size (send micro batches of N records at a time).
318
- :param sample: Sample size (send only one of N records).
319
- :param stream_args: Stream initialization parameters, e.g. shards, retention_in_hours, ..
320
- :param tracking_policy: Tracking policy object or a dictionary that will be converted into a tracking policy
321
- object. By using TrackingPolicy, the user can apply his model monitoring requirements,
322
- such as setting the scheduling policy of the model monitoring batch job or changing
323
- the image of the model monitoring stream.
324
-
325
- example::
326
-
327
- # initialize a new serving function
328
- serving_fn = mlrun.import_function("hub://v2-model-server", new_name="serving")
329
- # apply model monitoring and set monitoring batch job to run every 3 hours
330
- tracking_policy = {'default_batch_intervals':"0 */3 * * *"}
331
- serving_fn.set_tracking(tracking_policy=tracking_policy)
310
+ stream_path: Optional[str] = None,
311
+ batch: Optional[int] = None,
312
+ sample: Optional[int] = None,
313
+ stream_args: Optional[dict] = None,
314
+ tracking_policy: Optional[Union["TrackingPolicy", dict]] = None,
315
+ enable_tracking: bool = True,
316
+ ) -> None:
317
+ """Apply on your serving function to monitor a deployed model, including real-time dashboards to detect drift
318
+ and analyze performance.
319
+
320
+ :param stream_path: Path/url of the tracking stream e.g. v3io:///users/mike/mystream
321
+ you can use the "dummy://" path for test/simulation.
322
+ :param batch: Micro batch size (send micro batches of N records at a time).
323
+ :param sample: Sample size (send only one of N records).
324
+ :param stream_args: Stream initialization parameters, e.g. shards, retention_in_hours, ..
325
+ :param enable_tracking: Enable/Disable model-monitoring tracking.
326
+ Default True (tracking enabled).
327
+
328
+ Example::
329
+
330
+ # initialize a new serving function
331
+ serving_fn = mlrun.import_function("hub://v2-model-server", new_name="serving")
332
+ # apply model monitoring
333
+ serving_fn.set_tracking()
332
334
 
333
335
  """
334
-
335
336
  # Applying model monitoring configurations
336
- self.spec.track_models = True
337
- self.spec.tracking_policy = None
338
- if tracking_policy:
339
- if isinstance(tracking_policy, dict):
340
- # Convert tracking policy dictionary into `model_monitoring.TrackingPolicy` object
341
- self.spec.tracking_policy = TrackingPolicy.from_dict(tracking_policy)
342
- else:
343
- # Tracking_policy is already a `model_monitoring.TrackingPolicy` object
344
- self.spec.tracking_policy = tracking_policy
337
+ self.spec.track_models = enable_tracking
345
338
 
346
339
  if stream_path:
347
340
  self.spec.parameters["log_stream"] = stream_path
@@ -351,6 +344,14 @@ class ServingRuntime(RemoteRuntime):
351
344
  self.spec.parameters["log_stream_sample"] = sample
352
345
  if stream_args:
353
346
  self.spec.parameters["stream_args"] = stream_args
347
+ if tracking_policy is not None:
348
+ warnings.warn(
349
+ "The `tracking_policy` argument is deprecated from version 1.7.0 "
350
+ "and has no effect. It will be removed in 1.9.0.\n"
351
+ "To set the desired model monitoring time window and schedule, use "
352
+ "the `base_period` argument in `project.enable_model_monitoring()`.",
353
+ FutureWarning,
354
+ )
354
355
 
355
356
  def add_model(
356
357
  self,
@@ -367,8 +368,8 @@ class ServingRuntime(RemoteRuntime):
367
368
 
368
369
  Example, create a function (from the notebook), add a model class, and deploy::
369
370
 
370
- fn = code_to_function(kind='serving')
371
- fn.add_model('boost', model_path, model_class='MyClass', my_arg=5)
371
+ fn = code_to_function(kind="serving")
372
+ fn.add_model("boost", model_path, model_class="MyClass", my_arg=5)
372
373
  fn.deploy()
373
374
 
374
375
  only works with router topology, for nested topologies (model under router under flow)
@@ -450,7 +451,7 @@ class ServingRuntime(RemoteRuntime):
450
451
 
451
452
  example::
452
453
 
453
- fn.add_child_function('enrich', './enrich.ipynb', 'mlrun/mlrun')
454
+ fn.add_child_function("enrich", "./enrich.ipynb", "mlrun/mlrun")
454
455
 
455
456
  :param name: child function name
456
457
  :param url: function/code url, support .py, .ipynb, .yaml extensions
@@ -479,7 +480,7 @@ class ServingRuntime(RemoteRuntime):
479
480
  trigger_args = stream.trigger_args or {}
480
481
 
481
482
  engine = self.spec.graph.engine or "async"
482
- if mlrun.mlconf.is_explicit_ack() and engine == "async":
483
+ if mlrun.mlconf.is_explicit_ack_enabled() and engine == "async":
483
484
  trigger_args["explicit_ack_mode"] = trigger_args.get(
484
485
  "explicit_ack_mode", "explicitOnly"
485
486
  )
@@ -489,11 +490,8 @@ class ServingRuntime(RemoteRuntime):
489
490
  "worker_allocation_mode", "static"
490
491
  )
491
492
 
492
- if (
493
- stream.path.startswith("kafka://")
494
- or "kafka_bootstrap_servers" in stream.options
495
- ):
496
- brokers = stream.options.get("kafka_bootstrap_servers")
493
+ brokers = get_kafka_brokers_from_dict(stream.options)
494
+ if stream.path.startswith("kafka://") or brokers:
497
495
  if brokers:
498
496
  brokers = brokers.split(",")
499
497
  topic, brokers = parse_kafka_url(stream.path, brokers)
@@ -609,7 +607,7 @@ class ServingRuntime(RemoteRuntime):
609
607
  ):
610
608
  # initialize or create required streams/queues
611
609
  self.spec.graph.check_and_process_graph()
612
- self.spec.graph.init_queues()
610
+ self.spec.graph.create_queue_streams()
613
611
  functions_in_steps = self.spec.graph.list_child_functions()
614
612
  child_functions = list(self._spec.function_refs.keys())
615
613
  for function in functions_in_steps:
@@ -644,8 +642,7 @@ class ServingRuntime(RemoteRuntime):
644
642
  force_build=force_build,
645
643
  )
646
644
 
647
- def _get_runtime_env(self):
648
- env = super()._get_runtime_env()
645
+ def _get_serving_spec(self):
649
646
  function_name_uri_map = {f.name: f.uri(self) for f in self.spec.function_refs}
650
647
 
651
648
  serving_spec = {
@@ -658,9 +655,7 @@ class ServingRuntime(RemoteRuntime):
658
655
  "graph_initializer": self.spec.graph_initializer,
659
656
  "error_stream": self.spec.error_stream,
660
657
  "track_models": self.spec.track_models,
661
- "tracking_policy": self.spec.tracking_policy.to_dict()
662
- if self.spec.tracking_policy
663
- else None,
658
+ "tracking_policy": None,
664
659
  "default_content_type": self.spec.default_content_type,
665
660
  }
666
661
 
@@ -668,8 +663,7 @@ class ServingRuntime(RemoteRuntime):
668
663
  self._secrets = SecretsStore.from_list(self.spec.secret_sources)
669
664
  serving_spec["secret_sources"] = self._secrets.to_serial()
670
665
 
671
- env["SERVING_SPEC_ENV"] = json.dumps(serving_spec)
672
- return env
666
+ return json.dumps(serving_spec)
673
667
 
674
668
  def to_mock_server(
675
669
  self,
@@ -682,7 +676,6 @@ class ServingRuntime(RemoteRuntime):
682
676
  """create mock server object for local testing/emulation
683
677
 
684
678
  :param namespace: one or list of namespaces/modules to search the steps classes/functions in
685
- :param log_level: log level (error | info | debug)
686
679
  :param current_function: specify if you want to simulate a child function, * for all functions
687
680
  :param track_models: allow model tracking (disabled by default in the mock server)
688
681
  :param workdir: working directory to locate the source code (if not the current one)
@@ -710,7 +703,7 @@ class ServingRuntime(RemoteRuntime):
710
703
  verbose=self.verbose,
711
704
  current_function=current_function,
712
705
  graph_initializer=self.spec.graph_initializer,
713
- track_models=track_models and self.spec.track_models,
706
+ track_models=self.spec.track_models,
714
707
  function_uri=self._function_uri(),
715
708
  secret_sources=self.spec.secret_sources,
716
709
  default_content_type=self.spec.default_content_type,
@@ -721,6 +714,7 @@ class ServingRuntime(RemoteRuntime):
721
714
  namespace=namespace,
722
715
  logger=logger,
723
716
  is_mock=True,
717
+ monitoring_mock=track_models,
724
718
  )
725
719
 
726
720
  if workdir:
@@ -735,8 +729,11 @@ class ServingRuntime(RemoteRuntime):
735
729
  example::
736
730
 
737
731
  serving_fn = mlrun.new_function("serving", image="mlrun/mlrun", kind="serving")
738
- serving_fn.add_model('my-classifier',model_path=model_path,
739
- class_name='mlrun.frameworks.sklearn.SklearnModelServer')
732
+ serving_fn.add_model(
733
+ "my-classifier",
734
+ model_path=model_path,
735
+ class_name="mlrun.frameworks.sklearn.SklearnModelServer",
736
+ )
740
737
  serving_fn.plot(rankdir="LR")
741
738
 
742
739
  :param filename: target filepath for the image (None for the notebook)
mlrun/runtimes/pod.py CHANGED
@@ -15,12 +15,14 @@ import copy
15
15
  import inspect
16
16
  import os
17
17
  import re
18
+ import time
18
19
  import typing
19
20
  from enum import Enum
20
21
 
21
22
  import dotenv
22
- import kfp.dsl
23
23
  import kubernetes.client as k8s_client
24
+ import mlrun_pipelines.mounts
25
+ from mlrun_pipelines.mixins import KfpAdapterMixin
24
26
 
25
27
  import mlrun.errors
26
28
  import mlrun.utils.regex
@@ -36,11 +38,11 @@ from ..k8s_utils import (
36
38
  generate_preemptible_nodes_affinity_terms,
37
39
  generate_preemptible_nodes_anti_affinity_terms,
38
40
  generate_preemptible_tolerations,
41
+ validate_node_selectors,
39
42
  )
40
43
  from ..utils import logger, update_in
41
44
  from .base import BaseRuntime, FunctionSpec, spec_fields
42
45
  from .utils import (
43
- apply_kfp,
44
46
  get_gpu_from_resource_requirement,
45
47
  get_item_name,
46
48
  set_named_item,
@@ -214,9 +216,7 @@ class KubeResourceSpec(FunctionSpec):
214
216
  image_pull_secret or mlrun.mlconf.function.spec.image_pull_secret.default
215
217
  )
216
218
  self.node_name = node_name
217
- self.node_selector = (
218
- node_selector or mlrun.mlconf.get_default_function_node_selector()
219
- )
219
+ self.node_selector = node_selector or {}
220
220
  self._affinity = affinity
221
221
  self.priority_class_name = (
222
222
  priority_class_name or mlrun.mlconf.default_function_priority_class_name
@@ -531,7 +531,9 @@ class KubeResourceSpec(FunctionSpec):
531
531
  return
532
532
 
533
533
  # merge node selectors - precedence to existing node selector
534
- self.node_selector = {**node_selector, **self.node_selector}
534
+ self.node_selector = mlrun.utils.helpers.merge_dicts_with_precedence(
535
+ node_selector, self.node_selector
536
+ )
535
537
 
536
538
  def _merge_tolerations(
537
539
  self,
@@ -934,12 +936,12 @@ class AutoMountType(str, Enum):
934
936
  @classmethod
935
937
  def all_mount_modifiers(cls):
936
938
  return [
937
- mlrun.v3io_cred.__name__,
938
- mlrun.mount_v3io.__name__,
939
- mlrun.platforms.other.mount_pvc.__name__,
940
- mlrun.auto_mount.__name__,
941
- mlrun.platforms.mount_s3.__name__,
942
- mlrun.platforms.set_env_variables.__name__,
939
+ mlrun_pipelines.mounts.v3io_cred.__name__,
940
+ mlrun_pipelines.mounts.mount_v3io.__name__,
941
+ mlrun_pipelines.mounts.mount_pvc.__name__,
942
+ mlrun_pipelines.mounts.auto_mount.__name__,
943
+ mlrun_pipelines.mounts.mount_s3.__name__,
944
+ mlrun_pipelines.mounts.set_env_variables.__name__,
943
945
  ]
944
946
 
945
947
  @classmethod
@@ -956,27 +958,27 @@ class AutoMountType(str, Enum):
956
958
  def _get_auto_modifier():
957
959
  # If we're running on Iguazio - use v3io_cred
958
960
  if mlconf.igz_version != "":
959
- return mlrun.v3io_cred
961
+ return mlrun_pipelines.mounts.v3io_cred
960
962
  # Else, either pvc mount if it's configured or do nothing otherwise
961
963
  pvc_configured = (
962
964
  "MLRUN_PVC_MOUNT" in os.environ
963
965
  or "pvc_name" in mlconf.get_storage_auto_mount_params()
964
966
  )
965
- return mlrun.platforms.other.mount_pvc if pvc_configured else None
967
+ return mlrun_pipelines.mounts.mount_pvc if pvc_configured else None
966
968
 
967
969
  def get_modifier(self):
968
970
  return {
969
971
  AutoMountType.none: None,
970
- AutoMountType.v3io_credentials: mlrun.v3io_cred,
971
- AutoMountType.v3io_fuse: mlrun.mount_v3io,
972
- AutoMountType.pvc: mlrun.platforms.other.mount_pvc,
972
+ AutoMountType.v3io_credentials: mlrun_pipelines.mounts.v3io_cred,
973
+ AutoMountType.v3io_fuse: mlrun_pipelines.mounts.mount_v3io,
974
+ AutoMountType.pvc: mlrun_pipelines.mounts.mount_pvc,
973
975
  AutoMountType.auto: self._get_auto_modifier(),
974
- AutoMountType.s3: mlrun.platforms.mount_s3,
975
- AutoMountType.env: mlrun.platforms.set_env_variables,
976
+ AutoMountType.s3: mlrun_pipelines.mounts.mount_s3,
977
+ AutoMountType.env: mlrun_pipelines.mounts.set_env_variables,
976
978
  }[self]
977
979
 
978
980
 
979
- class KubeResource(BaseRuntime):
981
+ class KubeResource(BaseRuntime, KfpAdapterMixin):
980
982
  """
981
983
  A parent class for runtimes that generate k8s resources when executing.
982
984
  """
@@ -985,7 +987,7 @@ class KubeResource(BaseRuntime):
985
987
  _is_nested = True
986
988
 
987
989
  def __init__(self, spec=None, metadata=None):
988
- super().__init__(metadata, spec)
990
+ super().__init__(metadata=metadata, spec=spec)
989
991
  self.verbose = False
990
992
 
991
993
  @property
@@ -996,26 +998,6 @@ class KubeResource(BaseRuntime):
996
998
  def spec(self, spec):
997
999
  self._spec = self._verify_dict(spec, "spec", KubeResourceSpec)
998
1000
 
999
- def apply(self, modify):
1000
- """
1001
- Apply a modifier to the runtime which is used to change the runtimes k8s object's spec.
1002
- Modifiers can be either KFP modifiers or MLRun modifiers (which are compatible with KFP). All modifiers accept
1003
- a `kfp.dsl.ContainerOp` object, apply some changes on its spec and return it so modifiers can be chained
1004
- one after the other.
1005
-
1006
- :param modify: a modifier runnable object
1007
- :return: the runtime (self) after the modifications
1008
- """
1009
-
1010
- # Kubeflow pipeline have a hook to add the component to the DAG on ContainerOp init
1011
- # we remove the hook to suppress kubeflow op registration and return it after the apply()
1012
- old_op_handler = kfp.dsl._container_op._register_op_handler
1013
- kfp.dsl._container_op._register_op_handler = lambda x: self.metadata.name
1014
- cop = kfp.dsl.ContainerOp("name", "image")
1015
- kfp.dsl._container_op._register_op_handler = old_op_handler
1016
-
1017
- return apply_kfp(modify, cop, self)
1018
-
1019
1001
  def set_env_from_secret(self, name, secret=None, secret_key=None):
1020
1002
  """set pod environment var from secret"""
1021
1003
  secret_key = secret_key or name
@@ -1059,12 +1041,12 @@ class KubeResource(BaseRuntime):
1059
1041
 
1060
1042
  def _set_env(self, name, value=None, value_from=None):
1061
1043
  new_var = k8s_client.V1EnvVar(name=name, value=value, value_from=value_from)
1062
- i = 0
1063
- for v in self.spec.env:
1064
- if get_item_name(v) == name:
1065
- self.spec.env[i] = new_var
1044
+
1045
+ # ensure we don't have duplicate env vars with the same name
1046
+ for env_index, value_item in enumerate(self.spec.env):
1047
+ if get_item_name(value_item) == name:
1048
+ self.spec.env[env_index] = new_var
1066
1049
  return self
1067
- i += 1
1068
1050
  self.spec.env.append(new_var)
1069
1051
  return self
1070
1052
 
@@ -1125,12 +1107,12 @@ class KubeResource(BaseRuntime):
1125
1107
 
1126
1108
  :param state_thresholds: A dictionary of state to threshold. The supported states are:
1127
1109
 
1128
- * pending_scheduled - The pod/crd is scheduled on a node but not yet running
1129
- * pending_not_scheduled - The pod/crd is not yet scheduled on a node
1130
- * executing - The pod/crd started and is running
1131
- * image_pull_backoff - The pod/crd is in image pull backoff
1132
- See mlrun.mlconf.function.spec.state_thresholds for the default thresholds.
1110
+ * pending_scheduled - The pod/crd is scheduled on a node but not yet running
1111
+ * pending_not_scheduled - The pod/crd is not yet scheduled on a node
1112
+ * executing - The pod/crd started and is running
1113
+ * image_pull_backoff - The pod/crd is in image pull backoff
1133
1114
 
1115
+ See :code:`mlrun.mlconf.function.spec.state_thresholds` for the default thresholds.
1134
1116
  :param patch: Whether to merge the given thresholds with the existing thresholds (True, default)
1135
1117
  or override them (False)
1136
1118
  """
@@ -1193,9 +1175,10 @@ class KubeResource(BaseRuntime):
1193
1175
  """
1194
1176
  if node_name:
1195
1177
  self.spec.node_name = node_name
1196
- if node_selector:
1178
+ if node_selector is not None:
1179
+ validate_node_selectors(node_selectors=node_selector, raise_on_error=False)
1197
1180
  self.spec.node_selector = node_selector
1198
- if affinity:
1181
+ if affinity is not None:
1199
1182
  self.spec.affinity = affinity
1200
1183
  if tolerations is not None:
1201
1184
  self.spec.tolerations = tolerations
@@ -1251,9 +1234,9 @@ class KubeResource(BaseRuntime):
1251
1234
  from kubernetes import client as k8s_client
1252
1235
 
1253
1236
  security_context = k8s_client.V1SecurityContext(
1254
- run_as_user=1000,
1255
- run_as_group=3000,
1256
- )
1237
+ run_as_user=1000,
1238
+ run_as_group=3000,
1239
+ )
1257
1240
  function.with_security_context(security_context)
1258
1241
 
1259
1242
  More info:
@@ -1312,6 +1295,156 @@ class KubeResource(BaseRuntime):
1312
1295
 
1313
1296
  self.spec.validate_service_account(allowed_service_accounts)
1314
1297
 
1298
+ def _configure_mlrun_build_with_source(
1299
+ self, source, workdir=None, handler=None, pull_at_runtime=True, target_dir=None
1300
+ ):
1301
+ mlrun.utils.helpers.validate_builder_source(source, pull_at_runtime, workdir)
1302
+
1303
+ self.spec.build.source = source
1304
+ if handler:
1305
+ self.spec.default_handler = handler
1306
+ if workdir:
1307
+ self.spec.workdir = workdir
1308
+ if target_dir:
1309
+ self.spec.build.source_code_target_dir = target_dir
1310
+
1311
+ self.spec.build.load_source_on_run = pull_at_runtime
1312
+ if (
1313
+ self.spec.build.base_image
1314
+ and not self.spec.build.commands
1315
+ and pull_at_runtime
1316
+ and not self.spec.image
1317
+ ):
1318
+ # if we load source from repo and don't need a full build use the base_image as the image
1319
+ self.spec.image = self.spec.build.base_image
1320
+ elif not pull_at_runtime:
1321
+ # clear the image so build will not be skipped
1322
+ self.spec.build.base_image = self.spec.build.base_image or self.spec.image
1323
+ self.spec.image = ""
1324
+
1325
+ def _resolve_build_with_mlrun(self, with_mlrun: typing.Optional[bool] = None):
1326
+ build = self.spec.build
1327
+ if with_mlrun is None:
1328
+ if build.with_mlrun is not None:
1329
+ with_mlrun = build.with_mlrun
1330
+ else:
1331
+ with_mlrun = build.base_image and not (
1332
+ build.base_image.startswith("mlrun/")
1333
+ or "/mlrun/" in build.base_image
1334
+ )
1335
+ if (
1336
+ not build.source
1337
+ and not build.commands
1338
+ and not build.requirements
1339
+ and not build.extra
1340
+ and with_mlrun
1341
+ ):
1342
+ logger.info(
1343
+ "Running build to add mlrun package, set "
1344
+ "with_mlrun=False to skip if its already in the image"
1345
+ )
1346
+ return with_mlrun
1347
+
1348
+ def _build_image(
1349
+ self,
1350
+ builder_env: dict,
1351
+ force_build: bool,
1352
+ mlrun_version_specifier: typing.Optional[bool],
1353
+ show_on_failure: bool,
1354
+ skip_deployed: bool,
1355
+ watch: bool,
1356
+ is_kfp: bool,
1357
+ with_mlrun: typing.Optional[bool],
1358
+ ):
1359
+ # When we're in pipelines context we must watch otherwise the pipelines pod will exit before the operation
1360
+ # is actually done. (when a pipelines pod exits, the pipeline step marked as done)
1361
+ if is_kfp:
1362
+ watch = True
1363
+
1364
+ if skip_deployed and self.requires_build() and not self.is_deployed():
1365
+ logger.warning(
1366
+ f"Even though {skip_deployed=}, the build might be triggered due to the function's configuration. "
1367
+ "See requires_build() and is_deployed() for reasoning."
1368
+ )
1369
+
1370
+ db = self._get_db()
1371
+ data = db.remote_builder(
1372
+ self,
1373
+ with_mlrun,
1374
+ mlrun_version_specifier,
1375
+ skip_deployed,
1376
+ builder_env=builder_env,
1377
+ force_build=force_build,
1378
+ )
1379
+ self.status = data["data"].get("status", None)
1380
+ self.spec.image = mlrun.utils.get_in(
1381
+ data, "data.spec.image"
1382
+ ) or mlrun.utils.get_in(data, "data.spec.build.image")
1383
+ self.spec.build.base_image = self.spec.build.base_image or mlrun.utils.get_in(
1384
+ data, "data.spec.build.base_image"
1385
+ )
1386
+ # Get the source target dir in case it was enriched due to loading source
1387
+ self.spec.build.source_code_target_dir = mlrun.utils.get_in(
1388
+ data, "data.spec.build.source_code_target_dir"
1389
+ ) or mlrun.utils.get_in(data, "data.spec.clone_target_dir")
1390
+ ready = data.get("ready", False)
1391
+ if not ready:
1392
+ logger.info(
1393
+ f"Started building image: {data.get('data', {}).get('spec', {}).get('build', {}).get('image')}"
1394
+ )
1395
+ if watch and not ready:
1396
+ state = self._build_watch(
1397
+ watch=watch,
1398
+ show_on_failure=show_on_failure,
1399
+ )
1400
+ ready = state == "ready"
1401
+ self.status.state = state
1402
+
1403
+ if watch and not ready:
1404
+ raise mlrun.errors.MLRunRuntimeError("Deploy failed")
1405
+ return ready
1406
+
1407
+ def _build_watch(
1408
+ self,
1409
+ watch: bool = True,
1410
+ logs: bool = True,
1411
+ show_on_failure: bool = False,
1412
+ ):
1413
+ db = self._get_db()
1414
+ offset = 0
1415
+ try:
1416
+ text, _ = db.get_builder_status(self, 0, logs=logs)
1417
+ except mlrun.db.RunDBError:
1418
+ raise ValueError("function or build process not found")
1419
+
1420
+ def print_log(text):
1421
+ if text and (
1422
+ not show_on_failure
1423
+ or self.status.state == mlrun.common.schemas.FunctionState.error
1424
+ ):
1425
+ print(text, end="")
1426
+
1427
+ print_log(text)
1428
+ offset += len(text)
1429
+ if watch:
1430
+ while self.status.state in [
1431
+ mlrun.common.schemas.FunctionState.pending,
1432
+ mlrun.common.schemas.FunctionState.running,
1433
+ ]:
1434
+ time.sleep(2)
1435
+ if show_on_failure:
1436
+ text = ""
1437
+ db.get_builder_status(self, 0, logs=False)
1438
+ if self.status.state == mlrun.common.schemas.FunctionState.error:
1439
+ # re-read the full log on failure
1440
+ text, _ = db.get_builder_status(self, offset, logs=logs)
1441
+ else:
1442
+ text, _ = db.get_builder_status(self, offset, logs=logs)
1443
+ print_log(text)
1444
+ offset += len(text)
1445
+
1446
+ return self.status.state
1447
+
1315
1448
 
1316
1449
  def _resolve_if_type_sanitized(attribute_name, attribute):
1317
1450
  attribute_config = sanitized_attributes[attribute_name]
@@ -1391,7 +1524,7 @@ def get_sanitized_attribute(spec, attribute_name: str):
1391
1524
 
1392
1525
  # check if attribute of type dict, and then check if type is sanitized
1393
1526
  if isinstance(attribute, dict):
1394
- if attribute_config["not_sanitized_class"] != dict:
1527
+ if not isinstance(attribute_config["not_sanitized_class"], dict):
1395
1528
  raise mlrun.errors.MLRunInvalidArgumentTypeError(
1396
1529
  f"expected to be of type {attribute_config.get('not_sanitized_class')} but got dict"
1397
1530
  )
@@ -1401,7 +1534,7 @@ def get_sanitized_attribute(spec, attribute_name: str):
1401
1534
  elif isinstance(attribute, list) and not isinstance(
1402
1535
  attribute[0], attribute_config["sub_attribute_type"]
1403
1536
  ):
1404
- if attribute_config["not_sanitized_class"] != list:
1537
+ if not isinstance(attribute_config["not_sanitized_class"], list):
1405
1538
  raise mlrun.errors.MLRunInvalidArgumentTypeError(
1406
1539
  f"expected to be of type {attribute_config.get('not_sanitized_class')} but got list"
1407
1540
  )
@@ -15,11 +15,11 @@ import re
15
15
  from subprocess import run
16
16
 
17
17
  import kubernetes.client
18
+ from mlrun_pipelines.mounts import mount_v3io, mount_v3iod
18
19
 
19
20
  import mlrun.errors
20
21
  from mlrun.config import config
21
22
 
22
- from ..platforms.iguazio import mount_v3io, mount_v3iod
23
23
  from .kubejob import KubejobRuntime
24
24
  from .pod import KubeResourceSpec
25
25
 
@@ -102,16 +102,13 @@ class RemoteSparkRuntime(KubejobRuntime):
102
102
 
103
103
  @classmethod
104
104
  def deploy_default_image(cls):
105
- from mlrun import get_run_db
106
- from mlrun.run import new_function
107
-
108
- sj = new_function(
105
+ sj = mlrun.new_function(
109
106
  kind="remote-spark", name="remote-spark-default-image-deploy-temp"
110
107
  )
111
108
  sj.spec.build.image = cls.default_image
112
109
  sj.with_spark_service(spark_service="dummy-spark")
113
110
  sj.deploy()
114
- get_run_db().delete_function(name=sj.metadata.name)
111
+ mlrun.get_run_db().delete_function(name=sj.metadata.name)
115
112
 
116
113
  def is_deployed(self):
117
114
  if (
@@ -130,14 +127,20 @@ class RemoteSparkRuntime(KubejobRuntime):
130
127
  def spec(self, spec):
131
128
  self._spec = self._verify_dict(spec, "spec", RemoteSparkSpec)
132
129
 
133
- def with_spark_service(self, spark_service, provider=RemoteSparkProviders.iguazio):
130
+ def with_spark_service(
131
+ self,
132
+ spark_service,
133
+ provider=RemoteSparkProviders.iguazio,
134
+ with_v3io_mount=True,
135
+ ):
134
136
  """Attach spark service to function"""
135
137
  self.spec.provider = provider
136
138
  if provider == RemoteSparkProviders.iguazio:
137
139
  self.spec.env.append(
138
140
  {"name": "MLRUN_SPARK_CLIENT_IGZ_SPARK", "value": "true"}
139
141
  )
140
- self.apply(mount_v3io())
142
+ if with_v3io_mount:
143
+ self.apply(mount_v3io())
141
144
  self.apply(
142
145
  mount_v3iod(
143
146
  namespace=config.namespace,