mlrun 1.7.0rc5__py3-none-any.whl → 1.7.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic. Click here for more details.

Files changed (234)
  1. mlrun/__init__.py +11 -1
  2. mlrun/__main__.py +39 -121
  3. mlrun/{datastore/helpers.py → alerts/__init__.py} +2 -5
  4. mlrun/alerts/alert.py +248 -0
  5. mlrun/api/schemas/__init__.py +4 -3
  6. mlrun/artifacts/__init__.py +8 -3
  7. mlrun/artifacts/base.py +39 -254
  8. mlrun/artifacts/dataset.py +9 -190
  9. mlrun/artifacts/manager.py +73 -46
  10. mlrun/artifacts/model.py +30 -158
  11. mlrun/artifacts/plots.py +23 -380
  12. mlrun/common/constants.py +73 -2
  13. mlrun/common/db/sql_session.py +3 -2
  14. mlrun/common/formatters/__init__.py +21 -0
  15. mlrun/common/formatters/artifact.py +46 -0
  16. mlrun/common/formatters/base.py +113 -0
  17. mlrun/common/formatters/feature_set.py +44 -0
  18. mlrun/common/formatters/function.py +46 -0
  19. mlrun/common/formatters/pipeline.py +53 -0
  20. mlrun/common/formatters/project.py +51 -0
  21. mlrun/common/formatters/run.py +29 -0
  22. mlrun/common/helpers.py +11 -1
  23. mlrun/{runtimes → common/runtimes}/constants.py +32 -4
  24. mlrun/common/schemas/__init__.py +21 -4
  25. mlrun/common/schemas/alert.py +202 -0
  26. mlrun/common/schemas/api_gateway.py +113 -2
  27. mlrun/common/schemas/artifact.py +28 -1
  28. mlrun/common/schemas/auth.py +11 -0
  29. mlrun/common/schemas/client_spec.py +2 -1
  30. mlrun/common/schemas/common.py +7 -4
  31. mlrun/common/schemas/constants.py +3 -0
  32. mlrun/common/schemas/feature_store.py +58 -28
  33. mlrun/common/schemas/frontend_spec.py +8 -0
  34. mlrun/common/schemas/function.py +11 -0
  35. mlrun/common/schemas/hub.py +7 -9
  36. mlrun/common/schemas/model_monitoring/__init__.py +21 -4
  37. mlrun/common/schemas/model_monitoring/constants.py +136 -42
  38. mlrun/common/schemas/model_monitoring/grafana.py +9 -5
  39. mlrun/common/schemas/model_monitoring/model_endpoints.py +89 -41
  40. mlrun/common/schemas/notification.py +69 -12
  41. mlrun/{runtimes/mpijob/v1alpha1.py → common/schemas/pagination.py} +10 -13
  42. mlrun/common/schemas/pipeline.py +7 -0
  43. mlrun/common/schemas/project.py +67 -16
  44. mlrun/common/schemas/runs.py +17 -0
  45. mlrun/common/schemas/schedule.py +1 -1
  46. mlrun/common/schemas/workflow.py +10 -2
  47. mlrun/common/types.py +14 -1
  48. mlrun/config.py +224 -58
  49. mlrun/data_types/data_types.py +11 -1
  50. mlrun/data_types/spark.py +5 -4
  51. mlrun/data_types/to_pandas.py +75 -34
  52. mlrun/datastore/__init__.py +8 -10
  53. mlrun/datastore/alibaba_oss.py +131 -0
  54. mlrun/datastore/azure_blob.py +131 -43
  55. mlrun/datastore/base.py +107 -47
  56. mlrun/datastore/datastore.py +17 -7
  57. mlrun/datastore/datastore_profile.py +91 -7
  58. mlrun/datastore/dbfs_store.py +3 -7
  59. mlrun/datastore/filestore.py +1 -3
  60. mlrun/datastore/google_cloud_storage.py +92 -32
  61. mlrun/datastore/hdfs.py +5 -0
  62. mlrun/datastore/inmem.py +6 -3
  63. mlrun/datastore/redis.py +3 -2
  64. mlrun/datastore/s3.py +30 -12
  65. mlrun/datastore/snowflake_utils.py +45 -0
  66. mlrun/datastore/sources.py +274 -59
  67. mlrun/datastore/spark_utils.py +30 -0
  68. mlrun/datastore/store_resources.py +9 -7
  69. mlrun/datastore/storeytargets.py +151 -0
  70. mlrun/datastore/targets.py +374 -102
  71. mlrun/datastore/utils.py +68 -5
  72. mlrun/datastore/v3io.py +28 -50
  73. mlrun/db/auth_utils.py +152 -0
  74. mlrun/db/base.py +231 -22
  75. mlrun/db/factory.py +1 -4
  76. mlrun/db/httpdb.py +864 -228
  77. mlrun/db/nopdb.py +268 -16
  78. mlrun/errors.py +35 -5
  79. mlrun/execution.py +111 -38
  80. mlrun/feature_store/__init__.py +0 -2
  81. mlrun/feature_store/api.py +46 -53
  82. mlrun/feature_store/common.py +6 -11
  83. mlrun/feature_store/feature_set.py +48 -23
  84. mlrun/feature_store/feature_vector.py +13 -2
  85. mlrun/feature_store/ingestion.py +7 -6
  86. mlrun/feature_store/retrieval/base.py +9 -4
  87. mlrun/feature_store/retrieval/dask_merger.py +2 -0
  88. mlrun/feature_store/retrieval/job.py +13 -4
  89. mlrun/feature_store/retrieval/local_merger.py +2 -0
  90. mlrun/feature_store/retrieval/spark_merger.py +24 -32
  91. mlrun/feature_store/steps.py +38 -19
  92. mlrun/features.py +6 -14
  93. mlrun/frameworks/_common/plan.py +3 -3
  94. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +7 -12
  95. mlrun/frameworks/_ml_common/plan.py +1 -1
  96. mlrun/frameworks/auto_mlrun/auto_mlrun.py +2 -2
  97. mlrun/frameworks/lgbm/__init__.py +1 -1
  98. mlrun/frameworks/lgbm/callbacks/callback.py +2 -4
  99. mlrun/frameworks/lgbm/model_handler.py +1 -1
  100. mlrun/frameworks/parallel_coordinates.py +4 -4
  101. mlrun/frameworks/pytorch/__init__.py +2 -2
  102. mlrun/frameworks/sklearn/__init__.py +1 -1
  103. mlrun/frameworks/sklearn/mlrun_interface.py +13 -3
  104. mlrun/frameworks/tf_keras/__init__.py +5 -2
  105. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +1 -1
  106. mlrun/frameworks/tf_keras/mlrun_interface.py +2 -2
  107. mlrun/frameworks/xgboost/__init__.py +1 -1
  108. mlrun/k8s_utils.py +57 -12
  109. mlrun/launcher/__init__.py +1 -1
  110. mlrun/launcher/base.py +6 -5
  111. mlrun/launcher/client.py +13 -11
  112. mlrun/launcher/factory.py +1 -1
  113. mlrun/launcher/local.py +15 -5
  114. mlrun/launcher/remote.py +10 -3
  115. mlrun/lists.py +6 -2
  116. mlrun/model.py +297 -48
  117. mlrun/model_monitoring/__init__.py +1 -1
  118. mlrun/model_monitoring/api.py +152 -357
  119. mlrun/model_monitoring/applications/__init__.py +10 -0
  120. mlrun/model_monitoring/applications/_application_steps.py +190 -0
  121. mlrun/model_monitoring/applications/base.py +108 -0
  122. mlrun/model_monitoring/applications/context.py +341 -0
  123. mlrun/model_monitoring/{evidently_application.py → applications/evidently_base.py} +27 -22
  124. mlrun/model_monitoring/applications/histogram_data_drift.py +227 -91
  125. mlrun/model_monitoring/applications/results.py +99 -0
  126. mlrun/model_monitoring/controller.py +130 -303
  127. mlrun/model_monitoring/{stores/models/sqlite.py → db/__init__.py} +5 -10
  128. mlrun/model_monitoring/db/stores/__init__.py +136 -0
  129. mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
  130. mlrun/model_monitoring/db/stores/base/store.py +213 -0
  131. mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
  132. mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +71 -0
  133. mlrun/model_monitoring/db/stores/sqldb/models/base.py +190 -0
  134. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +103 -0
  135. mlrun/model_monitoring/{stores/models/mysql.py → db/stores/sqldb/models/sqlite.py} +19 -13
  136. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +659 -0
  137. mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
  138. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +726 -0
  139. mlrun/model_monitoring/db/tsdb/__init__.py +105 -0
  140. mlrun/model_monitoring/db/tsdb/base.py +448 -0
  141. mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
  142. mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
  143. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +298 -0
  144. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +42 -0
  145. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +522 -0
  146. mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
  147. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +158 -0
  148. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +849 -0
  149. mlrun/model_monitoring/features_drift_table.py +34 -22
  150. mlrun/model_monitoring/helpers.py +177 -39
  151. mlrun/model_monitoring/model_endpoint.py +3 -2
  152. mlrun/model_monitoring/stream_processing.py +165 -398
  153. mlrun/model_monitoring/tracking_policy.py +7 -1
  154. mlrun/model_monitoring/writer.py +161 -125
  155. mlrun/package/packagers/default_packager.py +2 -2
  156. mlrun/package/packagers_manager.py +1 -0
  157. mlrun/package/utils/_formatter.py +2 -2
  158. mlrun/platforms/__init__.py +11 -10
  159. mlrun/platforms/iguazio.py +67 -228
  160. mlrun/projects/__init__.py +6 -1
  161. mlrun/projects/operations.py +47 -20
  162. mlrun/projects/pipelines.py +396 -249
  163. mlrun/projects/project.py +1125 -414
  164. mlrun/render.py +28 -22
  165. mlrun/run.py +207 -180
  166. mlrun/runtimes/__init__.py +76 -11
  167. mlrun/runtimes/base.py +40 -14
  168. mlrun/runtimes/daskjob.py +9 -2
  169. mlrun/runtimes/databricks_job/databricks_runtime.py +1 -0
  170. mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
  171. mlrun/runtimes/funcdoc.py +1 -29
  172. mlrun/runtimes/kubejob.py +34 -128
  173. mlrun/runtimes/local.py +39 -10
  174. mlrun/runtimes/mpijob/__init__.py +0 -20
  175. mlrun/runtimes/mpijob/abstract.py +8 -8
  176. mlrun/runtimes/mpijob/v1.py +1 -1
  177. mlrun/runtimes/nuclio/api_gateway.py +646 -177
  178. mlrun/runtimes/nuclio/application/__init__.py +15 -0
  179. mlrun/runtimes/nuclio/application/application.py +758 -0
  180. mlrun/runtimes/nuclio/application/reverse_proxy.go +95 -0
  181. mlrun/runtimes/nuclio/function.py +188 -68
  182. mlrun/runtimes/nuclio/serving.py +57 -60
  183. mlrun/runtimes/pod.py +191 -58
  184. mlrun/runtimes/remotesparkjob.py +11 -8
  185. mlrun/runtimes/sparkjob/spark3job.py +17 -18
  186. mlrun/runtimes/utils.py +40 -73
  187. mlrun/secrets.py +6 -2
  188. mlrun/serving/__init__.py +8 -1
  189. mlrun/serving/remote.py +2 -3
  190. mlrun/serving/routers.py +89 -64
  191. mlrun/serving/server.py +54 -26
  192. mlrun/serving/states.py +187 -56
  193. mlrun/serving/utils.py +19 -11
  194. mlrun/serving/v2_serving.py +136 -63
  195. mlrun/track/tracker.py +2 -1
  196. mlrun/track/trackers/mlflow_tracker.py +5 -0
  197. mlrun/utils/async_http.py +26 -6
  198. mlrun/utils/db.py +18 -0
  199. mlrun/utils/helpers.py +375 -105
  200. mlrun/utils/http.py +2 -2
  201. mlrun/utils/logger.py +75 -9
  202. mlrun/utils/notifications/notification/__init__.py +14 -10
  203. mlrun/utils/notifications/notification/base.py +48 -0
  204. mlrun/utils/notifications/notification/console.py +2 -0
  205. mlrun/utils/notifications/notification/git.py +24 -1
  206. mlrun/utils/notifications/notification/ipython.py +2 -0
  207. mlrun/utils/notifications/notification/slack.py +96 -21
  208. mlrun/utils/notifications/notification/webhook.py +63 -2
  209. mlrun/utils/notifications/notification_pusher.py +146 -16
  210. mlrun/utils/regex.py +9 -0
  211. mlrun/utils/retryer.py +3 -2
  212. mlrun/utils/v3io_clients.py +2 -3
  213. mlrun/utils/version/version.json +2 -2
  214. mlrun-1.7.2.dist-info/METADATA +390 -0
  215. mlrun-1.7.2.dist-info/RECORD +351 -0
  216. {mlrun-1.7.0rc5.dist-info → mlrun-1.7.2.dist-info}/WHEEL +1 -1
  217. mlrun/feature_store/retrieval/conversion.py +0 -271
  218. mlrun/kfpops.py +0 -868
  219. mlrun/model_monitoring/application.py +0 -310
  220. mlrun/model_monitoring/batch.py +0 -974
  221. mlrun/model_monitoring/controller_handler.py +0 -37
  222. mlrun/model_monitoring/prometheus.py +0 -216
  223. mlrun/model_monitoring/stores/__init__.py +0 -111
  224. mlrun/model_monitoring/stores/kv_model_endpoint_store.py +0 -574
  225. mlrun/model_monitoring/stores/model_endpoint_store.py +0 -145
  226. mlrun/model_monitoring/stores/models/__init__.py +0 -27
  227. mlrun/model_monitoring/stores/models/base.py +0 -84
  228. mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -382
  229. mlrun/platforms/other.py +0 -305
  230. mlrun-1.7.0rc5.dist-info/METADATA +0 -269
  231. mlrun-1.7.0rc5.dist-info/RECORD +0 -323
  232. {mlrun-1.7.0rc5.dist-info → mlrun-1.7.2.dist-info}/LICENSE +0 -0
  233. {mlrun-1.7.0rc5.dist-info → mlrun-1.7.2.dist-info}/entry_points.txt +0 -0
  234. {mlrun-1.7.0rc5.dist-info → mlrun-1.7.2.dist-info}/top_level.txt +0 -0
@@ -14,15 +14,16 @@
14
14
  import typing
15
15
 
16
16
  import kubernetes.client
17
+ from mlrun_pipelines.mounts import mount_v3io, mount_v3iod
17
18
 
18
19
  import mlrun.common.schemas.function
19
20
  import mlrun.errors
21
+ import mlrun.k8s_utils
20
22
  import mlrun.runtimes.pod
21
23
  from mlrun.config import config
22
24
 
23
25
  from ...execution import MLClientCtx
24
26
  from ...model import RunObject
25
- from ...platforms.iguazio import mount_v3io, mount_v3iod
26
27
  from ...utils import update_in, verify_field_regex
27
28
  from ..kubejob import KubejobRuntime
28
29
  from ..pod import KubeResourceSpec
@@ -451,7 +452,7 @@ class Spark3JobSpec(KubeResourceSpec):
451
452
  class Spark3Runtime(KubejobRuntime):
452
453
  group = "sparkoperator.k8s.io"
453
454
  version = "v1beta2"
454
- apiVersion = group + "/" + version
455
+ apiVersion = group + "/" + version # noqa: N815
455
456
  kind = "spark"
456
457
  plural = "sparkapplications"
457
458
 
@@ -505,13 +506,11 @@ class Spark3Runtime(KubejobRuntime):
505
506
  raise NotImplementedError(
506
507
  "Setting node name is not supported for spark runtime"
507
508
  )
508
- # TODO add affinity support
509
- # https://github.com/GoogleCloudPlatform/spark-on-k8s-operator/blob/master/pkg/apis/sparkoperator.k8s.io/v1beta2/types.go#L491
510
- if affinity:
511
- raise NotImplementedError(
512
- "Setting affinity is not supported for spark runtime"
513
- )
514
- super().with_node_selection(node_name, node_selector, affinity, tolerations)
509
+ mlrun.k8s_utils.validate_node_selectors(node_selector, raise_on_error=False)
510
+ self.with_driver_node_selection(node_name, node_selector, affinity, tolerations)
511
+ self.with_executor_node_selection(
512
+ node_name, node_selector, affinity, tolerations
513
+ )
515
514
 
516
515
  def with_driver_node_selection(
517
516
  self,
@@ -537,11 +536,12 @@ class Spark3Runtime(KubejobRuntime):
537
536
  raise NotImplementedError(
538
537
  "Setting node name is not supported for spark runtime"
539
538
  )
540
- if affinity:
539
+ if affinity is not None:
541
540
  self.spec.driver_affinity = affinity
542
- if node_selector:
541
+ if node_selector is not None:
542
+ mlrun.k8s_utils.validate_node_selectors(node_selector, raise_on_error=False)
543
543
  self.spec.driver_node_selector = node_selector
544
- if tolerations:
544
+ if tolerations is not None:
545
545
  self.spec.driver_tolerations = tolerations
546
546
 
547
547
  def with_executor_node_selection(
@@ -568,11 +568,12 @@ class Spark3Runtime(KubejobRuntime):
568
568
  raise NotImplementedError(
569
569
  "Setting node name is not supported for spark runtime"
570
570
  )
571
- if affinity:
571
+ if affinity is not None:
572
572
  self.spec.executor_affinity = affinity
573
- if node_selector:
573
+ if node_selector is not None:
574
+ mlrun.k8s_utils.validate_node_selectors(node_selector, raise_on_error=False)
574
575
  self.spec.executor_node_selector = node_selector
575
- if tolerations:
576
+ if tolerations is not None:
576
577
  self.spec.executor_tolerations = tolerations
577
578
 
578
579
  def with_preemption_mode(
@@ -811,9 +812,7 @@ class Spark3Runtime(KubejobRuntime):
811
812
 
812
813
  @classmethod
813
814
  def deploy_default_image(cls, with_gpu=False):
814
- from mlrun.run import new_function
815
-
816
- sj = new_function(kind=cls.kind, name="spark-default-image-deploy-temp")
815
+ sj = mlrun.new_function(kind=cls.kind, name="spark-default-image-deploy-temp")
817
816
  sj.spec.build.image = cls._get_default_deployed_mlrun_image_name(with_gpu)
818
817
 
819
818
  # setting required resources
mlrun/runtimes/utils.py CHANGED
@@ -20,17 +20,17 @@ from io import StringIO
20
20
  from sys import stderr
21
21
 
22
22
  import pandas as pd
23
- from kubernetes import client
24
23
 
25
24
  import mlrun
26
25
  import mlrun.common.constants
26
+ import mlrun.common.constants as mlrun_constants
27
27
  import mlrun.common.schemas
28
28
  import mlrun.utils.regex
29
29
  from mlrun.artifacts import TableArtifact
30
+ from mlrun.common.runtimes.constants import RunLabels
30
31
  from mlrun.config import config
31
32
  from mlrun.errors import err_to_str
32
33
  from mlrun.frameworks.parallel_coordinates import gen_pcp_plot
33
- from mlrun.runtimes.constants import RunLabels
34
34
  from mlrun.runtimes.generators import selector
35
35
  from mlrun.utils import get_in, helpers, logger, verify_field_regex
36
36
 
@@ -39,9 +39,6 @@ class RunError(Exception):
39
39
  pass
40
40
 
41
41
 
42
- mlrun_key = "mlrun/"
43
-
44
-
45
42
  class _ContextStore:
46
43
  def __init__(self):
47
44
  self._context = None
@@ -280,43 +277,6 @@ def get_item_name(item, attr="name"):
280
277
  return getattr(item, attr, None)
281
278
 
282
279
 
283
- def apply_kfp(modify, cop, runtime):
284
- modify(cop)
285
-
286
- # Have to do it here to avoid circular dependencies
287
- from .pod import AutoMountType
288
-
289
- if AutoMountType.is_auto_modifier(modify):
290
- runtime.spec.disable_auto_mount = True
291
-
292
- api = client.ApiClient()
293
- for k, v in cop.pod_labels.items():
294
- runtime.metadata.labels[k] = v
295
- for k, v in cop.pod_annotations.items():
296
- runtime.metadata.annotations[k] = v
297
- if cop.container.env:
298
- env_names = [
299
- e.name if hasattr(e, "name") else e["name"] for e in runtime.spec.env
300
- ]
301
- for e in api.sanitize_for_serialization(cop.container.env):
302
- name = e["name"]
303
- if name in env_names:
304
- runtime.spec.env[env_names.index(name)] = e
305
- else:
306
- runtime.spec.env.append(e)
307
- env_names.append(name)
308
- cop.container.env.clear()
309
-
310
- if cop.volumes and cop.container.volume_mounts:
311
- vols = api.sanitize_for_serialization(cop.volumes)
312
- mounts = api.sanitize_for_serialization(cop.container.volume_mounts)
313
- runtime.spec.update_vols_and_mounts(vols, mounts)
314
- cop.volumes.clear()
315
- cop.container.volume_mounts.clear()
316
-
317
- return runtime
318
-
319
-
320
280
  def verify_limits(
321
281
  resources_field_name,
322
282
  mem=None,
@@ -410,41 +370,13 @@ def generate_resources(mem=None, cpu=None, gpus=None, gpu_type="nvidia.com/gpu")
410
370
 
411
371
 
412
372
  def get_func_selector(project, name=None, tag=None):
413
- s = [f"{mlrun_key}project={project}"]
373
+ s = [f"{mlrun_constants.MLRunInternalLabels.project}={project}"]
414
374
  if name:
415
- s.append(f"{mlrun_key}function={name}")
416
- s.append(f"{mlrun_key}tag={tag or 'latest'}")
375
+ s.append(f"{mlrun_constants.MLRunInternalLabels.function}={name}")
376
+ s.append(f"{mlrun_constants.MLRunInternalLabels.tag}={tag or 'latest'}")
417
377
  return s
418
378
 
419
379
 
420
- class k8s_resource:
421
- kind = ""
422
- per_run = False
423
- per_function = False
424
- k8client = None
425
-
426
- def deploy_function(self, function):
427
- pass
428
-
429
- def release_function(self, function):
430
- pass
431
-
432
- def submit_run(self, function, runobj):
433
- pass
434
-
435
- def get_object(self, name, namespace=None):
436
- return None
437
-
438
- def get_status(self, name, namespace=None):
439
- return None
440
-
441
- def del_object(self, name, namespace=None):
442
- pass
443
-
444
- def get_pods(self, name, namespace=None, master=False):
445
- return {}
446
-
447
-
448
380
  def enrich_function_from_dict(function, function_dict):
449
381
  override_function = mlrun.new_function(runtime=function_dict, kind=function.kind)
450
382
  for attribute in [
@@ -504,6 +436,7 @@ def enrich_run_labels(
504
436
  ):
505
437
  labels_enrichment = {
506
438
  RunLabels.owner: os.environ.get("V3IO_USERNAME") or getpass.getuser(),
439
+ # TODO: remove this in 1.9.0
507
440
  RunLabels.v3io_user: os.environ.get("V3IO_USERNAME"),
508
441
  }
509
442
  labels_to_enrich = labels_to_enrich or RunLabels.all()
@@ -512,3 +445,37 @@ def enrich_run_labels(
512
445
  if label.value not in labels and enrichment:
513
446
  labels[label.value] = enrichment
514
447
  return labels
448
+
449
+
450
+ def resolve_node_selectors(
451
+ project_node_selector: dict, instance_node_selector: dict
452
+ ) -> dict:
453
+ config_node_selector = mlrun.mlconf.get_default_function_node_selector()
454
+ if project_node_selector or config_node_selector:
455
+ mlrun.utils.logger.debug(
456
+ "Enriching node selector from project and mlrun config",
457
+ project_node_selector=project_node_selector,
458
+ config_node_selector=config_node_selector,
459
+ )
460
+ return mlrun.utils.helpers.merge_dicts_with_precedence(
461
+ config_node_selector,
462
+ project_node_selector,
463
+ instance_node_selector,
464
+ )
465
+ return instance_node_selector
466
+
467
+
468
+ def enrich_gateway_timeout_annotations(annotations: dict, gateway_timeout: int):
469
+ """
470
+ Set gateway proxy connect/read/send timeout annotations
471
+ :param annotations: The annotations to enrich
472
+ :param gateway_timeout: The timeout to set
473
+ """
474
+ if not gateway_timeout:
475
+ return
476
+ gateway_timeout_str = str(gateway_timeout)
477
+ annotations["nginx.ingress.kubernetes.io/proxy-connect-timeout"] = (
478
+ gateway_timeout_str
479
+ )
480
+ annotations["nginx.ingress.kubernetes.io/proxy-read-timeout"] = gateway_timeout_str
481
+ annotations["nginx.ingress.kubernetes.io/proxy-send-timeout"] = gateway_timeout_str
mlrun/secrets.py CHANGED
@@ -163,15 +163,19 @@ def get_secret_or_env(
163
163
 
164
164
  Example::
165
165
 
166
- secrets = { "KEY1": "VALUE1" }
166
+ secrets = {"KEY1": "VALUE1"}
167
167
  secret = get_secret_or_env("KEY1", secret_provider=secrets)
168
168
 
169
+
169
170
  # Using a function to retrieve a secret
170
171
  def my_secret_provider(key):
171
172
  # some internal logic to retrieve secret
172
173
  return value
173
174
 
174
- secret = get_secret_or_env("KEY1", secret_provider=my_secret_provider, default="TOO-MANY-SECRETS")
175
+
176
+ secret = get_secret_or_env(
177
+ "KEY1", secret_provider=my_secret_provider, default="TOO-MANY-SECRETS"
178
+ )
175
179
 
176
180
  :param key: Secret key to look for
177
181
  :param secret_provider: Dictionary, callable or `SecretsStore` to extract the secret value from. If using a
mlrun/serving/__init__.py CHANGED
@@ -22,10 +22,17 @@ __all__ = [
22
22
  "RouterStep",
23
23
  "QueueStep",
24
24
  "ErrorStep",
25
+ "MonitoringApplicationStep",
25
26
  ]
26
27
 
27
28
  from .routers import ModelRouter, VotingEnsemble # noqa
28
29
  from .server import GraphContext, GraphServer, create_graph_server # noqa
29
- from .states import ErrorStep, QueueStep, RouterStep, TaskStep # noqa
30
+ from .states import (
31
+ ErrorStep,
32
+ QueueStep,
33
+ RouterStep,
34
+ TaskStep,
35
+ MonitoringApplicationStep,
36
+ ) # noqa
30
37
  from .v1_serving import MLModelServer, new_v1_model_server # noqa
31
38
  from .v2_serving import V2ModelServer # noqa
mlrun/serving/remote.py CHANGED
@@ -172,8 +172,7 @@ class RemoteStep(storey.SendToHttp):
172
172
  if not self._session:
173
173
  self._session = mlrun.utils.HTTPSessionWithRetry(
174
174
  self.retries,
175
- self.backoff_factor
176
- or mlrun.config.config.http_retry_defaults.backoff_factor,
175
+ self.backoff_factor or mlrun.mlconf.http_retry_defaults.backoff_factor,
177
176
  retry_on_exception=False,
178
177
  retry_on_status=self.retries > 0,
179
178
  retry_on_post=True,
@@ -185,7 +184,7 @@ class RemoteStep(storey.SendToHttp):
185
184
  resp = self._session.request(
186
185
  method,
187
186
  url,
188
- verify=mlrun.config.config.httpdb.http.verify,
187
+ verify=mlrun.mlconf.httpdb.http.verify,
189
188
  headers=headers,
190
189
  data=body,
191
190
  timeout=self.timeout,
mlrun/serving/routers.py CHANGED
@@ -28,10 +28,10 @@ import numpy as np
28
28
  import mlrun
29
29
  import mlrun.common.model_monitoring
30
30
  import mlrun.common.schemas.model_monitoring
31
+ from mlrun.errors import err_to_str
31
32
  from mlrun.utils import logger, now_date
32
33
 
33
34
  from ..common.helpers import parse_versioned_object_uri
34
- from ..config import config
35
35
  from .server import GraphServer
36
36
  from .utils import RouterToDict, _extract_input_data, _update_result_body
37
37
  from .v2_serving import _ModelLogPusher
@@ -271,7 +271,9 @@ class ParallelRun(BaseModelRouter):
271
271
  fn = mlrun.new_function("parallel", kind="serving")
272
272
  graph = fn.set_topology(
273
273
  "router",
274
- mlrun.serving.routers.ParallelRun(extend_event=True, executor_type=executor),
274
+ mlrun.serving.routers.ParallelRun(
275
+ extend_event=True, executor_type=executor
276
+ ),
275
277
  )
276
278
  graph.add_route("child1", class_name="Cls1")
277
279
  graph.add_route("child2", class_name="Cls2", my_arg={"c": 7})
@@ -489,6 +491,7 @@ class VotingEnsemble(ParallelRun):
489
491
  executor_type: Union[ParallelRunnerModes, str] = ParallelRunnerModes.thread,
490
492
  format_response_with_col_name_flag: bool = False,
491
493
  prediction_col_name: str = "prediction",
494
+ shard_by_endpoint: typing.Optional[bool] = None,
492
495
  **kwargs,
493
496
  ):
494
497
  """Voting Ensemble
@@ -578,6 +581,8 @@ class VotingEnsemble(ParallelRun):
578
581
  `{id: <id>, model_name: <name>, outputs: {..., prediction: [<predictions>], ...}}`
579
582
  the prediction_col_name should be `prediction`.
580
583
  by default, `prediction`
584
+ :param shard_by_endpoint: whether to use the endpoint as the partition/sharding key when writing to model
585
+ monitoring stream. Defaults to True.
581
586
  :param kwargs: extra arguments
582
587
  """
583
588
  super().__init__(
@@ -604,6 +609,7 @@ class VotingEnsemble(ParallelRun):
604
609
  self.prediction_col_name = prediction_col_name or "prediction"
605
610
  self.format_response_with_col_name_flag = format_response_with_col_name_flag
606
611
  self.model_endpoint_uid = None
612
+ self.shard_by_endpoint = shard_by_endpoint
607
613
 
608
614
  def post_init(self, mode="sync"):
609
615
  server = getattr(self.context, "_server", None) or getattr(
@@ -613,7 +619,7 @@ class VotingEnsemble(ParallelRun):
613
619
  logger.warn("GraphServer not initialized for VotingEnsemble instance")
614
620
  return
615
621
 
616
- if not self.context.is_mock or self.context.server.track_models:
622
+ if not self.context.is_mock or self.context.monitoring_mock:
617
623
  self.model_endpoint_uid = _init_endpoint_record(server, self)
618
624
 
619
625
  self._update_weights(self.weights)
@@ -905,7 +911,12 @@ class VotingEnsemble(ParallelRun):
905
911
  if self._model_logger and self.log_router:
906
912
  if "id" not in request:
907
913
  request["id"] = response.body["id"]
908
- self._model_logger.push(start, request, response.body)
914
+ partition_key = (
915
+ self.model_endpoint_uid if self.shard_by_endpoint is not False else None
916
+ )
917
+ self._model_logger.push(
918
+ start, request, response.body, partition_key=partition_key
919
+ )
909
920
  event.body = _update_result_body(
910
921
  self._result_path, original_body, response.body if response else None
911
922
  )
@@ -1013,7 +1024,7 @@ def _init_endpoint_record(
1013
1024
  graph_server.function_uri
1014
1025
  )
1015
1026
  except Exception as e:
1016
- logger.error("Failed to parse function URI", exc=e)
1027
+ logger.error("Failed to parse function URI", exc=err_to_str(e))
1017
1028
  return None
1018
1029
 
1019
1030
  # Generating version model value based on the model name and model version
@@ -1027,74 +1038,88 @@ def _init_endpoint_record(
1027
1038
  function_uri=graph_server.function_uri, versioned_model=versioned_model_name
1028
1039
  ).uid
1029
1040
 
1030
- # If model endpoint object was found in DB, skip the creation process.
1031
1041
  try:
1032
- mlrun.get_run_db().get_model_endpoint(project=project, endpoint_id=endpoint_uid)
1033
-
1042
+ model_ep = mlrun.get_run_db().get_model_endpoint(
1043
+ project=project, endpoint_id=endpoint_uid
1044
+ )
1034
1045
  except mlrun.errors.MLRunNotFoundError:
1046
+ model_ep = None
1047
+ except mlrun.errors.MLRunBadRequestError as err:
1048
+ logger.debug(
1049
+ f"Cant reach to model endpoints store, due to : {err}",
1050
+ )
1051
+ return
1052
+
1053
+ if voting_ensemble.context.server.track_models and not model_ep:
1035
1054
  logger.info("Creating a new model endpoint record", endpoint_id=endpoint_uid)
1055
+ # Get the children model endpoints ids
1056
+ children_uids = []
1057
+ for _, c in voting_ensemble.routes.items():
1058
+ if hasattr(c, "endpoint_uid"):
1059
+ children_uids.append(c.endpoint_uid)
1060
+ model_endpoint = mlrun.common.schemas.ModelEndpoint(
1061
+ metadata=mlrun.common.schemas.ModelEndpointMetadata(
1062
+ project=project, uid=endpoint_uid
1063
+ ),
1064
+ spec=mlrun.common.schemas.ModelEndpointSpec(
1065
+ function_uri=graph_server.function_uri,
1066
+ model=versioned_model_name,
1067
+ model_class=voting_ensemble.__class__.__name__,
1068
+ stream_path=voting_ensemble.context.stream.stream_uri,
1069
+ active=True,
1070
+ monitoring_mode=mlrun.common.schemas.model_monitoring.ModelMonitoringMode.enabled,
1071
+ ),
1072
+ status=mlrun.common.schemas.ModelEndpointStatus(
1073
+ children=list(voting_ensemble.routes.keys()),
1074
+ endpoint_type=mlrun.common.schemas.model_monitoring.EndpointType.ROUTER,
1075
+ children_uids=children_uids,
1076
+ ),
1077
+ )
1036
1078
 
1037
- try:
1038
- # Get the children model endpoints ids
1039
- children_uids = []
1040
- for _, c in voting_ensemble.routes.items():
1041
- if hasattr(c, "endpoint_uid"):
1042
- children_uids.append(c.endpoint_uid)
1043
-
1044
- model_endpoint = mlrun.common.schemas.ModelEndpoint(
1045
- metadata=mlrun.common.schemas.ModelEndpointMetadata(
1046
- project=project, uid=endpoint_uid
1047
- ),
1048
- spec=mlrun.common.schemas.ModelEndpointSpec(
1049
- function_uri=graph_server.function_uri,
1050
- model=versioned_model_name,
1051
- model_class=voting_ensemble.__class__.__name__,
1052
- stream_path=config.model_endpoint_monitoring.store_prefixes.default.format(
1053
- project=project, kind="stream"
1054
- ),
1055
- active=True,
1056
- monitoring_mode=mlrun.common.schemas.model_monitoring.ModelMonitoringMode.enabled
1057
- if voting_ensemble.context.server.track_models
1058
- else mlrun.common.schemas.model_monitoring.ModelMonitoringMode.disabled,
1059
- ),
1060
- status=mlrun.common.schemas.ModelEndpointStatus(
1061
- children=list(voting_ensemble.routes.keys()),
1062
- endpoint_type=mlrun.common.schemas.model_monitoring.EndpointType.ROUTER,
1063
- children_uids=children_uids,
1064
- ),
1065
- )
1079
+ db = mlrun.get_run_db()
1066
1080
 
1067
- db = mlrun.get_run_db()
1081
+ db.create_model_endpoint(
1082
+ project=project,
1083
+ endpoint_id=model_endpoint.metadata.uid,
1084
+ model_endpoint=model_endpoint.dict(),
1085
+ )
1068
1086
 
1087
+ # Update model endpoint children type
1088
+ for model_endpoint in children_uids:
1089
+ current_endpoint = db.get_model_endpoint(
1090
+ project=project, endpoint_id=model_endpoint
1091
+ )
1092
+ current_endpoint.status.endpoint_type = (
1093
+ mlrun.common.schemas.model_monitoring.EndpointType.LEAF_EP
1094
+ )
1069
1095
  db.create_model_endpoint(
1070
1096
  project=project,
1071
- endpoint_id=model_endpoint.metadata.uid,
1072
- model_endpoint=model_endpoint.dict(),
1097
+ endpoint_id=model_endpoint,
1098
+ model_endpoint=current_endpoint,
1073
1099
  )
1074
-
1075
- # Update model endpoint children type
1076
- for model_endpoint in children_uids:
1077
- current_endpoint = db.get_model_endpoint(
1078
- project=project, endpoint_id=model_endpoint
1079
- )
1080
- current_endpoint.status.endpoint_type = (
1081
- mlrun.common.schemas.model_monitoring.EndpointType.LEAF_EP
1082
- )
1083
- db.create_model_endpoint(
1084
- project=project,
1085
- endpoint_id=model_endpoint,
1086
- model_endpoint=current_endpoint,
1087
- )
1088
-
1089
- except Exception as exc:
1090
- logger.warning(
1091
- "Failed creating model endpoint record",
1092
- exc=exc,
1093
- traceback=traceback.format_exc(),
1094
- )
1095
-
1096
- except Exception as e:
1097
- logger.error("Failed to retrieve model endpoint object", exc=e)
1100
+ elif (
1101
+ model_ep
1102
+ and (
1103
+ model_ep.spec.monitoring_mode
1104
+ == mlrun.common.schemas.model_monitoring.ModelMonitoringMode.enabled
1105
+ )
1106
+ != voting_ensemble.context.server.track_models
1107
+ ):
1108
+ monitoring_mode = (
1109
+ mlrun.common.schemas.model_monitoring.ModelMonitoringMode.enabled
1110
+ if voting_ensemble.context.server.track_models
1111
+ else mlrun.common.schemas.model_monitoring.ModelMonitoringMode.disabled
1112
+ )
1113
+ db = mlrun.get_run_db()
1114
+ db.patch_model_endpoint(
1115
+ project=project,
1116
+ endpoint_id=endpoint_uid,
1117
+ attributes={"monitoring_mode": monitoring_mode},
1118
+ )
1119
+ logger.debug(
1120
+ f"Updating model endpoint monitoring_mode to {monitoring_mode}",
1121
+ endpoint_id=endpoint_uid,
1122
+ )
1098
1123
 
1099
1124
  return endpoint_uid
1100
1125