mlrun 1.10.0rc18__py3-none-any.whl → 1.11.0rc16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic. Click here for more details.

Files changed (167)
  1. mlrun/__init__.py +24 -3
  2. mlrun/__main__.py +0 -4
  3. mlrun/artifacts/dataset.py +2 -2
  4. mlrun/artifacts/document.py +6 -1
  5. mlrun/artifacts/llm_prompt.py +21 -15
  6. mlrun/artifacts/model.py +3 -3
  7. mlrun/artifacts/plots.py +1 -1
  8. mlrun/{model_monitoring/db/tsdb/tdengine → auth}/__init__.py +2 -3
  9. mlrun/auth/nuclio.py +89 -0
  10. mlrun/auth/providers.py +429 -0
  11. mlrun/auth/utils.py +415 -0
  12. mlrun/common/constants.py +14 -0
  13. mlrun/common/model_monitoring/helpers.py +123 -0
  14. mlrun/common/runtimes/constants.py +28 -0
  15. mlrun/common/schemas/__init__.py +14 -3
  16. mlrun/common/schemas/alert.py +2 -2
  17. mlrun/common/schemas/api_gateway.py +3 -0
  18. mlrun/common/schemas/auth.py +12 -10
  19. mlrun/common/schemas/client_spec.py +4 -0
  20. mlrun/common/schemas/constants.py +25 -0
  21. mlrun/common/schemas/frontend_spec.py +1 -8
  22. mlrun/common/schemas/function.py +34 -0
  23. mlrun/common/schemas/hub.py +33 -20
  24. mlrun/common/schemas/model_monitoring/__init__.py +2 -1
  25. mlrun/common/schemas/model_monitoring/constants.py +12 -15
  26. mlrun/common/schemas/model_monitoring/functions.py +13 -4
  27. mlrun/common/schemas/model_monitoring/model_endpoints.py +11 -0
  28. mlrun/common/schemas/pipeline.py +1 -1
  29. mlrun/common/schemas/secret.py +17 -2
  30. mlrun/common/secrets.py +95 -1
  31. mlrun/common/types.py +10 -10
  32. mlrun/config.py +69 -19
  33. mlrun/data_types/infer.py +2 -2
  34. mlrun/datastore/__init__.py +12 -5
  35. mlrun/datastore/azure_blob.py +162 -47
  36. mlrun/datastore/base.py +274 -10
  37. mlrun/datastore/datastore.py +7 -2
  38. mlrun/datastore/datastore_profile.py +84 -22
  39. mlrun/datastore/model_provider/huggingface_provider.py +225 -41
  40. mlrun/datastore/model_provider/mock_model_provider.py +87 -0
  41. mlrun/datastore/model_provider/model_provider.py +206 -74
  42. mlrun/datastore/model_provider/openai_provider.py +226 -66
  43. mlrun/datastore/s3.py +39 -18
  44. mlrun/datastore/sources.py +1 -1
  45. mlrun/datastore/store_resources.py +4 -4
  46. mlrun/datastore/storeytargets.py +17 -12
  47. mlrun/datastore/targets.py +1 -1
  48. mlrun/datastore/utils.py +25 -6
  49. mlrun/datastore/v3io.py +1 -1
  50. mlrun/db/base.py +63 -32
  51. mlrun/db/httpdb.py +373 -153
  52. mlrun/db/nopdb.py +54 -21
  53. mlrun/errors.py +4 -2
  54. mlrun/execution.py +66 -25
  55. mlrun/feature_store/api.py +1 -1
  56. mlrun/feature_store/common.py +1 -1
  57. mlrun/feature_store/feature_vector_utils.py +1 -1
  58. mlrun/feature_store/steps.py +8 -6
  59. mlrun/frameworks/_common/utils.py +3 -3
  60. mlrun/frameworks/_dl_common/loggers/logger.py +1 -1
  61. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +2 -1
  62. mlrun/frameworks/_ml_common/loggers/mlrun_logger.py +1 -1
  63. mlrun/frameworks/_ml_common/utils.py +2 -1
  64. mlrun/frameworks/auto_mlrun/auto_mlrun.py +4 -3
  65. mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +2 -1
  66. mlrun/frameworks/onnx/dataset.py +2 -1
  67. mlrun/frameworks/onnx/mlrun_interface.py +2 -1
  68. mlrun/frameworks/pytorch/callbacks/logging_callback.py +5 -4
  69. mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +2 -1
  70. mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +2 -1
  71. mlrun/frameworks/pytorch/utils.py +2 -1
  72. mlrun/frameworks/sklearn/metric.py +2 -1
  73. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +5 -4
  74. mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +2 -1
  75. mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +2 -1
  76. mlrun/hub/__init__.py +52 -0
  77. mlrun/hub/base.py +142 -0
  78. mlrun/hub/module.py +172 -0
  79. mlrun/hub/step.py +113 -0
  80. mlrun/k8s_utils.py +105 -16
  81. mlrun/launcher/base.py +15 -7
  82. mlrun/launcher/local.py +4 -1
  83. mlrun/model.py +14 -4
  84. mlrun/model_monitoring/__init__.py +0 -1
  85. mlrun/model_monitoring/api.py +65 -28
  86. mlrun/model_monitoring/applications/__init__.py +1 -1
  87. mlrun/model_monitoring/applications/base.py +299 -128
  88. mlrun/model_monitoring/applications/context.py +2 -4
  89. mlrun/model_monitoring/controller.py +132 -58
  90. mlrun/model_monitoring/db/_schedules.py +38 -29
  91. mlrun/model_monitoring/db/_stats.py +6 -16
  92. mlrun/model_monitoring/db/tsdb/__init__.py +9 -7
  93. mlrun/model_monitoring/db/tsdb/base.py +29 -9
  94. mlrun/model_monitoring/db/tsdb/preaggregate.py +234 -0
  95. mlrun/model_monitoring/db/tsdb/stream_graph_steps.py +63 -0
  96. mlrun/model_monitoring/db/tsdb/timescaledb/queries/timescaledb_metrics_queries.py +414 -0
  97. mlrun/model_monitoring/db/tsdb/timescaledb/queries/timescaledb_predictions_queries.py +376 -0
  98. mlrun/model_monitoring/db/tsdb/timescaledb/queries/timescaledb_results_queries.py +590 -0
  99. mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_connection.py +434 -0
  100. mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_connector.py +541 -0
  101. mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_operations.py +808 -0
  102. mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_schema.py +502 -0
  103. mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_stream.py +163 -0
  104. mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_stream_graph_steps.py +60 -0
  105. mlrun/model_monitoring/db/tsdb/timescaledb/utils/timescaledb_dataframe_processor.py +141 -0
  106. mlrun/model_monitoring/db/tsdb/timescaledb/utils/timescaledb_query_builder.py +585 -0
  107. mlrun/model_monitoring/db/tsdb/timescaledb/writer_graph_steps.py +73 -0
  108. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +20 -9
  109. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +235 -51
  110. mlrun/model_monitoring/features_drift_table.py +2 -1
  111. mlrun/model_monitoring/helpers.py +30 -6
  112. mlrun/model_monitoring/stream_processing.py +34 -28
  113. mlrun/model_monitoring/writer.py +224 -4
  114. mlrun/package/__init__.py +2 -1
  115. mlrun/platforms/__init__.py +0 -43
  116. mlrun/platforms/iguazio.py +8 -4
  117. mlrun/projects/operations.py +17 -11
  118. mlrun/projects/pipelines.py +2 -2
  119. mlrun/projects/project.py +187 -123
  120. mlrun/run.py +95 -21
  121. mlrun/runtimes/__init__.py +2 -186
  122. mlrun/runtimes/base.py +103 -25
  123. mlrun/runtimes/constants.py +225 -0
  124. mlrun/runtimes/daskjob.py +5 -2
  125. mlrun/runtimes/databricks_job/databricks_runtime.py +2 -1
  126. mlrun/runtimes/local.py +5 -2
  127. mlrun/runtimes/mounts.py +20 -2
  128. mlrun/runtimes/nuclio/__init__.py +12 -7
  129. mlrun/runtimes/nuclio/api_gateway.py +36 -6
  130. mlrun/runtimes/nuclio/application/application.py +339 -40
  131. mlrun/runtimes/nuclio/function.py +222 -72
  132. mlrun/runtimes/nuclio/serving.py +132 -42
  133. mlrun/runtimes/pod.py +213 -21
  134. mlrun/runtimes/utils.py +49 -9
  135. mlrun/secrets.py +99 -14
  136. mlrun/serving/__init__.py +2 -0
  137. mlrun/serving/remote.py +84 -11
  138. mlrun/serving/routers.py +26 -44
  139. mlrun/serving/server.py +138 -51
  140. mlrun/serving/serving_wrapper.py +6 -2
  141. mlrun/serving/states.py +997 -283
  142. mlrun/serving/steps.py +62 -0
  143. mlrun/serving/system_steps.py +149 -95
  144. mlrun/serving/v2_serving.py +9 -10
  145. mlrun/track/trackers/mlflow_tracker.py +29 -31
  146. mlrun/utils/helpers.py +292 -94
  147. mlrun/utils/http.py +9 -2
  148. mlrun/utils/notifications/notification/base.py +18 -0
  149. mlrun/utils/notifications/notification/git.py +3 -5
  150. mlrun/utils/notifications/notification/mail.py +39 -16
  151. mlrun/utils/notifications/notification/slack.py +2 -4
  152. mlrun/utils/notifications/notification/webhook.py +2 -5
  153. mlrun/utils/notifications/notification_pusher.py +3 -3
  154. mlrun/utils/version/version.json +2 -2
  155. mlrun/utils/version/version.py +3 -4
  156. {mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/METADATA +63 -74
  157. {mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/RECORD +161 -143
  158. mlrun/api/schemas/__init__.py +0 -259
  159. mlrun/db/auth_utils.py +0 -152
  160. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +0 -344
  161. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +0 -75
  162. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connection.py +0 -281
  163. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +0 -1266
  164. {mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/WHEEL +0 -0
  165. {mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/entry_points.txt +0 -0
  166. {mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/licenses/LICENSE +0 -0
  167. {mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/top_level.txt +0 -0
mlrun/run.py CHANGED
@@ -17,6 +17,7 @@ import json
17
17
  import os
18
18
  import pathlib
19
19
  import socket
20
+ import sys
20
21
  import tempfile
21
22
  import time
22
23
  import typing
@@ -117,14 +118,31 @@ def function_to_module(code="", workdir=None, secrets=None, silent=False):
117
118
  raise ValueError("nothing to run, specify command or function")
118
119
 
119
120
  command = os.path.join(workdir or "", command)
120
- path = Path(command)
121
- mod_name = path.name
122
- if path.suffix:
123
- mod_name = mod_name[: -len(path.suffix)]
121
+
122
+ source_file_path_object, working_dir_path_object = (
123
+ mlrun.utils.helpers.get_source_and_working_dir_paths(command)
124
+ )
125
+ if source_file_path_object.is_relative_to(working_dir_path_object):
126
+ mod_name = mlrun.utils.helpers.get_relative_module_name_from_path(
127
+ source_file_path_object, working_dir_path_object
128
+ )
129
+ elif source_file_path_object.is_relative_to(
130
+ pathlib.Path(tempfile.gettempdir()).resolve()
131
+ ):
132
+ mod_name = Path(command).stem
133
+ else:
134
+ raise mlrun.errors.MLRunRuntimeError(
135
+ f"Cannot run source file '{command}': it must be located either under the current working "
136
+ f"directory ('{working_dir_path_object}') or the system temporary directory ('{tempfile.gettempdir()}'). "
137
+ f"This is required when running with local=True."
138
+ )
139
+
124
140
  spec = imputil.spec_from_file_location(mod_name, command)
125
141
  if spec is None:
126
142
  raise OSError(f"cannot import from {command!r}")
127
143
  mod = imputil.module_from_spec(spec)
144
+ # add to system modules, which can be necessary when running in a MockServer (ML-10937)
145
+ sys.modules[mod_name] = mod
128
146
  spec.loader.exec_module(mod)
129
147
 
130
148
  return mod
@@ -141,7 +159,7 @@ def load_func_code(command="", workdir=None, secrets=None, name="name"):
141
159
  else:
142
160
  is_remote = "://" in command
143
161
  data = get_object(command, secrets)
144
- runtime = yaml.load(data, Loader=yaml.FullLoader)
162
+ runtime = yaml.safe_load(data)
145
163
  runtime = new_function(runtime=runtime)
146
164
 
147
165
  command = runtime.spec.command or ""
@@ -222,7 +240,8 @@ def get_or_create_ctx(
222
240
  :param spec: dictionary holding run spec
223
241
  :param with_env: look for context in environment vars, default True
224
242
  :param rundb: path/url to the metadata and artifact database
225
- :param project: project to initiate the context in (by default `mlrun.mlconf.active_project`)
243
+ :param project: project to initiate the context in (by default `mlrun.mlconf.active_project`).
244
+ If not set, an active project must exist.
226
245
  :param upload_artifacts: when using local context (not as part of a job/run), upload artifacts to the
227
246
  system default artifact path location
228
247
  :return: execution context
@@ -271,12 +290,22 @@ def get_or_create_ctx(
271
290
  elif with_env and config:
272
291
  newspec = config
273
292
 
274
- if isinstance(newspec, (RunObject, RunTemplate)):
293
+ if isinstance(newspec, RunObject | RunTemplate):
275
294
  newspec = newspec.to_dict()
276
295
 
277
296
  if newspec and not isinstance(newspec, dict):
278
297
  newspec = json.loads(newspec)
279
298
 
299
+ if (
300
+ not newspec.get("metadata", {}).get("project")
301
+ and not project
302
+ and not mlconf.active_project
303
+ ):
304
+ raise mlrun.errors.MLRunMissingProjectError(
305
+ """No active project found. Make sure to set an active project using: mlrun.get_or_create_project()
306
+ You can verify the active project with: mlrun.mlconf.active_project"""
307
+ )
308
+
280
309
  if not newspec:
281
310
  newspec = {}
282
311
  if upload_artifacts:
@@ -316,7 +345,7 @@ def get_or_create_ctx(
316
345
  def import_function(url="", secrets=None, db="", project=None, new_name=None):
317
346
  """Create function object from DB or local/remote YAML file
318
347
 
319
- Functions can be imported from function repositories (mlrun Function Hub (formerly Marketplace) or local db),
348
+ Functions can be imported from function repositories (MLRun Hub or local db),
320
349
  or be read from a remote URL (http(s), s3, git, v3io, ..) containing the function YAML
321
350
 
322
351
  special URLs::
@@ -332,7 +361,7 @@ def import_function(url="", secrets=None, db="", project=None, new_name=None):
332
361
  "https://raw.githubusercontent.com/org/repo/func.yaml"
333
362
  )
334
363
 
335
- :param url: path/url to Function Hub, db or function YAML file
364
+ :param url: path/url to MLRun Hub, db or function YAML file
336
365
  :param secrets: optional, credentials dict for DB or URL (s3, v3io, ...)
337
366
  :param db: optional, mlrun api/db path
338
367
  :param project: optional, target project for the function
@@ -362,10 +391,13 @@ def import_function(url="", secrets=None, db="", project=None, new_name=None):
362
391
  return function
363
392
 
364
393
 
365
- def import_function_to_dict(url, secrets=None):
394
+ def import_function_to_dict(
395
+ url: str,
396
+ secrets: Optional[dict] = None,
397
+ ) -> dict:
366
398
  """Load function spec from local/remote YAML file"""
367
399
  obj = get_object(url, secrets)
368
- runtime = yaml.load(obj, Loader=yaml.FullLoader)
400
+ runtime = yaml.safe_load(obj)
369
401
  remote = "://" in url
370
402
 
371
403
  code = get_in(runtime, "spec.build.functionSourceCode")
@@ -388,20 +420,40 @@ def import_function_to_dict(url, secrets=None):
388
420
  raise ValueError("exec path (spec.command) must be relative")
389
421
  url = url[: url.rfind("/") + 1] + code_file
390
422
  code = get_object(url, secrets)
423
+ code_file = _ensure_path_confined_to_base_dir(
424
+ base_directory=".",
425
+ relative_path=code_file,
426
+ error_message_on_escape="Path traversal detected in spec.command",
427
+ )
391
428
  dir = path.dirname(code_file)
392
429
  if dir:
393
430
  makedirs(dir, exist_ok=True)
394
431
  with open(code_file, "wb") as fp:
395
432
  fp.write(code)
396
433
  elif cmd:
397
- if not path.isfile(code_file):
398
- # look for the file in a relative path to the yaml
399
- slash = url.rfind("/")
400
- if slash >= 0 and path.isfile(url[: url.rfind("/") + 1] + code_file):
401
- raise ValueError(
402
- f"exec file spec.command={code_file} is relative, change working dir"
403
- )
434
+ slash_index = url.rfind("/")
435
+ if slash_index < 0:
436
+ raise ValueError(f"no file in exec path (spec.command={code_file})")
437
+ base_dir = os.path.normpath(url[: slash_index + 1])
438
+
439
+ # Validate and resolve the candidate path before checking existence
440
+ candidate_path = _ensure_path_confined_to_base_dir(
441
+ base_directory=base_dir,
442
+ relative_path=code_file,
443
+ error_message_on_escape=(
444
+ f"exec file spec.command={code_file} is outside of allowed directory"
445
+ ),
446
+ )
447
+
448
+ # Only now it's safe to check file existence
449
+ if not path.isfile(candidate_path):
404
450
  raise ValueError(f"no file in exec path (spec.command={code_file})")
451
+
452
+ # Check that the path is absolute
453
+ if not os.path.isabs(code_file):
454
+ raise ValueError(
455
+ f"exec file spec.command={code_file} is relative, it must be absolute. Change working dir"
456
+ )
405
457
  else:
406
458
  raise ValueError("command or code not specified in function spec")
407
459
 
@@ -503,6 +555,7 @@ def new_function(
503
555
 
504
556
  # make sure function name is valid
505
557
  name = mlrun.utils.helpers.normalize_name(name)
558
+ mlrun.utils.helpers.validate_function_name(name)
506
559
 
507
560
  runner.metadata.name = name
508
561
  runner.metadata.project = (
@@ -542,6 +595,7 @@ def new_function(
542
595
  )
543
596
 
544
597
  runner.prepare_image_for_deploy()
598
+
545
599
  return runner
546
600
 
547
601
 
@@ -575,7 +629,7 @@ def code_to_function(
575
629
  code_output: Optional[str] = "",
576
630
  embed_code: bool = True,
577
631
  description: Optional[str] = "",
578
- requirements: Optional[Union[str, list[str]]] = None,
632
+ requirements: Optional[list[str]] = None,
579
633
  categories: Optional[list[str]] = None,
580
634
  labels: Optional[dict[str, str]] = None,
581
635
  with_doc: Optional[bool] = True,
@@ -638,7 +692,7 @@ def code_to_function(
638
692
  :param description: short function description, defaults to ''
639
693
  :param requirements: a list of python packages
640
694
  :param requirements_file: path to a python requirements file
641
- :param categories: list of categories for mlrun Function Hub, defaults to None
695
+ :param categories: list of categories for MLRun Hub, defaults to None
642
696
  :param labels: name/value pairs dict to tag the function with useful metadata, defaults to None
643
697
  :param with_doc: indicates whether to document the function parameters, defaults to True
644
698
  :param ignored_tags: notebook cells to ignore when converting notebooks to py code (separated by ';')
@@ -681,7 +735,6 @@ def code_to_function(
681
735
  )
682
736
 
683
737
  """
684
- filebase, _ = path.splitext(path.basename(filename))
685
738
  ignored_tags = ignored_tags or mlconf.ignored_notebook_tags
686
739
 
687
740
  def add_name(origin, name=""):
@@ -746,6 +799,7 @@ def code_to_function(
746
799
  kind=sub_kind,
747
800
  ignored_tags=ignored_tags,
748
801
  )
802
+
749
803
  spec["spec"]["env"].append(
750
804
  {
751
805
  "name": "MLRUN_HTTPDB__NUCLIO__EXPLICIT_ACK",
@@ -798,6 +852,7 @@ def code_to_function(
798
852
  runtime.spec.build.code_origin = code_origin
799
853
  runtime.spec.build.origin_filename = filename or (name + ".ipynb")
800
854
  update_common(runtime, spec)
855
+
801
856
  return runtime
802
857
 
803
858
  if kind is None or kind in ["", "Function"]:
@@ -811,6 +866,7 @@ def code_to_function(
811
866
 
812
867
  if not name:
813
868
  raise ValueError("name must be specified")
869
+
814
870
  h = get_in(spec, "spec.handler", "").split(":")
815
871
  runtime.handler = h[0] if len(h) <= 1 else h[1]
816
872
  runtime.metadata = get_in(spec, "spec.metadata")
@@ -1258,3 +1314,21 @@ def wait_for_runs_completion(
1258
1314
  runs = running
1259
1315
 
1260
1316
  return completed
1317
+
1318
+
1319
+ def _ensure_path_confined_to_base_dir(
1320
+ base_directory: str,
1321
+ relative_path: str,
1322
+ error_message_on_escape: str,
1323
+ ) -> str:
1324
+ """
1325
+ Join `relative_path` to `base_directory`, normalise the result,
1326
+ and guarantee it stays inside `base_directory`.
1327
+ """
1328
+ absolute_base_directory = path.abspath(base_directory)
1329
+ absolute_candidate_path = path.abspath(
1330
+ path.join(absolute_base_directory, relative_path)
1331
+ )
1332
+ if not absolute_candidate_path.startswith(absolute_base_directory + path.sep):
1333
+ raise ValueError(error_message_on_escape)
1334
+ return absolute_candidate_path
@@ -26,6 +26,7 @@ __all__ = [
26
26
  "KubeResource",
27
27
  "ApplicationRuntime",
28
28
  "MpiRuntimeV1",
29
+ "RuntimeKinds",
29
30
  ]
30
31
 
31
32
  import typing
@@ -34,6 +35,7 @@ from mlrun.runtimes.utils import resolve_spark_operator_version
34
35
 
35
36
  from ..common.runtimes.constants import MPIJobCRDVersions
36
37
  from .base import BaseRuntime, RunError, RuntimeClassMode # noqa
38
+ from .constants import RuntimeKinds
37
39
  from .daskjob import DaskCluster # noqa
38
40
  from .databricks_job.databricks_runtime import DatabricksRuntime
39
41
  from .kubejob import KubejobRuntime, KubeResource # noqa
@@ -94,192 +96,6 @@ def new_model_server(
94
96
  )
95
97
 
96
98
 
97
- class RuntimeKinds:
98
- remote = "remote"
99
- nuclio = "nuclio"
100
- dask = "dask"
101
- job = "job"
102
- spark = "spark"
103
- remotespark = "remote-spark"
104
- mpijob = "mpijob"
105
- serving = "serving"
106
- local = "local"
107
- handler = "handler"
108
- databricks = "databricks"
109
- application = "application"
110
-
111
- @staticmethod
112
- def all():
113
- return [
114
- RuntimeKinds.remote,
115
- RuntimeKinds.nuclio,
116
- RuntimeKinds.serving,
117
- RuntimeKinds.dask,
118
- RuntimeKinds.job,
119
- RuntimeKinds.spark,
120
- RuntimeKinds.remotespark,
121
- RuntimeKinds.mpijob,
122
- RuntimeKinds.local,
123
- RuntimeKinds.databricks,
124
- RuntimeKinds.application,
125
- ]
126
-
127
- @staticmethod
128
- def runtime_with_handlers():
129
- return [
130
- RuntimeKinds.dask,
131
- RuntimeKinds.job,
132
- RuntimeKinds.spark,
133
- RuntimeKinds.remotespark,
134
- RuntimeKinds.mpijob,
135
- RuntimeKinds.databricks,
136
- ]
137
-
138
- @staticmethod
139
- def abortable_runtimes():
140
- return [
141
- RuntimeKinds.job,
142
- RuntimeKinds.spark,
143
- RuntimeKinds.remotespark,
144
- RuntimeKinds.mpijob,
145
- RuntimeKinds.databricks,
146
- RuntimeKinds.local,
147
- RuntimeKinds.handler,
148
- "",
149
- ]
150
-
151
- @staticmethod
152
- def retriable_runtimes():
153
- return [
154
- RuntimeKinds.job,
155
- ]
156
-
157
- @staticmethod
158
- def nuclio_runtimes():
159
- return [
160
- RuntimeKinds.remote,
161
- RuntimeKinds.nuclio,
162
- RuntimeKinds.serving,
163
- RuntimeKinds.application,
164
- ]
165
-
166
- @staticmethod
167
- def pure_nuclio_deployed_runtimes():
168
- return [
169
- RuntimeKinds.remote,
170
- RuntimeKinds.nuclio,
171
- RuntimeKinds.serving,
172
- ]
173
-
174
- @staticmethod
175
- def handlerless_runtimes():
176
- return [
177
- RuntimeKinds.serving,
178
- # Application runtime handler is internal reverse proxy
179
- RuntimeKinds.application,
180
- ]
181
-
182
- @staticmethod
183
- def local_runtimes():
184
- return [
185
- RuntimeKinds.local,
186
- RuntimeKinds.handler,
187
- ]
188
-
189
- @staticmethod
190
- def is_log_collectable_runtime(kind: typing.Optional[str]):
191
- """
192
- whether log collector can collect logs for that runtime
193
- :param kind: kind name
194
- :return: whether log collector can collect logs for that runtime
195
- """
196
- # if local run, the log collector doesn't support it as it only supports k8s resources
197
- # when runtime is local the client is responsible for logging the stdout of the run by using `log_std`
198
- if RuntimeKinds.is_local_runtime(kind):
199
- return False
200
-
201
- if (
202
- kind
203
- not in [
204
- # dask implementation is different from other runtimes, because few runs can be run against the same
205
- # runtime resource, so collecting logs on that runtime resource won't be correct, the way we collect
206
- # logs for dask is by using `log_std` on client side after we execute the code against the cluster,
207
- # as submitting the run with the dask client will return the run stdout.
208
- # For more information head to `DaskCluster._run`.
209
- RuntimeKinds.dask
210
- ]
211
- + RuntimeKinds.nuclio_runtimes()
212
- ):
213
- return True
214
-
215
- return False
216
-
217
- @staticmethod
218
- def is_local_runtime(kind):
219
- # "" or None counted as local
220
- if not kind or kind in RuntimeKinds.local_runtimes():
221
- return True
222
- return False
223
-
224
- @staticmethod
225
- def requires_absolute_artifacts_path(kind):
226
- """
227
- Returns True if the runtime kind requires an absolute artifacts path (i.e. is neither local nor dask), False otherwise.
228
- """
229
- if RuntimeKinds.is_local_runtime(kind):
230
- return False
231
-
232
- if kind not in [
233
- # logging artifacts is done externally to the dask cluster by a client that can either run locally (in which
234
- # case the path can be relative) or remotely (in which case the path must be absolute and will be passed
235
- # to another run)
236
- RuntimeKinds.dask
237
- ]:
238
- return True
239
- return False
240
-
241
- @staticmethod
242
- def requires_image_name_for_execution(kind):
243
- if RuntimeKinds.is_local_runtime(kind):
244
- return False
245
-
246
- # both spark and remote spark uses different mechanism for assigning images
247
- return kind not in [RuntimeKinds.spark, RuntimeKinds.remotespark]
248
-
249
- @staticmethod
250
- def supports_from_notebook(kind):
251
- return kind not in [RuntimeKinds.application]
252
-
253
- @staticmethod
254
- def resolve_nuclio_runtime(kind: str, sub_kind: str):
255
- kind = kind.split(":")[0]
256
- if kind not in RuntimeKinds.nuclio_runtimes():
257
- raise ValueError(
258
- f"Kind {kind} is not a nuclio runtime, available runtimes are {RuntimeKinds.nuclio_runtimes()}"
259
- )
260
-
261
- if sub_kind == serving_subkind:
262
- return ServingRuntime()
263
-
264
- if kind == RuntimeKinds.application:
265
- return ApplicationRuntime()
266
-
267
- runtime = RemoteRuntime()
268
- runtime.spec.function_kind = sub_kind
269
- return runtime
270
-
271
- @staticmethod
272
- def resolve_nuclio_sub_kind(kind):
273
- is_nuclio = kind.startswith("nuclio")
274
- sub_kind = kind[kind.find(":") + 1 :] if is_nuclio and ":" in kind else None
275
- if kind == RuntimeKinds.serving:
276
- is_nuclio = True
277
- sub_kind = serving_subkind
278
- elif kind == RuntimeKinds.application:
279
- is_nuclio = True
280
- return is_nuclio, sub_kind
281
-
282
-
283
99
  def get_runtime_class(kind: str):
284
100
  if kind == RuntimeKinds.mpijob:
285
101
  return MpiRuntimeV1
mlrun/runtimes/base.py CHANGED
@@ -16,8 +16,9 @@ import http
16
16
  import re
17
17
  import typing
18
18
  import warnings
19
+ from collections.abc import Callable
19
20
  from os import environ
20
- from typing import Callable, Optional, Union
21
+ from typing import Optional, Union
21
22
 
22
23
  import requests.exceptions
23
24
  from nuclio.build import mlrun_footer
@@ -30,6 +31,7 @@ import mlrun.common.schemas
30
31
  import mlrun.common.schemas.model_monitoring.constants as mm_constants
31
32
  import mlrun.errors
32
33
  import mlrun.launcher.factory
34
+ import mlrun.runtimes
33
35
  import mlrun.utils.helpers
34
36
  import mlrun.utils.notifications
35
37
  import mlrun.utils.regex
@@ -142,9 +144,6 @@ class FunctionSpec(ModelObj):
142
144
  def build(self, build):
143
145
  self._build = self._verify_dict(build, "build", ImageBuilder)
144
146
 
145
- def enrich_function_preemption_spec(self):
146
- pass
147
-
148
147
  def validate_service_account(self, allowed_service_accounts):
149
148
  pass
150
149
 
@@ -280,18 +279,6 @@ class BaseRuntime(ModelObj):
280
279
  mlrun.model.Credentials.generate_access_key
281
280
  )
282
281
 
283
- def generate_runtime_k8s_env(self, runobj: RunObject = None) -> list[dict]:
284
- """
285
- Prepares a runtime environment as it's expected by kubernetes.models.V1Container
286
-
287
- :param runobj: Run context object (RunObject) with run metadata and status
288
- :return: List of dicts with the structure {"name": "var_name", "value": "var_value"}
289
- """
290
- return [
291
- {"name": k, "value": v}
292
- for k, v in self._generate_runtime_env(runobj).items()
293
- ]
294
-
295
282
  def run(
296
283
  self,
297
284
  runspec: Optional[
@@ -379,7 +366,12 @@ class BaseRuntime(ModelObj):
379
366
  This ensures latest code changes are executed. This argument must be used in
380
367
  conjunction with the local=True argument.
381
368
  :param output_path: Default artifact output path.
382
- :param retry: Retry configuration for the run, can be a dict or an instance of mlrun.model.Retry.
369
+ :param retry: Retry configuration for the run, can be a dict or an instance of
370
+ :py:class:`~mlrun.model.Retry`.
371
+ The `count` field in the `Retry` object specifies the number of retry attempts.
372
+ If `count=0`, the run will not be retried.
373
+ The `backoff` field specifies the retry backoff strategy between retry attempts.
374
+ If not provided, the default backoff delay is 30 seconds.
383
375
  :return: Run context object (RunObject) with run metadata, results and status
384
376
  """
385
377
  if artifact_path or out_path:
@@ -391,6 +383,7 @@ class BaseRuntime(ModelObj):
391
383
  FutureWarning,
392
384
  )
393
385
  output_path = output_path or out_path or artifact_path
386
+
394
387
  launcher = mlrun.launcher.factory.LauncherFactory().create_launcher(
395
388
  self._is_remote, local=local, **launcher_kwargs
396
389
  )
@@ -438,30 +431,86 @@ class BaseRuntime(ModelObj):
438
431
  if task:
439
432
  return task.to_dict()
440
433
 
441
- def _generate_runtime_env(self, runobj: RunObject = None) -> dict:
434
+ def _generate_runtime_env(self, runobj: RunObject = None):
442
435
  """
443
- Prepares all available environment variables for usage on a runtime
444
- Data will be extracted from several sources and most of them are not guaranteed to be available
436
+ Prepares all available environment variables for usage on a runtime.
445
437
 
446
- :param runobj: Run context object (RunObject) with run metadata and status
447
- :return: Dictionary with all the variables that could be parsed
438
+ :param runobj: Optional run context object (RunObject) with run metadata and status
439
+ :return: Tuple of (runtime_env, external_source_env) where:
440
+ - runtime_env: Dict of {env_name: value} for standard env vars
441
+ - external_source_env: Dict of {env_name: value_from} for env vars with external sources
448
442
  """
443
+ active_project = self.metadata.project or config.active_project
449
444
  runtime_env = {
450
- "MLRUN_ACTIVE_PROJECT": self.metadata.project or config.active_project
445
+ mlrun_constants.MLRUN_ACTIVE_PROJECT: active_project,
446
+ # TODO: Remove this in 1.12.0 as MLRUN_DEFAULT_PROJECT is deprecated and should not be injected anymore
447
+ "MLRUN_DEFAULT_PROJECT": active_project,
451
448
  }
449
+
450
+ # Set auth session only for nuclio runtimes that have an access key
451
+ if (
452
+ self.kind in mlrun.runtimes.RuntimeKinds.nuclio_runtimes()
453
+ and self.metadata.credentials.access_key
454
+ ):
455
+ runtime_env[
456
+ mlrun.common.runtimes.constants.FunctionEnvironmentVariables.auth_session
457
+ ] = self.metadata.credentials.access_key
458
+
452
459
  if runobj:
453
460
  runtime_env["MLRUN_EXEC_CONFIG"] = runobj.to_json(
454
461
  exclude_notifications_params=True
455
462
  )
456
463
  if runobj.metadata.project:
457
- runtime_env["MLRUN_ACTIVE_PROJECT"] = runobj.metadata.project
464
+ runtime_env[mlrun_constants.MLRUN_ACTIVE_PROJECT] = (
465
+ runobj.metadata.project
466
+ )
458
467
  if runobj.spec.verbose:
459
468
  runtime_env["MLRUN_LOG_LEVEL"] = "DEBUG"
460
469
  if config.httpdb.api_url:
461
470
  runtime_env["MLRUN_DBPATH"] = config.httpdb.api_url
462
471
  if self.metadata.namespace or config.namespace:
463
472
  runtime_env["MLRUN_NAMESPACE"] = self.metadata.namespace or config.namespace
464
- return runtime_env
473
+
474
+ external_source_env = self._generate_external_source_runtime_envs()
475
+
476
+ return runtime_env, external_source_env
477
+
478
+ def _generate_external_source_runtime_envs(self):
479
+ """
480
+ Returns non-static env vars to be added to the runtime pod/container.
481
+
482
+ :return: Dict of {env_name: value_from} for env vars with external sources (e.g., fieldRef)
483
+ """
484
+ return {
485
+ "MLRUN_RUNTIME_KIND": {
486
+ "fieldRef": {
487
+ "apiVersion": "v1",
488
+ "fieldPath": f"metadata.labels['{mlrun_constants.MLRunInternalLabels.mlrun_class}']",
489
+ }
490
+ },
491
+ }
492
+
493
+ def _generate_k8s_runtime_env(self, runobj: RunObject = None):
494
+ """
495
+ Generates runtime environment variables in Kubernetes format.
496
+
497
+ :param runobj: Optional run context object (RunObject) with run metadata and status
498
+ :return: List of env var dicts in K8s format:
499
+ - Standard envs: [{"name": key, "value": value}, ...]
500
+ - External source envs: [{"name": key, "valueFrom": value_from}, ...]
501
+ """
502
+ runtime_env, external_source_env = self._generate_runtime_env(runobj)
503
+
504
+ # Convert standard env vars to K8s format
505
+ k8s_env = [{"name": k, "value": v} for k, v in runtime_env.items()]
506
+
507
+ # Convert external source env vars to K8s format
508
+ k8s_external_env = [
509
+ {"name": k, "valueFrom": v} for k, v in external_source_env.items()
510
+ ]
511
+
512
+ k8s_env.extend(k8s_external_env)
513
+ return k8s_env
465
514
 
466
515
  @staticmethod
467
516
  def _handle_submit_job_http_error(error: requests.HTTPError):
@@ -939,5 +988,34 @@ class BaseRuntime(ModelObj):
939
988
  line += f", default={p['default']}"
940
989
  print(" " + line)
941
990
 
991
+ def remove_auth_secret_volumes(self):
992
+ secret_name_prefix = (
993
+ mlrun.mlconf.secret_stores.kubernetes.auth_secret_name.format(
994
+ hashed_access_key=""
995
+ )
996
+ )
997
+ volumes = self.spec.volumes or []
998
+ mounts = self.spec.volume_mounts or []
999
+
1000
+ volumes_to_remove = set()
1001
+
1002
+ # Identify volumes to remove
1003
+ for vol in volumes:
1004
+ secret_name = mlrun.utils.get_in(vol, "secret.secretName", "")
1005
+
1006
+ # Pattern of auth secret volumes
1007
+ if secret_name.startswith(secret_name_prefix):
1008
+ volumes_to_remove.add(vol["name"])
1009
+
1010
+ # Filter out only the matched volumes
1011
+ self.spec.volumes = [
1012
+ volume for volume in volumes if volume["name"] not in volumes_to_remove
1013
+ ]
1014
+
1015
+ # Filter out matching mounts
1016
+ self.spec.volume_mounts = [
1017
+ mount for mount in mounts if mount["name"] not in volumes_to_remove
1018
+ ]
1019
+
942
1020
  def skip_image_enrichment(self):
943
1021
  return False