mlrun 1.7.0rc42__py3-none-any.whl → 1.7.0rc44__py3-none-any.whl

This diff shows the contents of publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release: this version of mlrun might be problematic.

Files changed (50)
  1. mlrun/__main__.py +4 -2
  2. mlrun/artifacts/base.py +1 -1
  3. mlrun/artifacts/manager.py +15 -4
  4. mlrun/common/schemas/__init__.py +1 -0
  5. mlrun/common/schemas/alert.py +11 -11
  6. mlrun/common/schemas/client_spec.py +0 -1
  7. mlrun/common/schemas/frontend_spec.py +7 -0
  8. mlrun/common/schemas/notification.py +32 -5
  9. mlrun/common/schemas/workflow.py +1 -0
  10. mlrun/config.py +46 -21
  11. mlrun/data_types/data_types.py +5 -0
  12. mlrun/datastore/base.py +4 -7
  13. mlrun/datastore/storeytargets.py +4 -3
  14. mlrun/datastore/targets.py +17 -4
  15. mlrun/db/httpdb.py +2 -12
  16. mlrun/db/nopdb.py +21 -4
  17. mlrun/execution.py +7 -2
  18. mlrun/feature_store/api.py +1 -0
  19. mlrun/feature_store/retrieval/spark_merger.py +7 -3
  20. mlrun/frameworks/_common/plan.py +3 -3
  21. mlrun/frameworks/_ml_common/plan.py +1 -1
  22. mlrun/frameworks/parallel_coordinates.py +2 -3
  23. mlrun/k8s_utils.py +48 -2
  24. mlrun/launcher/client.py +6 -6
  25. mlrun/model.py +2 -1
  26. mlrun/model_monitoring/controller.py +1 -1
  27. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +15 -1
  28. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +12 -0
  29. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +2 -2
  30. mlrun/model_monitoring/helpers.py +7 -15
  31. mlrun/model_monitoring/writer.py +8 -2
  32. mlrun/projects/pipelines.py +2 -0
  33. mlrun/projects/project.py +146 -57
  34. mlrun/render.py +3 -3
  35. mlrun/runtimes/kubejob.py +6 -6
  36. mlrun/runtimes/local.py +4 -1
  37. mlrun/runtimes/nuclio/api_gateway.py +6 -0
  38. mlrun/runtimes/nuclio/application/application.py +3 -2
  39. mlrun/runtimes/pod.py +16 -8
  40. mlrun/runtimes/sparkjob/spark3job.py +4 -0
  41. mlrun/utils/async_http.py +1 -1
  42. mlrun/utils/helpers.py +56 -22
  43. mlrun/utils/notifications/notification/__init__.py +0 -1
  44. mlrun/utils/version/version.json +2 -2
  45. {mlrun-1.7.0rc42.dist-info → mlrun-1.7.0rc44.dist-info}/METADATA +27 -27
  46. {mlrun-1.7.0rc42.dist-info → mlrun-1.7.0rc44.dist-info}/RECORD +50 -50
  47. {mlrun-1.7.0rc42.dist-info → mlrun-1.7.0rc44.dist-info}/WHEEL +1 -1
  48. {mlrun-1.7.0rc42.dist-info → mlrun-1.7.0rc44.dist-info}/LICENSE +0 -0
  49. {mlrun-1.7.0rc42.dist-info → mlrun-1.7.0rc44.dist-info}/entry_points.txt +0 -0
  50. {mlrun-1.7.0rc42.dist-info → mlrun-1.7.0rc44.dist-info}/top_level.txt +0 -0
mlrun/projects/project.py CHANGED
@@ -18,6 +18,7 @@ import glob
 import http
 import importlib.util as imputil
 import json
+import os
 import pathlib
 import shutil
 import tempfile
@@ -25,6 +26,7 @@ import typing
 import uuid
 import warnings
 import zipfile
+from copy import deepcopy
 from os import environ, makedirs, path
 from typing import Callable, Optional, Union
 
@@ -65,13 +67,7 @@ from ..features import Feature
 from ..model import EntrypointParam, ImageBuilder, ModelObj
 from ..run import code_to_function, get_object, import_function, new_function
 from ..secrets import SecretsStore
-from ..utils import (
-    is_ipython,
-    is_relative_path,
-    is_yaml_path,
-    logger,
-    update_in,
-)
+from ..utils import is_jupyter, is_relative_path, is_yaml_path, logger, update_in
 from ..utils.clones import (
     add_credentials_git_remote_url,
     clone_git,
@@ -251,8 +247,7 @@ def new_project(
     project.spec.description = description
 
     if default_function_node_selector:
-        for key, val in default_function_node_selector.items():
-            project.spec.default_function_node_selector[key] = val
+        project.spec.default_function_node_selector = default_function_node_selector
 
     if parameters:
         # Enable setting project parameters at load time, can be used to customize the project_setup
@@ -874,7 +869,7 @@ class ProjectSpec(ModelObj):
         # in a tuple where the first index is the packager module's path (str) and the second is a flag (bool) for
         # whether it is mandatory for a run (raise exception on collection error) or not.
         self.custom_packagers = custom_packagers or []
-        self.default_function_node_selector = default_function_node_selector or {}
+        self._default_function_node_selector = default_function_node_selector or None
 
     @property
     def source(self) -> str:
@@ -1049,6 +1044,14 @@ class ProjectSpec(ModelObj):
         if key in self._artifacts:
             del self._artifacts[key]
 
+    @property
+    def default_function_node_selector(self):
+        return self._default_function_node_selector
+
+    @default_function_node_selector.setter
+    def default_function_node_selector(self, node_selector: dict[str, str]):
+        self._default_function_node_selector = deepcopy(node_selector)
+
     @property
     def build(self) -> ImageBuilder:
         return self._build
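
With the property added above, a project's default function node selector is deep-copied on assignment, so later mutations of the caller's dict no longer leak into the spec. A minimal sketch of the resulting behavior (project name and label are illustrative):

    import mlrun

    project = mlrun.new_project("demo-proj", save=False)
    selector = {"disktype": "ssd"}
    project.spec.default_function_node_selector = selector
    selector["disktype"] = "hdd"  # mutate the caller's dict afterwards
    # The spec kept its own copy, so this still prints {'disktype': 'ssd'}
    print(project.spec.default_function_node_selector)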
@@ -1590,7 +1593,9 @@ class MlrunProject(ModelObj):
         :param format: artifact file format: csv, png, ..
         :param tag: version tag
         :param target_path: absolute target path (instead of using artifact_path + local_path)
-        :param upload: upload to datastore (default is True)
+        :param upload: Whether to upload the artifact to the datastore. If not provided, and the `local_path`
+                       is not a directory, upload occurs by default. Directories are uploaded only when this
+                       flag is explicitly set to `True`.
         :param labels: a set of key/value labels to tag the artifact with
 
         :returns: artifact object
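
The reworded ``upload`` description encodes an asymmetry between files and directories. A hedged illustration, with hypothetical paths:

    # A single file is uploaded by default when `upload` is left unset
    project.log_artifact("report", local_path="report.csv")

    # A directory is uploaded only when the flag is explicitly True
    project.log_artifact("training-data", local_path="./data", upload=True)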
@@ -2325,31 +2330,51 @@ class MlrunProject(ModelObj):
         requirements: typing.Union[str, list[str]] = None,
         requirements_file: str = "",
     ) -> mlrun.runtimes.BaseRuntime:
-        """update or add a function object to the project
+        """
+        | Update or add a function object to the project.
+        | Function can be provided as an object (func) or a .py/.ipynb/.yaml URL.
 
-        function can be provided as an object (func) or a .py/.ipynb/.yaml url
-        support url prefixes::
+        | Creating a function from a single file is done by specifying ``func`` and disabling ``with_repo``.
+        | Creating a function with project source (specify ``with_repo=True``):
+        | 1. Specify a relative ``func`` path.
+        | 2. Specify a module ``handler`` (e.g. ``handler=package.package.func``) without ``func``.
+        | Creating a function with non project source is done by specifying a module ``handler`` and on the
+          returned function set the source with ``function.with_source_archive(<source>)``.
 
-            object (s3://, v3io://, ..)
-            MLRun DB e.g. db://project/func:ver
-            functions hub/market: e.g. hub://auto-trainer:master
+        Support URL prefixes:
 
-        examples::
+        | Object (s3://, v3io://, ..)
+        | MLRun DB e.g. db://project/func:ver
+        | Functions hub/market: e.g. hub://auto-trainer:master
+
+        Examples::
 
             proj.set_function(func_object)
-            proj.set_function(
-                "./src/mycode.py", "ingest", image="myrepo/ing:latest", with_repo=True
-            )
             proj.set_function("http://.../mynb.ipynb", "train")
             proj.set_function("./func.yaml")
             proj.set_function("hub://get_toy_data", "getdata")
 
-            # set function requirements
+            # Create a function from a single file
+            proj.set_function("./src/mycode.py", "ingest")
 
-            # by providing a list of packages
+            # Creating a function with project source
+            proj.set_function(
+                "./src/mycode.py", "ingest", image="myrepo/ing:latest", with_repo=True
+            )
+            proj.set_function("ingest", handler="package.package.func", with_repo=True)
+
+            # Creating a function with non project source
+            func = proj.set_function(
+                "ingest", handler="package.package.func", with_repo=False
+            )
+            func.with_source_archive("git://github.com/mlrun/something.git")
+
+            # Set function requirements
+
+            # By providing a list of packages
             proj.set_function("my.py", requirements=["requests", "pandas"])
 
-            # by providing a path to a pip requirements file
+            # By providing a path to a pip requirements file
             proj.set_function("my.py", requirements="requirements.txt")
 
         :param func: Function object or spec/code url, None refers to current Notebook
@@ -2369,7 +2394,7 @@ class MlrunProject(ModelObj):
         :param requirements: A list of python packages
         :param requirements_file: Path to a python requirements file
 
-        :returns: function object
+        :returns: :py:class:`~mlrun.runtimes.BaseRuntime`
         """
         (
             resolved_function_name,
@@ -2410,7 +2435,7 @@ class MlrunProject(ModelObj):
         ):
             # if function path is not provided and it is not a module (no ".")
             # use the current notebook as default
-            if is_ipython:
+            if is_jupyter:
                 from IPython import get_ipython
 
                 kernel = get_ipython()
@@ -2801,47 +2826,92 @@ class MlrunProject(ModelObj):
             secrets=secrets or {},
         )
 
-    def sync_functions(self, names: list = None, always=True, save=False):
-        """reload function objects from specs and files"""
+    def sync_functions(
+        self,
+        names: list = None,
+        always: bool = True,
+        save: bool = False,
+        silent: bool = False,
+    ):
+        """
+        Reload function objects from specs and files.
+        The function objects are synced against the definitions spec in `self.spec._function_definitions`.
+        Referenced files/URLs in the function spec will be reloaded.
+        Function definitions are parsed by the following precedence:
+
+        1. Contains a runtime spec.
+        2. Contains a module in the project's context.
+        3. Contains a path to a function definition (yaml, DB, Hub).
+        4. Contains a path to .ipynb or .py files.
+        5. Contains a Nuclio/Serving function image / an 'Application' kind definition.
+
+        If the function definition is already an object, some project metadata updates will apply;
+        however, it will not be reloaded.
+
+        :param names:  Names of functions to reload, defaults to `self.spec._function_definitions.keys()`.
+        :param always: Force reloading the functions.
+        :param save:   Whether to save the loaded functions or not.
+        :param silent: Whether to raise an exception when a function fails to load.
+
+        :returns: Dictionary of function objects
+        """
        if self._initialized and not always:
            return self.spec._function_objects
 
-        funcs = self.spec._function_objects
+        functions = self.spec._function_objects
        if not names:
            names = self.spec._function_definitions.keys()
-            funcs = {}
+            functions = {}
+
        origin = mlrun.runtimes.utils.add_code_metadata(self.spec.context)
        for name in names:
-            f = self.spec._function_definitions.get(name)
-            if not f:
-                raise ValueError(f"function named {name} not found")
+            function_definition = self.spec._function_definitions.get(name)
+            if not function_definition:
+                if silent:
+                    logger.warn(
+                        "Function definition was not found, skipping reload", name=name
+                    )
+                    continue
+
+                raise ValueError(f"Function named {name} not found")
+
+            function_object = self.spec._function_objects.get(name, None)
+            is_base_runtime = isinstance(
+                function_object, mlrun.runtimes.base.BaseRuntime
+            )
            # If this function is already available locally, don't recreate it unless always=True
-            if (
-                isinstance(
-                    self.spec._function_objects.get(name, None),
-                    mlrun.runtimes.base.BaseRuntime,
-                )
-                and not always
-            ):
-                funcs[name] = self.spec._function_objects[name]
+            if is_base_runtime and not always:
+                functions[name] = function_object
                continue
-            if hasattr(f, "to_dict"):
-                name, func = _init_function_from_obj(f, self, name)
-            else:
-                if not isinstance(f, dict):
-                    raise ValueError("function must be an object or dict")
+
+            # Reload the function
+            if hasattr(function_definition, "to_dict"):
+                name, func = _init_function_from_obj(function_definition, self, name)
+            elif isinstance(function_definition, dict):
                try:
-                    name, func = _init_function_from_dict(f, self, name)
+                    name, func = _init_function_from_dict(
+                        function_definition, self, name
+                    )
                except FileNotFoundError as exc:
-                    raise mlrun.errors.MLRunMissingDependencyError(
-                        f"File {exc.filename} not found while syncing project functions"
-                    ) from exc
+                    message = f"File {exc.filename} not found while syncing project functions."
+                    if silent:
+                        message += " Skipping function reload"
+                        logger.warn(message, name=name)
+                        continue
+
+                    raise mlrun.errors.MLRunMissingDependencyError(message) from exc
+            else:
+                message = f"Function {name} must be an object or dict."
+                if silent:
+                    message += " Skipping function reload"
+                    logger.warn(message, name=name)
+                    continue
+                raise ValueError(message)
+
            func.spec.build.code_origin = origin
-            funcs[name] = func
+            functions[name] = func
            if save:
                func.save(versioned=False)
 
-        self.spec._function_objects = funcs
+        self.spec._function_objects = functions
        self._initialized = True
        return self.spec._function_objects
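
The new ``silent`` flag downgrades load failures from exceptions to warnings, which is what the workflow path below relies on. A short sketch of calling it directly:

    # Reload every function definition; any definition that fails to load is
    # skipped with a warning instead of aborting the whole sync
    functions = project.sync_functions(always=True, silent=True)
    print(list(functions))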
 
@@ -2986,6 +3056,7 @@ class MlrunProject(ModelObj):
         source: str = None,
         cleanup_ttl: int = None,
         notifications: list[mlrun.model.Notification] = None,
+        workflow_runner_node_selector: typing.Optional[dict[str, str]] = None,
     ) -> _PipelineRunStatus:
         """Run a workflow using kubeflow pipelines
 
@@ -3022,7 +3093,11 @@ class MlrunProject(ModelObj):
                             workflow and all its resources are deleted)
         :param notifications:
                             List of notifications to send for workflow completion
-
+        :param workflow_runner_node_selector:
+                            Defines the node selector for the workflow runner pod when using a remote engine.
+                            This allows you to control and specify where the workflow runner pod will be scheduled.
+                            The setting is only relevant when the engine is set to 'remote' or for scheduled
+                            workflows, and it will be ignored if the workflow is not run on a remote engine.
         :returns: ~py:class:`~mlrun.projects.pipelines._PipelineRunStatus` instance
         """
 
@@ -3041,11 +3116,10 @@ class MlrunProject(ModelObj):
         )
 
         if engine not in ["remote"] and not schedule:
-            # For remote/scheduled runs we don't require the functions to be synced as they can be loaded dynamically
-            # during run
-            self.sync_functions(always=sync)
+            # For remote/scheduled runs there is no need to sync functions as they can be loaded dynamically during run
+            self.sync_functions(always=sync, silent=True)
             if not self.spec._function_objects:
-                raise ValueError(
+                logger.warn(
                     "There are no functions in the project."
                     " Make sure you've set your functions with project.set_function()."
                 )
@@ -3089,6 +3163,16 @@ class MlrunProject(ModelObj):
         )
         inner_engine = get_workflow_engine(engine_kind, local).engine
         workflow_spec.engine = inner_engine or workflow_engine.engine
+        if workflow_runner_node_selector:
+            if workflow_engine.engine == "remote":
+                workflow_spec.workflow_runner_node_selector = (
+                    workflow_runner_node_selector
+                )
+            else:
+                logger.warn(
+                    "'workflow_runner_node_selector' applies only to remote engines"
+                    " and is ignored for non-remote runs."
+                )
 
         run = workflow_engine.run(
             self,
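
Putting the new parameter together, a hedged example of pinning the remote workflow runner pod to labeled nodes (workflow name and node label are illustrative):

    run_status = project.run(
        name="main",
        engine="remote",
        workflow_runner_node_selector={"node-group": "workflow-runners"},
    )

With any non-remote engine the selector is not applied; per the branch above, a warning is logged and the run proceeds.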
@@ -4289,6 +4373,7 @@ class MlrunProject(ModelObj):
                 kind=producer_dict.get("kind", ""),
                 project=producer_project,
                 tag=producer_tag,
+                owner=producer_dict.get("owner", ""),
             ), True
 
         # do not retain the artifact's producer, replace it with the project as the producer
@@ -4298,6 +4383,7 @@ class MlrunProject(ModelObj):
             name=self.metadata.name,
             project=self.metadata.name,
             tag=project_producer_tag,
+            owner=self._resolve_artifact_owner(),
         ), False
 
     def _resolve_existing_artifact(
@@ -4337,6 +4423,9 @@ class MlrunProject(ModelObj):
     def _get_project_tag(self):
         return self._get_hexsha() or str(uuid.uuid4())
 
+    def _resolve_artifact_owner(self):
+        return os.getenv("V3IO_USERNAME") or self.spec.owner
+
 
 def _set_as_current_default_project(project: MlrunProject):
     mlrun.mlconf.default_project = project.metadata.name
mlrun/render.py CHANGED
@@ -22,7 +22,7 @@ import mlrun.utils
 
 from .config import config
 from .datastore import uri_to_ipython
-from .utils import dict_to_list, get_in, is_ipython
+from .utils import dict_to_list, get_in, is_jupyter
 
 JUPYTER_SERVER_ROOT = environ.get("HOME", "/User")
 supported_viewers = [
@@ -181,8 +181,8 @@ def run_to_html(results, display=True):
 
 
 def ipython_display(html, display=True, alt_text=None):
-    if display and html and is_ipython:
-        import IPython
+    if display and html and is_jupyter:
+        import IPython.display
 
         IPython.display.display(IPython.display.HTML(html))
     elif alt_text:
mlrun/runtimes/kubejob.py CHANGED
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
+import typing
 import warnings
 
 from mlrun_pipelines.common.ops import build_op
@@ -143,11 +143,11 @@ class KubejobRuntime(KubeResource):
 
     def deploy(
         self,
-        watch=True,
-        with_mlrun=None,
-        skip_deployed=False,
-        is_kfp=False,
-        mlrun_version_specifier=None,
+        watch: bool = True,
+        with_mlrun: typing.Optional[bool] = None,
+        skip_deployed: bool = False,
+        is_kfp: bool = False,
+        mlrun_version_specifier: typing.Optional[bool] = None,
         builder_env: dict = None,
         show_on_failure: bool = False,
         force_build: bool = False,
mlrun/runtimes/local.py CHANGED
@@ -145,7 +145,10 @@ class ParallelRunner:
         if function_name and generator.options.teardown_dask:
             logger.info("Tearing down the dask cluster..")
             mlrun.get_run_db().delete_runtime_resources(
-                kind="dask", object_id=function_name, force=True
+                project=self.metadata.project,
+                kind=mlrun.runtimes.RuntimeKinds.dask,
+                object_id=function_name,
+                force=True,
             )
 
         return results
mlrun/runtimes/nuclio/api_gateway.py CHANGED
@@ -587,6 +587,12 @@ class APIGateway(ModelObj):
             self.metadata.annotations, gateway_timeout
         )
 
+    def with_annotations(self, annotations: dict):
+        """set key/value annotations in the metadata of the api gateway"""
+        for key, value in annotations.items():
+            self.metadata.annotations[key] = str(value)
+        return self
+
     @classmethod
     def from_scheme(cls, api_gateway: schemas.APIGateway):
         project = api_gateway.metadata.labels.get(
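
A usage sketch for the new helper, given an existing ``APIGateway`` object ``gateway`` (the annotation key and value are illustrative). Values are stringified and ``self`` is returned, so calls can be chained:

    gateway.with_annotations({"nginx.ingress.kubernetes.io/proxy-body-size": "64m"})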
mlrun/runtimes/nuclio/application/application.py CHANGED
@@ -438,8 +438,9 @@ class ApplicationRuntime(RemoteRuntime):
         """
         Create the application API gateway. Once the application is deployed, the API gateway can be created.
         An application without an API gateway is not accessible.
-        :param name: The name of the API gateway, defaults to <function-name>-<function-tag>
-        :param path: Optional path of the API gateway, default value is "/"
+        :param name: The name of the API gateway
+        :param path: Optional path of the API gateway, default value is "/".
+                     The given path should be supported by the deployed application
         :param direct_port_access: Set True to allow direct port access to the application sidecar
         :param authentication_mode: API Gateway authentication mode
         :param authentication_creds: API Gateway basic authentication credentials as a tuple (username, password)
mlrun/runtimes/pod.py CHANGED
@@ -38,6 +38,7 @@ from ..k8s_utils import (
     generate_preemptible_nodes_affinity_terms,
     generate_preemptible_nodes_anti_affinity_terms,
     generate_preemptible_tolerations,
+    validate_node_selectors,
 )
 from ..utils import logger, update_in
 from .base import BaseRuntime, FunctionSpec, spec_fields
@@ -1175,6 +1176,7 @@ class KubeResource(BaseRuntime, KfpAdapterMixin):
         if node_name:
             self.spec.node_name = node_name
         if node_selector is not None:
+            validate_node_selectors(node_selectors=node_selector, raise_on_error=False)
             self.spec.node_selector = node_selector
         if affinity is not None:
             self.spec.affinity = affinity
@@ -1345,20 +1347,26 @@ class KubeResource(BaseRuntime, KfpAdapterMixin):
 
     def _build_image(
         self,
-        builder_env,
-        force_build,
-        mlrun_version_specifier,
-        show_on_failure,
-        skip_deployed,
-        watch,
-        is_kfp,
-        with_mlrun,
+        builder_env: dict,
+        force_build: bool,
+        mlrun_version_specifier: typing.Optional[bool],
+        show_on_failure: bool,
+        skip_deployed: bool,
+        watch: bool,
+        is_kfp: bool,
+        with_mlrun: typing.Optional[bool],
     ):
         # When we're in pipelines context we must watch otherwise the pipelines pod will exit before the operation
         # is actually done. (when a pipelines pod exits, the pipeline step marked as done)
         if is_kfp:
             watch = True
 
+        if skip_deployed and self.requires_build() and not self.is_deployed():
+            logger.warning(
+                f"Even though {skip_deployed=}, the build might be triggered due to the function's configuration. "
+                "See requires_build() and is_deployed() for reasoning."
+            )
+
         db = self._get_db()
         data = db.remote_builder(
             self,
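
With the hook above, assigning a node selector through ``with_node_selection`` now passes it through ``validate_node_selectors`` in non-raising mode, so malformed labels are reported without failing the call. A brief sketch (function name, image, and label are illustrative):

    import mlrun

    fn = mlrun.new_function("trainer", kind="job", image="mlrun/mlrun")
    # The selector is validated (raise_on_error=False) before being stored
    fn.with_node_selection(node_selector={"disktype": "ssd"})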
mlrun/runtimes/sparkjob/spark3job.py CHANGED
@@ -18,6 +18,7 @@ from mlrun_pipelines.mounts import mount_v3io, mount_v3iod
 
 import mlrun.common.schemas.function
 import mlrun.errors
+import mlrun.k8s_utils
 import mlrun.runtimes.pod
 from mlrun.config import config
 
@@ -505,6 +506,7 @@ class Spark3Runtime(KubejobRuntime):
             raise NotImplementedError(
                 "Setting node name is not supported for spark runtime"
             )
+        mlrun.k8s_utils.validate_node_selectors(node_selector, raise_on_error=False)
         self.with_driver_node_selection(node_name, node_selector, affinity, tolerations)
         self.with_executor_node_selection(
             node_name, node_selector, affinity, tolerations
@@ -537,6 +539,7 @@ class Spark3Runtime(KubejobRuntime):
         if affinity is not None:
             self.spec.driver_affinity = affinity
         if node_selector is not None:
+            mlrun.k8s_utils.validate_node_selectors(node_selector, raise_on_error=False)
             self.spec.driver_node_selector = node_selector
         if tolerations is not None:
             self.spec.driver_tolerations = tolerations
@@ -568,6 +571,7 @@ class Spark3Runtime(KubejobRuntime):
         if affinity is not None:
             self.spec.executor_affinity = affinity
         if node_selector is not None:
+            mlrun.k8s_utils.validate_node_selectors(node_selector, raise_on_error=False)
             self.spec.executor_node_selector = node_selector
         if tolerations is not None:
             self.spec.executor_tolerations = tolerations
mlrun/utils/async_http.py CHANGED
@@ -237,7 +237,7 @@ class _CustomRequestContext(_RequestContext):
         retry_wait = self._retry_options.get_timeout(
             attempt=current_attempt, response=None
         )
-        self._logger.debug(
+        self._logger.warning(
             "Request failed on retryable exception, retrying",
             retry_wait_secs=retry_wait,
             method=params.method,
mlrun/utils/helpers.py CHANGED
@@ -41,7 +41,7 @@ import semver
 import yaml
 from dateutil import parser
 from mlrun_pipelines.models import PipelineRun
-from pandas._libs.tslibs.timestamps import Timedelta, Timestamp
+from pandas import Timedelta, Timestamp
 from yaml.representer import RepresenterError
 
 import mlrun
@@ -133,18 +133,25 @@ def is_legacy_artifact(artifact):
 logger = create_logger(config.log_level, config.log_formatter, "mlrun", sys.stdout)
 missing = object()
 
-is_ipython = False
+is_ipython = False  # is an IPython shell, including Jupyter
+is_jupyter = False  # is a Jupyter notebook/lab kernel
 try:
-    import IPython
+    import IPython.core.getipython
 
-    ipy = IPython.get_ipython()
-    # if its IPython terminal ignore (cant show html)
-    if ipy and "Terminal" not in str(type(ipy)):
-        is_ipython = True
-except ImportError:
+    ipy = IPython.core.getipython.get_ipython()
+
+    is_ipython = ipy is not None
+    is_jupyter = (
+        is_ipython
+        # not a plain IPython terminal (which cannot render HTML)
+        and "Terminal" not in str(type(ipy))
+    )
+
+    del ipy
+except ModuleNotFoundError:
     pass
 
-if is_ipython and config.nest_asyncio_enabled in ["1", "True"]:
+if is_jupyter and config.nest_asyncio_enabled in ["1", "True"]:
     # bypass Jupyter asyncio bug
     import nest_asyncio
 
@@ -267,15 +274,29 @@ def validate_artifact_key_name(
     )
 
 
-def validate_inline_artifact_body_size(body: typing.Union[str, bytes, None]) -> None:
+def validate_artifact_body_size(
+    body: typing.Union[str, bytes, None], is_inline: bool
+) -> None:
+    """
+    Validates the size of the artifact body.
+
+    :param body:      The artifact body, which can be a string, bytes, or None.
+    :param is_inline: A flag indicating whether the artifact body is inline.
+
+    :raises mlrun.errors.MLRunBadRequestError: If the body exceeds the maximum allowed size.
+    """
     if body and len(body) > MYSQL_MEDIUMBLOB_SIZE_BYTES:
-        raise mlrun.errors.MLRunBadRequestError(
-            "The body of the artifact exceeds the maximum allowed size. "
-            "Avoid embedding the artifact body. "
-            "This increases the size of the project yaml file and could affect the project during loading and saving. "
-            "More information is available at"
-            "https://docs.mlrun.org/en/latest/projects/automate-project-git-source.html#setting-and-registering-the-project-artifacts"
-        )
+        error_message = "The body of the artifact exceeds the maximum allowed size. "
+        if is_inline:
+            error_message += (
+                "Avoid embedding the artifact body. This increases the size of the project yaml file and could "
+                "affect the project during loading and saving. "
+            )
+        else:
+            error_message += (
+                "For larger artifacts, consider logging them through files instead."
+            )
+        raise mlrun.errors.MLRunBadRequestError(error_message)
 
 
 def validate_v3io_stream_consumer_group(
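
A hedged sketch of calling the renamed validator: the ``is_inline`` flag only changes the guidance in the error message, while the size bound itself stays the MySQL MEDIUMBLOB limit:

    from mlrun.utils.helpers import validate_artifact_body_size

    # Passes silently: the body is far below MYSQL_MEDIUMBLOB_SIZE_BYTES
    validate_artifact_body_size(body=b"small payload", is_inline=False)
    # A body over the limit raises mlrun.errors.MLRunBadRequestError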
@@ -1007,6 +1028,23 @@ def get_workflow_url(project, id=None):
     return url
 
 
+def get_kfp_project_filter(project_name: str) -> str:
+    """
+    Generates a filter string for KFP runs, using a substring predicate
+    on the run's 'name' field. This is used as a heuristic to retrieve runs that are associated
+    with a specific project. The 'op: 9' operator indicates that the filter checks if the
+    project name appears as a substring in the run's name, ensuring that we can identify
+    runs belonging to the desired project.
+    """
+    is_substring_op = 9
+    project_name_filter = {
+        "predicates": [
+            {"key": "name", "op": is_substring_op, "string_value": project_name}
+        ]
+    }
+    return json.dumps(project_name_filter)
+
+
 def are_strings_in_exception_chain_messages(
     exception: Exception, strings_list: list[str]
 ) -> bool:
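
A sketch of the helper's output, a JSON-encoded KFP filter using the substring operator (op=9) on the run name:

    from mlrun.utils.helpers import get_kfp_project_filter

    print(get_kfp_project_filter("my-project"))
    # {"predicates": [{"key": "name", "op": 9, "string_value": "my-project"}]}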
@@ -1404,11 +1442,7 @@ def is_running_in_jupyter_notebook() -> bool:
     Check if the code is running inside a Jupyter Notebook.
     :return: True if running inside a Jupyter Notebook, False otherwise.
     """
-    import IPython
-
-    ipy = IPython.get_ipython()
-    # if its IPython terminal, it isn't a Jupyter ipython
-    return ipy and "Terminal" not in str(type(ipy))
+    return is_jupyter
 
 
 def create_ipython_display():
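
Callers that render HTML should now gate on ``is_jupyter`` rather than ``is_ipython`` (which is also true in a plain IPython terminal). A sketch of the intended split:

    from mlrun.utils.helpers import is_ipython, is_jupyter

    if is_jupyter:
        pass  # notebook/lab kernel: rich HTML output is safe
    elif is_ipython:
        pass  # IPython terminal: HTML cannot be displayed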
mlrun/utils/notifications/notification/__init__.py CHANGED
@@ -13,7 +13,6 @@
 # limitations under the License.
 
 import enum
-import typing
 
 from mlrun.common.schemas.notification import NotificationKind
 
mlrun/utils/version/version.json CHANGED
@@ -1,4 +1,4 @@
 {
-    "git_commit": "4029300162894b90ff3b3a732f627cc20dd33257",
-    "version": "1.7.0-rc42"
+    "git_commit": "06b782bf0de829e2b6ae1f295286fb3e4f92c228",
+    "version": "1.7.0-rc44"
 }