mlrun 1.6.0rc26__py3-none-any.whl → 1.6.3rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mlrun might be problematic (see the package registry for details).

Files changed (66)
  1. mlrun/artifacts/manager.py +6 -0
  2. mlrun/artifacts/model.py +28 -22
  3. mlrun/common/db/sql_session.py +3 -0
  4. mlrun/common/model_monitoring/helpers.py +4 -2
  5. mlrun/common/schemas/__init__.py +2 -0
  6. mlrun/common/schemas/common.py +40 -0
  7. mlrun/common/schemas/model_monitoring/__init__.py +1 -0
  8. mlrun/common/schemas/model_monitoring/constants.py +21 -5
  9. mlrun/common/schemas/project.py +2 -0
  10. mlrun/config.py +43 -17
  11. mlrun/data_types/data_types.py +4 -0
  12. mlrun/datastore/azure_blob.py +9 -9
  13. mlrun/datastore/base.py +22 -44
  14. mlrun/datastore/datastore.py +7 -3
  15. mlrun/datastore/datastore_profile.py +15 -3
  16. mlrun/datastore/google_cloud_storage.py +7 -7
  17. mlrun/datastore/sources.py +17 -4
  18. mlrun/datastore/targets.py +3 -1
  19. mlrun/datastore/utils.py +11 -1
  20. mlrun/datastore/v3io.py +70 -46
  21. mlrun/db/base.py +18 -0
  22. mlrun/db/httpdb.py +41 -36
  23. mlrun/execution.py +3 -3
  24. mlrun/feature_store/api.py +133 -132
  25. mlrun/feature_store/feature_set.py +89 -0
  26. mlrun/feature_store/feature_vector.py +120 -0
  27. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +3 -3
  28. mlrun/frameworks/tf_keras/model_handler.py +7 -7
  29. mlrun/k8s_utils.py +56 -0
  30. mlrun/kfpops.py +19 -10
  31. mlrun/model.py +6 -0
  32. mlrun/model_monitoring/api.py +8 -8
  33. mlrun/model_monitoring/batch.py +1 -1
  34. mlrun/model_monitoring/controller.py +0 -7
  35. mlrun/model_monitoring/stores/kv_model_endpoint_store.py +13 -13
  36. mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -1
  37. mlrun/model_monitoring/stream_processing.py +52 -38
  38. mlrun/package/packagers/pandas_packagers.py +3 -3
  39. mlrun/package/utils/_archiver.py +3 -1
  40. mlrun/platforms/iguazio.py +6 -65
  41. mlrun/projects/pipelines.py +29 -12
  42. mlrun/projects/project.py +100 -61
  43. mlrun/run.py +2 -0
  44. mlrun/runtimes/base.py +24 -1
  45. mlrun/runtimes/function.py +14 -15
  46. mlrun/runtimes/kubejob.py +5 -3
  47. mlrun/runtimes/local.py +2 -2
  48. mlrun/runtimes/mpijob/abstract.py +6 -6
  49. mlrun/runtimes/pod.py +3 -3
  50. mlrun/runtimes/serving.py +7 -14
  51. mlrun/runtimes/sparkjob/spark3job.py +3 -3
  52. mlrun/serving/remote.py +4 -2
  53. mlrun/serving/routers.py +14 -8
  54. mlrun/utils/async_http.py +3 -3
  55. mlrun/utils/helpers.py +59 -3
  56. mlrun/utils/http.py +3 -3
  57. mlrun/utils/logger.py +2 -2
  58. mlrun/utils/notifications/notification_pusher.py +6 -6
  59. mlrun/utils/regex.py +5 -1
  60. mlrun/utils/version/version.json +2 -2
  61. {mlrun-1.6.0rc26.dist-info → mlrun-1.6.3rc1.dist-info}/METADATA +21 -23
  62. {mlrun-1.6.0rc26.dist-info → mlrun-1.6.3rc1.dist-info}/RECORD +66 -65
  63. {mlrun-1.6.0rc26.dist-info → mlrun-1.6.3rc1.dist-info}/WHEEL +1 -1
  64. {mlrun-1.6.0rc26.dist-info → mlrun-1.6.3rc1.dist-info}/LICENSE +0 -0
  65. {mlrun-1.6.0rc26.dist-info → mlrun-1.6.3rc1.dist-info}/entry_points.txt +0 -0
  66. {mlrun-1.6.0rc26.dist-info → mlrun-1.6.3rc1.dist-info}/top_level.txt +0 -0
mlrun/projects/pipelines.py CHANGED
@@ -69,16 +69,16 @@ class WorkflowSpec(mlrun.model.ModelObj):
 
     def __init__(
         self,
-        engine=None,
-        code=None,
-        path=None,
-        args=None,
-        name=None,
-        handler=None,
-        args_schema: dict = None,
+        engine: typing.Optional[str] = None,
+        code: typing.Optional[str] = None,
+        path: typing.Optional[str] = None,
+        args: typing.Optional[dict] = None,
+        name: typing.Optional[str] = None,
+        handler: typing.Optional[str] = None,
+        args_schema: typing.Optional[dict] = None,
         schedule: typing.Union[str, mlrun.common.schemas.ScheduleCronTrigger] = None,
-        cleanup_ttl: int = None,
-        image: str = None,
+        cleanup_ttl: typing.Optional[int] = None,
+        image: typing.Optional[str] = None,
     ):
         self.engine = engine
         self.code = code
@@ -401,6 +401,9 @@ def enrich_function_object(
         else:
             f.spec.build.source = project.spec.source
             f.spec.build.load_source_on_run = project.spec.load_source_on_run
+            f.spec.build.source_code_target_dir = (
+                project.spec.build.source_code_target_dir
+            )
         f.spec.workdir = project.spec.workdir or project.spec.subpath
         f.prepare_image_for_deploy()
 
@@ -605,6 +608,7 @@ class _KFPRunner(_PipelineRunner):
             namespace=namespace,
             artifact_path=artifact_path,
             cleanup_ttl=workflow_spec.cleanup_ttl,
+            timeout=int(mlrun.mlconf.workflows.timeouts.kfp),
         )
 
         # The user provided workflow code might have made changes to function specs that require cleanup
@@ -862,10 +866,21 @@ class _RemoteRunner(_PipelineRunner):
             )
             return
 
+        get_workflow_id_timeout = max(
+            int(mlrun.mlconf.workflows.timeouts.remote),
+            int(getattr(mlrun.mlconf.workflows.timeouts, inner_engine.engine)),
+        )
+
+        logger.debug(
+            "Workflow submitted, waiting for pipeline run to start",
+            workflow_name=workflow_response.name,
+            get_workflow_id_timeout=get_workflow_id_timeout,
+        )
+
         # Getting workflow id from run:
         response = retry_until_successful(
             1,
-            getattr(mlrun.mlconf.workflows.timeouts, inner_engine.engine),
+            get_workflow_id_timeout,
             logger,
             False,
             run_db.get_workflow_id,
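
Note: the wait for the workflow ID is now bounded by the larger of the generic remote timeout and the engine-specific timeout, both read from mlrun.mlconf.workflows.timeouts. A minimal sketch of the selection logic (the config values here are illustrative, not the shipped defaults):

    import mlrun

    # Assumed example values; real ones come from the mlrun configuration
    mlrun.mlconf.workflows.timeouts.remote = 120
    mlrun.mlconf.workflows.timeouts.kfp = 60

    engine = "kfp"  # inner engine of the remote workflow
    # Mirrors the change above: the longer of the two timeouts wins
    get_workflow_id_timeout = max(
        int(mlrun.mlconf.workflows.timeouts.remote),
        int(getattr(mlrun.mlconf.workflows.timeouts, engine)),
    )
    print(get_workflow_id_timeout)  # 120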
@@ -988,6 +1003,7 @@ def load_and_run(
     cleanup_ttl: int = None,
     load_only: bool = False,
     wait_for_completion: bool = False,
+    project_context: str = None,
 ):
     """
     Auxiliary function that the RemoteRunner run once or run every schedule.
@@ -1018,10 +1034,11 @@ def load_and_run(
                                  workflow and all its resources are deleted)
     :param load_only: for just loading the project, inner use.
     :param wait_for_completion: wait for workflow completion before returning
+    :param project_context: project context path (used for loading the project)
     """
    try:
        project = mlrun.load_project(
-            context=f"./{project_name}",
+            context=project_context or f"./{project_name}",
            url=url,
            name=project_name,
            init_git=init_git,
@@ -1053,7 +1070,7 @@ def load_and_run(
 
         raise error
 
-    context.logger.info(f"Loaded project {project.name} from remote successfully")
+    context.logger.info(f"Loaded project {project.name} successfully")
 
     if load_only:
         return
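
Note: load_and_run previously hardcoded the load context to ./{project_name}; the new project_context argument lets a runner whose image already contains the project point at that directory instead. A hedged sketch of the equivalent call (paths and URL are hypothetical):

    import mlrun

    project = mlrun.load_project(
        context="/home/mlrun_code",  # hypothetical pre-extracted context dir
        url="git://github.com/org/repo.git#main",  # hypothetical project source
        name="my-project",
    )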
mlrun/projects/project.py CHANGED
@@ -24,13 +24,12 @@ import typing
 import uuid
 import warnings
 import zipfile
-from os import environ, makedirs, path, remove
+from os import environ, makedirs, path
 from typing import Callable, Dict, List, Optional, Union
 
 import dotenv
 import git
 import git.exc
-import inflection
 import kfp
 import nuclio
 import requests
@@ -41,6 +40,7 @@ import mlrun.common.schemas.model_monitoring
 import mlrun.common.schemas.model_monitoring.constants as mm_constants
 import mlrun.db
 import mlrun.errors
+import mlrun.k8s_utils
 import mlrun.runtimes
 import mlrun.runtimes.pod
 import mlrun.runtimes.utils
@@ -171,7 +171,7 @@ def new_project(
     :param name: project name
     :param context: project local directory path (default value = "./")
     :param init_git: if True, will git init the context dir
-    :param user_project: add the current user name to the provided project name (making it unique per user)
+    :param user_project: add the current username to the provided project name (making it unique per user)
     :param remote: remote Git url
     :param from_template: path to project YAML/zip file that will be used as a template
     :param secrets: key:secret dict or SecretsStore used to download sources
@@ -319,7 +319,7 @@ def load_project(
     :param init_git: if True, will git init the context dir
     :param subpath: project subpath (within the archive)
     :param clone: if True, always clone (delete any existing content)
-    :param user_project: add the current user name to the project name (for db:// prefixes)
+    :param user_project: add the current username to the project name (for db:// prefixes)
     :param save: whether to save the created project and artifact in the DB
     :param sync_functions: sync the project's functions into the project object (will be saved to the DB if save=True)
     :param parameters: key/value pairs to add to the project.spec.params
@@ -420,7 +420,7 @@ def get_or_create_project(
     save: bool = True,
     parameters: dict = None,
 ) -> "MlrunProject":
-    """Load a project from MLRun DB, or create/import if doesnt exist
+    """Load a project from MLRun DB, or create/import if it does not exist
 
     MLRun looks for a project.yaml file with project definition and objects in the project root path
     and use it to initialize the project, in addition it runs the project_setup.py file (if it exists)
@@ -605,9 +605,14 @@ def _load_project_dir(context, name="", subpath=""):
         # If there is a setup script do not force having project.yaml file
         project = MlrunProject()
     else:
-        raise mlrun.errors.MLRunNotFoundError(
-            "project or function YAML not found in path"
+        message = "Project or function YAML not found in path"
+        logger.error(
+            message,
+            context=context,
+            name=name,
+            subpath=subpath,
         )
+        raise mlrun.errors.MLRunNotFoundError(message)
 
     project.spec.context = context
     project.metadata.name = name or project.metadata.name
@@ -620,9 +625,9 @@ def _add_username_to_project_name_if_needed(name, user_project):
         if not name:
             raise ValueError("user_project must be specified together with name")
         username = environ.get("V3IO_USERNAME") or getpass.getuser()
-        normalized_username = inflection.dasherize(username.lower())
+        normalized_username = mlrun.utils.normalize_project_username(username.lower())
         if username != normalized_username:
-            logger.info(
+            logger.debug(
                 "Username was normalized to match the required pattern for project name",
                 username=username,
                 normalized_username=normalized_username,
@@ -694,6 +699,31 @@ class ProjectMetadata(ModelObj):
             return False
         return True
 
+    @staticmethod
+    def validate_project_labels(labels: dict, raise_on_failure: bool = True) -> bool:
+        """
+        This
+        https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#syntax-and-character-set
+        """
+
+        # no labels is a valid case
+        if not labels:
+            return True
+        if not isinstance(labels, dict):
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "Labels must be a dictionary of key-value pairs"
+            )
+        try:
+            for key, value in labels.items():
+                mlrun.k8s_utils.verify_label_key(key)
+                mlrun.k8s_utils.verify_label_value(value, label_key=key)
+
+        except mlrun.errors.MLRunInvalidArgumentError:
+            if raise_on_failure:
+                raise
+            return False
+        return True
+
 
 class ProjectSpec(ModelObj):
     def __init__(
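
Note: validate_project_labels checks label keys and values against the Kubernetes label syntax (via the new mlrun.k8s_utils helpers) and either raises or returns False. A small usage sketch (the label values are illustrative):

    from mlrun.projects.project import ProjectMetadata

    # Valid labels pass silently and return True
    ProjectMetadata.validate_project_labels({"owner": "jane", "app": "fraud-model"})

    # Invalid labels raise MLRunInvalidArgumentError, or return False when probing
    ok = ProjectMetadata.validate_project_labels(
        {"bad key!": "x"}, raise_on_failure=False
    )
    print(ok)  # False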
@@ -1210,20 +1240,20 @@ class MlrunProject(ModelObj):
         self,
         name,
         workflow_path: str,
-        embed=False,
-        engine=None,
-        args_schema: typing.List[EntrypointParam] = None,
-        handler=None,
+        embed: bool = False,
+        engine: Optional[str] = None,
+        args_schema: list[EntrypointParam] = None,
+        handler: Optional[str] = None,
         schedule: typing.Union[str, mlrun.common.schemas.ScheduleCronTrigger] = None,
-        ttl=None,
-        image: str = None,
+        ttl: Optional[int] = None,
+        image: Optional[str] = None,
         **args,
     ):
         """Add or update a workflow, specify a name and the code path
 
         :param name: Name of the workflow
         :param workflow_path: URL (remote) / Path (absolute or relative to the project code path i.e.
-                      <project.spec.get_code_path()>/<workflow_path>) for the workflow file.
+                              <project.spec.get_code_path()>/<workflow_path>) for the workflow file.
         :param embed: Add the workflow code into the project.yaml
         :param engine: Workflow processing engine ("kfp", "local", "remote" or "remote:local")
         :param args_schema: List of arg schema definitions (:py:class`~mlrun.model.EntrypointParam`)
@@ -2570,40 +2600,40 @@ class MlrunProject(ModelObj):
         cleanup_ttl: int = None,
         notifications: typing.List[mlrun.model.Notification] = None,
     ) -> _PipelineRunStatus:
-        """run a workflow using kubeflow pipelines
-
-        :param name: name of the workflow
-        :param workflow_path:
-                      url to a workflow file, if not a project workflow
-        :param arguments:
-                      kubeflow pipelines arguments (parameters)
-        :param artifact_path:
-                      target path/url for workflow artifacts, the string
-                      '{{workflow.uid}}' will be replaced by workflow id
-        :param workflow_handler:
-                      workflow function handler (for running workflow function directly)
-        :param namespace: kubernetes namespace if other than default
-        :param sync: force functions sync before run
-        :param watch: wait for pipeline completion
-        :param dirty: allow running the workflow when the git repo is dirty
-        :param engine: workflow engine running the workflow.
-                       supported values are 'kfp' (default), 'local' or 'remote'.
-                       for setting engine for remote running use 'remote:local' or 'remote:kfp'.
-        :param local: run local pipeline with local functions (set local=True in function.run())
+        """Run a workflow using kubeflow pipelines
+
+        :param name: Name of the workflow
+        :param workflow_path: URL to a workflow file, if not a project workflow
+        :param arguments: Kubeflow pipelines arguments (parameters)
+        :param artifact_path: Target path/URL for workflow artifacts, the string '{{workflow.uid}}' will be
+                              replaced by workflow id.
+        :param workflow_handler: Workflow function handler (for running workflow function directly)
+        :param namespace: Kubernetes namespace if other than default
+        :param sync: Force functions sync before run
+        :param watch: Wait for pipeline completion
+        :param dirty: Allow running the workflow when the git repo is dirty
+        :param engine: Workflow engine running the workflow.
+                       Supported values are 'kfp' (default), 'local' or 'remote'.
+                       For setting engine for remote running use 'remote:local' or 'remote:kfp'.
+        :param local: Run local pipeline with local functions (set local=True in function.run())
         :param schedule: ScheduleCronTrigger class instance or a standard crontab expression string
                          (which will be converted to the class using its `from_crontab` constructor),
                          see this link for help:
                          https://apscheduler.readthedocs.io/en/3.x/modules/triggers/cron.html#module-apscheduler.triggers.cron
-                         for using the pre-defined workflow's schedule, set `schedule=True`
-        :param timeout: timeout in seconds to wait for pipeline completion (watch will be activated)
-        :param source: remote source to use instead of the actual `project.spec.source` (used when engine is remote).
-                       for other engines the source is to validate that the code is up-to-date
-        :param cleanup_ttl:
-                      pipeline cleanup ttl in secs (time to wait after workflow completion, at which point the
-                      workflow and all its resources are deleted)
-        :param notifications:
-                      list of notifications to send for workflow completion
-        :returns: run id
+                         For using the pre-defined workflow's schedule, set `schedule=True`
+        :param timeout: Timeout in seconds to wait for pipeline completion (watch will be activated)
+        :param source: Source to use instead of the actual `project.spec.source` (used when engine is remote).
+                       Can be a one of:
+                       1. Remote URL which is loaded dynamically to the workflow runner.
+                       2. A path to the project's context on the workflow runner's image.
+                       Path can be absolute or relative to `project.spec.build.source_code_target_dir` if defined
+                       (enriched when building a project image with source, see `MlrunProject.build_image`).
+                       For other engines the source is used to validate that the code is up-to-date.
+        :param cleanup_ttl: Pipeline cleanup ttl in secs (time to wait after workflow completion, at which point the
+                            workflow and all its resources are deleted)
+        :param notifications: List of notifications to send for workflow completion
+
+        :returns: ~py:class:`~mlrun.projects.pipelines._PipelineRunStatus` instance
         """
 
         arguments = arguments or {}
@@ -2750,7 +2780,7 @@ class MlrunProject(ModelObj):
     def export(self, filepath=None, include_files: str = None):
         """save the project object into a yaml file or zip archive (default to project.yaml)
 
-        By default the project object is exported to a yaml file, when the filepath suffix is '.zip'
+        By default, the project object is exported to a yaml file, when the filepath suffix is '.zip'
         the project context dir (code files) are also copied into the zip, the archive path can include
         DataItem urls (for remote object storage, e.g. s3://<bucket>/<path>).
 
@@ -2775,19 +2805,19 @@ class MlrunProject(ModelObj):
 
         if archive_code:
             files_filter = include_files or "**"
-            tmp_path = None
-            if "://" in filepath:
-                tmp_path = tempfile.mktemp(".zip")
-            zipf = zipfile.ZipFile(tmp_path or filepath, "w")
-            for file_path in glob.iglob(
-                f"{project_dir}/{files_filter}", recursive=True
-            ):
-                write_path = pathlib.Path(file_path)
-                zipf.write(write_path, arcname=write_path.relative_to(project_dir))
-            zipf.close()
-            if tmp_path:
-                mlrun.get_dataitem(filepath).upload(tmp_path)
-                remove(tmp_path)
+            with tempfile.NamedTemporaryFile(suffix=".zip") as f:
+                remote_file = "://" in filepath
+                fpath = f.name if remote_file else filepath
+                with zipfile.ZipFile(fpath, "w") as zipf:
+                    for file_path in glob.iglob(
+                        f"{project_dir}/{files_filter}", recursive=True
+                    ):
+                        write_path = pathlib.Path(file_path)
+                        zipf.write(
+                            write_path, arcname=write_path.relative_to(project_dir)
+                        )
+                if remote_file:
+                    mlrun.get_dataitem(filepath).upload(zipf.filename)
 
     def set_model_monitoring_credentials(
         self,
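
Note: the export rewrite stages the zip in a NamedTemporaryFile context manager, so the temporary file is cleaned up even if the upload fails, which is why the explicit remove() call (and its import, dropped in the import hunk above) is gone. Usage is unchanged, e.g. (bucket name hypothetical):

    # Local archive next to the project
    project.export("project.zip")

    # Remote archive: built in a temp file, then uploaded via mlrun.get_dataitem()
    project.export("s3://my-bucket/projects/my-project.zip")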
@@ -3002,6 +3032,7 @@ class MlrunProject(ModelObj):
         requirements_file: str = None,
         builder_env: dict = None,
         extra_args: str = None,
+        source_code_target_dir: str = None,
     ):
         """specify builder configuration for the project
 
@@ -3022,6 +3053,8 @@ class MlrunProject(ModelObj):
                             e.g. builder_env={"GIT_TOKEN": token}, does not work yet in KFP
         :param extra_args: A string containing additional builder arguments in the format of command-line options,
             e.g. extra_args="--skip-tls-verify --build-arg A=val"
+        :param source_code_target_dir: Path on the image where source code would be extracted
+            (by default `/home/mlrun_code`)
         """
         if not overwrite_build_params:
             # TODO: change overwrite_build_params default to True in 1.8.0
@@ -3045,6 +3078,7 @@ class MlrunProject(ModelObj):
             overwrite=overwrite_build_params,
             builder_env=builder_env,
             extra_args=extra_args,
+            source_code_target_dir=source_code_target_dir,
         )
 
         if set_as_default and image != self.default_image:
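
Note: together with the enrich_function_object change in pipelines.py above, projects can now pin where source is extracted inside built images, and functions enriched from the project inherit it. A hedged configuration sketch (the image names are illustrative):

    project.build_config(
        image=".my-project-image",
        base_image="mlrun/mlrun",
        requirements=["pandas"],
        source_code_target_dir="/home/mlrun_code",  # the documented default
    )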
@@ -3091,7 +3125,7 @@ class MlrunProject(ModelObj):
                                         * False: The new params are merged with the existing
                                         * True: The existing params are replaced by the new ones
         :param extra_args: A string containing additional builder arguments in the format of command-line options,
-            e.g. extra_args="--skip-tls-verify --build-arg A=val"r
+            e.g. extra_args="--skip-tls-verify --build-arg A=val"
         :param target_dir: Path on the image where source code would be extracted (by default `/home/mlrun_code`)
         """
         if not base_image:
@@ -3159,6 +3193,11 @@ class MlrunProject(ModelObj):
             force_build=True,
         )
 
+        # Get the enriched target dir from the function
+        self.spec.build.source_code_target_dir = (
+            function.spec.build.source_code_target_dir
+        )
+
         try:
             mlrun.db.get_run_db(secrets=self._secrets).delete_function(
                 name=function.metadata.name
mlrun/run.py CHANGED
@@ -851,6 +851,7 @@ def _run_pipeline(
     ops=None,
     url=None,
    cleanup_ttl=None,
+    timeout=60,
 ):
     """remote KubeFlow pipeline execution
 
@@ -888,6 +889,7 @@ def _run_pipeline(
         ops=ops,
         artifact_path=artifact_path,
         cleanup_ttl=cleanup_ttl,
+        timeout=timeout,
     )
     logger.info(f"Pipeline run id={pipeline_run_id}, check UI for progress")
     return pipeline_run_id
mlrun/runtimes/base.py CHANGED
@@ -15,6 +15,7 @@ import enum
 import http
 import re
 import typing
+import warnings
 from base64 import b64encode
 from os import environ
 from typing import Callable, Dict, List, Optional, Union
@@ -124,7 +125,7 @@ class FunctionSpec(ModelObj):
         self.allow_empty_resources = None
         # the build.source is cloned/extracted to the specified clone_target_dir
         # if a relative path is specified, it will be enriched with a temp dir path
-        self.clone_target_dir = clone_target_dir or ""
+        self._clone_target_dir = clone_target_dir or None
 
     @property
     def build(self) -> ImageBuilder:
@@ -134,6 +135,28 @@ class FunctionSpec(ModelObj):
     def build(self, build):
         self._build = self._verify_dict(build, "build", ImageBuilder)
 
+    @property
+    def clone_target_dir(self):
+        # TODO: remove this property in 1.9.0
+        if self.build.source_code_target_dir:
+            warnings.warn(
+                "The clone_target_dir attribute is deprecated in 1.6.2 and will be removed in 1.9.0. "
+                "Use spec.build.source_code_target_dir instead.",
+                FutureWarning,
+            )
+        return self.build.source_code_target_dir
+
+    @clone_target_dir.setter
+    def clone_target_dir(self, clone_target_dir):
+        # TODO: remove this property in 1.9.0
+        if clone_target_dir:
+            warnings.warn(
+                "The clone_target_dir attribute is deprecated in 1.6.2 and will be removed in 1.9.0. "
+                "Use spec.build.source_code_target_dir instead.",
+                FutureWarning,
+            )
+        self.build.source_code_target_dir = clone_target_dir
+
     def enrich_function_preemption_spec(self):
         pass
 
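Note: clone_target_dir survives as a deprecation shim that proxies to build.source_code_target_dir and emits a FutureWarning, so code written against the old attribute keeps working until 1.9.0. Roughly:

    import warnings

    import mlrun

    fn = mlrun.new_function("trainer", kind="job", image="mlrun/mlrun")
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        fn.spec.clone_target_dir = "/home/mlrun_code"  # deprecated spelling
    print(fn.spec.build.source_code_target_dir)  # /home/mlrun_code
    print(caught[0].category)  # <class 'FutureWarning'>
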
mlrun/runtimes/function.py CHANGED
@@ -432,15 +432,15 @@ class RemoteRuntime(KubeResource):
                 raise ValueError(
                     "gateway timeout must be greater than the worker timeout"
                 )
-            annotations[
-                "nginx.ingress.kubernetes.io/proxy-connect-timeout"
-            ] = f"{gateway_timeout}"
-            annotations[
-                "nginx.ingress.kubernetes.io/proxy-read-timeout"
-            ] = f"{gateway_timeout}"
-            annotations[
-                "nginx.ingress.kubernetes.io/proxy-send-timeout"
-            ] = f"{gateway_timeout}"
+            annotations["nginx.ingress.kubernetes.io/proxy-connect-timeout"] = (
+                f"{gateway_timeout}"
+            )
+            annotations["nginx.ingress.kubernetes.io/proxy-read-timeout"] = (
+                f"{gateway_timeout}"
+            )
+            annotations["nginx.ingress.kubernetes.io/proxy-send-timeout"] = (
+                f"{gateway_timeout}"
+            )
 
         trigger = nuclio.HttpTrigger(
             workers=workers,
@@ -500,10 +500,9 @@ class RemoteRuntime(KubeResource):
         # verify v3io stream trigger name is valid
         mlrun.utils.helpers.validate_v3io_stream_consumer_group(group)
 
-        consumer_group = kwargs.pop("consumerGroup", None)
-        if consumer_group:
+        if "consumer_group" in kwargs:
             logger.warning(
-                "'consumerGroup' kwargs value is ignored. use group argument instead"
+                "'consumer_group' in kwargs will be ignored. Use group parameter instead."
             )
 
         container, path = split_path(stream_path)
@@ -517,11 +516,11 @@ class RemoteRuntime(KubeResource):
                 name=name,
                 container=container,
                 path=path[1:],
-                consumerGroup=group,
-                seekTo=seek_to,
+                consumer_group=group,
+                seek_to=seek_to,
                 webapi=endpoint or "http://v3io-webapi:8081",
                 extra_attributes=extra_attributes,
-                readBatchSize=256,
+                read_batch_size=256,
                 **kwargs,
             ),
         )
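
Note: the V3IOStreamTrigger keyword arguments move from nuclio's legacy camelCase (consumerGroup, seekTo, readBatchSize) to snake_case, and a stray consumer_group passed through kwargs now only triggers a warning. The public call is unchanged; the stream path and group below are illustrative:

    fn.add_v3io_stream_trigger(
        stream_path="v3io:///projects/my-proj/stream",
        name="stream",
        group="serving",     # names the consumer group; don't pass consumer_group=
        seek_to="earliest",
        shards=1,
    )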
mlrun/runtimes/kubejob.py CHANGED
@@ -73,7 +73,7 @@ class KubejobRuntime(KubeResource):
         if workdir:
             self.spec.workdir = workdir
         if target_dir:
-            self.spec.clone_target_dir = target_dir
+            self.spec.build.source_code_target_dir = target_dir
 
         self.spec.build.load_source_on_run = pull_at_runtime
         if (
@@ -232,8 +232,10 @@ class KubejobRuntime(KubeResource):
             self.spec.build.base_image = self.spec.build.base_image or get_in(
                 data, "data.spec.build.base_image"
             )
-            # get the clone target dir in case it was enriched due to loading source
-            self.spec.clone_target_dir = get_in(data, "data.spec.clone_target_dir")
+            # Get the source target dir in case it was enriched due to loading source
+            self.spec.build.source_code_target_dir = get_in(
+                data, "data.spec.build.source_code_target_dir"
+            ) or get_in(data, "data.spec.clone_target_dir")
             ready = data.get("ready", False)
             if not ready:
                 logger.info(
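
Note: when polling build status, the runtime now prefers the enriched data.spec.build.source_code_target_dir from the API and falls back to the legacy data.spec.clone_target_dir for older servers. Setting the directory when attaching source might look like this (the source URL and filename are hypothetical):

    fn = mlrun.code_to_function(
        "trainer", kind="job", image="mlrun/mlrun", filename="train.py"
    )
    fn.with_source_archive(
        "git://github.com/org/repo.git#main",
        target_dir="/home/mlrun_code",  # lands on spec.build.source_code_target_dir
        pull_at_runtime=True,
    )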
mlrun/runtimes/local.py CHANGED
@@ -218,7 +218,7 @@ class LocalRuntime(BaseRuntime, ParallelRunner):
         if workdir:
             self.spec.workdir = workdir
         if target_dir:
-            self.spec.clone_target_dir = target_dir
+            self.spec.build.source_code_target_dir = target_dir
 
     def is_deployed(self):
         return True
@@ -240,7 +240,7 @@ class LocalRuntime(BaseRuntime, ParallelRunner):
         if self.spec.build.source and not hasattr(self, "_is_run_local"):
             target_dir = extract_source(
                 self.spec.build.source,
-                self.spec.clone_target_dir,
+                self.spec.build.source_code_target_dir,
                 secrets=execution._secrets_manager,
             )
             if workdir and not workdir.startswith("/"):
mlrun/runtimes/mpijob/abstract.py CHANGED
@@ -196,13 +196,13 @@ class AbstractMPIJobRuntime(KubejobRuntime, abc.ABC):
         if steps_per_sample is not None:
             horovod_autotune_settings["autotune-steps-per-sample"] = steps_per_sample
         if bayes_opt_max_samples is not None:
-            horovod_autotune_settings[
-                "autotune-bayes-opt-max-samples"
-            ] = bayes_opt_max_samples
+            horovod_autotune_settings["autotune-bayes-opt-max-samples"] = (
+                bayes_opt_max_samples
+            )
         if gaussian_process_noise is not None:
-            horovod_autotune_settings[
-                "autotune-gaussian-process-noise"
-            ] = gaussian_process_noise
+            horovod_autotune_settings["autotune-gaussian-process-noise"] = (
+                gaussian_process_noise
+            )
 
         self.set_envs(horovod_autotune_settings)
 
mlrun/runtimes/pod.py CHANGED
@@ -430,9 +430,9 @@ class KubeResourceSpec(FunctionSpec):
                     )
                     is None
                 ):
-                    resources[resource_requirement][
-                        resource_type
-                    ] = default_resources[resource_requirement][resource_type]
+                    resources[resource_requirement][resource_type] = (
+                        default_resources[resource_requirement][resource_type]
+                    )
         # This enables the user to define that no defaults would be applied on the resources
         elif resources == {}:
             return resources
mlrun/runtimes/serving.py CHANGED
@@ -309,7 +309,8 @@ class ServingRuntime(RemoteRuntime):
         stream_args: dict = None,
         tracking_policy: Union[TrackingPolicy, dict] = None,
     ):
-        """set tracking parameters:
+        """apply on your serving function to monitor a deployed model, including real-time dashboards to detect drift
+        and analyze performance.
 
         :param stream_path: Path/url of the tracking stream e.g. v3io:///users/mike/mystream
                             you can use the "dummy://" path for test/simulation.
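
Note: the set_tracking docstring now says what the method does (enable model monitoring on a deployed serving function) instead of the terse "set tracking parameters". A minimal sketch, assuming a serving function object:

    serving_fn = mlrun.new_function("serving", kind="serving", image="mlrun/mlrun")
    # "dummy://" is the documented test/simulation stream path
    serving_fn.set_tracking(stream_path="dummy://")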
@@ -484,11 +485,10 @@ class ServingRuntime(RemoteRuntime):
         )
         extra_attributes = trigger_args.get("extra_attributes", {})
         trigger_args["extra_attributes"] = extra_attributes
-        extra_attributes["workerAllocationMode"] = extra_attributes.get(
-            "workerAllocationMode", "static"
+        extra_attributes["worker_allocation_mode"] = extra_attributes.get(
+            "worker_allocation_mode", "static"
         )
 
-        max_workers_default = 4
         if (
             stream.path.startswith("kafka://")
             or "kafka_bootstrap_servers" in stream.options
@@ -497,9 +497,6 @@ class ServingRuntime(RemoteRuntime):
             if brokers:
                 brokers = brokers.split(",")
             topic, brokers = parse_kafka_url(stream.path, brokers)
-            trigger_args["max_workers"] = trigger_args.get(
-                "max_workers", max_workers_default
-            )
             trigger = KafkaTrigger(
                 brokers=brokers,
                 topics=[topic],
@@ -510,10 +507,6 @@ class ServingRuntime(RemoteRuntime):
         else:
             # V3IO doesn't allow hyphens in object names
             group = group.replace("-", "_")
-            # Deal with unconventional parameter naming in V3IOStreamTrigger specifically
-            trigger_args["maxWorkers"] = trigger_args.get(
-                "maxWorkers", max_workers_default
-            )
             child_function.function_object.add_v3io_stream_trigger(
                 stream.path, group=group, shards=stream.shards, **trigger_args
             )
@@ -530,9 +523,9 @@ class ServingRuntime(RemoteRuntime):
             function_object.metadata.tag = self.metadata.tag
 
         function_object.metadata.labels = function_object.metadata.labels or {}
-        function_object.metadata.labels[
-            "mlrun/parent-function"
-        ] = self.metadata.name
+        function_object.metadata.labels["mlrun/parent-function"] = (
+            self.metadata.name
+        )
         function_object._is_child_function = True
         if not function_object.spec.graph:
             # copy the current graph only if the child doesnt have a graph of his own
mlrun/runtimes/sparkjob/spark3job.py CHANGED
@@ -345,9 +345,9 @@ class Spark3JobSpec(KubeResourceSpec):
                     )
                     is None
                 ):
-                    resources[resource_requirement][
-                        resource_type
-                    ] = default_resources[resource_requirement][resource_type]
+                    resources[resource_requirement][resource_type] = (
+                        default_resources[resource_requirement][resource_type]
+                    )
                 else:
                     resources = default_resources