ob-metaflow-extensions 1.1.171rc1__py2.py3-none-any.whl → 1.1.173__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ob-metaflow-extensions might be problematic. Click here for more details.

@@ -324,7 +324,6 @@ CLIS_DESC = [
324
324
  ("nvct", ".nvct.nvct_cli.cli"),
325
325
  ("fast-bakery", ".fast_bakery.fast_bakery_cli.cli"),
326
326
  ("snowpark", ".snowpark.snowpark_cli.cli"),
327
- ("app", ".apps.app_cli.cli"),
328
327
  ]
329
328
  STEP_DECORATORS_DESC = [
330
329
  ("nvidia", ".nvcf.nvcf_decorator.NvcfDecorator"),
@@ -46,23 +46,23 @@ SUPPORTABLE_GPU_TYPES = {
46
46
  "H100": [
47
47
  {
48
48
  "n_gpus": 1,
49
- "instance_type": "GCP.GPU.H100_1x",
50
- "backend": "gcp-asia-se-1a",
49
+ "instance_type": "OCI.GPU.H100_1x",
50
+ "backend": "nvcf-dgxc-k8s-oci-nrt-prd8",
51
51
  },
52
52
  {
53
53
  "n_gpus": 2,
54
- "instance_type": "GCP.GPU.H100_2x",
55
- "backend": "gcp-asia-se-1a",
54
+ "instance_type": "OCI.GPU.H100_2x",
55
+ "backend": "nvcf-dgxc-k8s-oci-nrt-prd8",
56
56
  },
57
57
  {
58
58
  "n_gpus": 4,
59
- "instance_type": "GCP.GPU.H100_4x",
60
- "backend": "gcp-asia-se-1a",
59
+ "instance_type": "OCI.GPU.H100_4x",
60
+ "backend": "nvcf-dgxc-k8s-oci-nrt-prd8",
61
61
  },
62
62
  {
63
63
  "n_gpus": 8,
64
- "instance_type": "GCP.GPU.H100_8x",
65
- "backend": "gcp-asia-se-1a",
64
+ "instance_type": "OCI.GPU.H100_8x",
65
+ "backend": "nvcf-dgxc-k8s-oci-nrt-prd8",
66
66
  },
67
67
  ],
68
68
  "NEBIUS_H100": [
@@ -13,12 +13,14 @@ class TorchTune:
13
13
  def __init__(
14
14
  self,
15
15
  use_multi_node_config: bool = False,
16
+ config_overrides: Optional[Dict] = None,
16
17
  ):
17
18
  """
18
19
  Initialize the Tune launcher.
19
20
 
20
21
  :param use_multi_node_config: If True, attempt to build a distributed configuration
21
22
  from current.torch.torchrun_args.
23
+ :param config_overrides: Optional dictionary of config overrides for tune run.
22
24
  """
23
25
  self.multi_node_config = {}
24
26
  if use_multi_node_config:
@@ -37,6 +39,8 @@ class TorchTune:
37
39
  "num_processes": current.torch.torchrun_args["nproc_per_node"]
38
40
  * current.torch.torchrun_args["nnodes"],
39
41
  }
42
+ if config_overrides:
43
+ self.multi_node_config.update(config_overrides)
40
44
  print(
41
45
  f"[Metaflow Tune] Discovered multi-node config for torchrun: {self.multi_node_config}"
42
46
  )
@@ -1,2 +1 @@
1
1
  from ..plugins.kubernetes.pod_killer import PodKiller
2
- from ..plugins.fast_bakery.baker import bake_image
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ob-metaflow-extensions
3
- Version: 1.1.171rc1
3
+ Version: 1.1.173
4
4
  Summary: Outerbounds Platform Extensions for Metaflow
5
5
  Author: Outerbounds, Inc.
6
6
  License: Commercial
@@ -1,11 +1,10 @@
1
1
  metaflow_extensions/outerbounds/__init__.py,sha256=Gb8u06s9ClQsA_vzxmkCzuMnigPy7kKcDnLfb7eB-64,514
2
2
  metaflow_extensions/outerbounds/remote_config.py,sha256=pEFJuKDYs98eoB_-ryPjVi9b_c4gpHMdBHE14ltoxIU,4672
3
3
  metaflow_extensions/outerbounds/config/__init__.py,sha256=JsQGRuGFz28fQWjUvxUgR8EKBLGRdLUIk_buPLJplJY,1225
4
- metaflow_extensions/outerbounds/plugins/__init__.py,sha256=rWDE4k-KbsFsCoqct1Rw7w3bSV0jLunx6LMBKD2SYEA,13747
4
+ metaflow_extensions/outerbounds/plugins/__init__.py,sha256=qaGCEa_QFWgGURABv-ss0TYbC1RJTLlckogTA5tc3Bk,13713
5
5
  metaflow_extensions/outerbounds/plugins/auth_server.py,sha256=_Q9_2EL0Xy77bCRphkwT1aSu8gQXRDOH-Z-RxTUO8N4,2202
6
6
  metaflow_extensions/outerbounds/plugins/perimeters.py,sha256=QXh3SFP7GQbS-RAIxUOPbhPzQ7KDFVxZkTdKqFKgXjI,2697
7
7
  metaflow_extensions/outerbounds/plugins/apps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
- metaflow_extensions/outerbounds/plugins/apps/app_cli.py,sha256=Uv9viEmQ0_6ogVHO8O_FGQyXR6rV5HR5agsP3gGnKm0,638
9
8
  metaflow_extensions/outerbounds/plugins/apps/app_utils.py,sha256=sw9whU17lAzlD2K2kEDNjlk1Ib-2xE2UNhJkmzD8Qv8,8543
10
9
  metaflow_extensions/outerbounds/plugins/apps/consts.py,sha256=iHsyqbUg9k-rgswCs1Jxf5QZIxR1V-peCDRjgr9kdBM,177
11
10
  metaflow_extensions/outerbounds/plugins/apps/deploy_decorator.py,sha256=VkmiMdNYHhNdt-Qm9AVv7aE2LWFsIFEc16YcOYjwF6Q,8568
@@ -21,7 +20,6 @@ metaflow_extensions/outerbounds/plugins/checkpoint_datastores/__init__.py,sha256
21
20
  metaflow_extensions/outerbounds/plugins/checkpoint_datastores/coreweave.py,sha256=_WzoOROFjoFa8TzsMNFp-r_1Zz7NUp-5ljn_kKlczXA,4534
22
21
  metaflow_extensions/outerbounds/plugins/checkpoint_datastores/nebius.py,sha256=zgqDLFewCeF5jqh-hUNKmC_OAjld09ln0bb8Lkeqapc,4659
23
22
  metaflow_extensions/outerbounds/plugins/fast_bakery/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
24
- metaflow_extensions/outerbounds/plugins/fast_bakery/baker.py,sha256=ShE5omFBr83wkvEhL_ptRFvDNMs6wefg4BjaafQjTcM,3602
25
23
  metaflow_extensions/outerbounds/plugins/fast_bakery/docker_environment.py,sha256=nmp_INGIAiWyrhyJ71BH38eRLu1xCIEEKejmXNQ6RlA,15378
26
24
  metaflow_extensions/outerbounds/plugins/fast_bakery/fast_bakery.py,sha256=PE81ZB54OAMXkMGSB7JqgvgMg7N9kvoVclrWL-6jc2U,5626
27
25
  metaflow_extensions/outerbounds/plugins/fast_bakery/fast_bakery_cli.py,sha256=kqFyu2bJSnc9_9aYfBpz5xK6L6luWFZK_NMuh8f1eVk,1494
@@ -45,7 +43,7 @@ metaflow_extensions/outerbounds/plugins/nvct/__init__.py,sha256=47DEQpj8HBSa-_TI
45
43
  metaflow_extensions/outerbounds/plugins/nvct/exceptions.py,sha256=1PiV6FdH36CvkmHh5jtsfrsoe3Q_Fo1NomHw5wvgoDM,2886
46
44
  metaflow_extensions/outerbounds/plugins/nvct/nvct.py,sha256=Z2ZPWGuHe58au_d6GfHiw6Nl5d8INdLDI5exlsPEOSA,3564
47
45
  metaflow_extensions/outerbounds/plugins/nvct/nvct_cli.py,sha256=bB9AURhRep9PV_-b-qLHpgw_GPG_xFoq1PeHEgFP1mQ,10104
48
- metaflow_extensions/outerbounds/plugins/nvct/nvct_decorator.py,sha256=HKCvYn1Jh8uwLXeUqPNhxgBatq3mXNG5YIUl-zjNlHE,9429
46
+ metaflow_extensions/outerbounds/plugins/nvct/nvct_decorator.py,sha256=RRGSDTziEA_wf8Ck8zePwAHTHc3VPbinnIU49PBM60c,9477
49
47
  metaflow_extensions/outerbounds/plugins/nvct/nvct_runner.py,sha256=8IPkdvuTZNIqgAAt75gVNn-ydr-Zz2sKC8UX_6pNEKI,7091
50
48
  metaflow_extensions/outerbounds/plugins/nvct/utils.py,sha256=U4_Fu8H94j_Bbox7mmMhNnlRhlYHqnK28R5w_TMWEFM,1029
51
49
  metaflow_extensions/outerbounds/plugins/ollama/__init__.py,sha256=4T8LQqAuh8flSMvYztw6-OPoDoAorcBWhC-vPuuQPbc,9234
@@ -68,7 +66,7 @@ metaflow_extensions/outerbounds/plugins/snowpark/snowpark_exceptions.py,sha256=F
68
66
  metaflow_extensions/outerbounds/plugins/snowpark/snowpark_job.py,sha256=aQphxX6jqYgfa83w387pEWl0keuLm38V53I8P8UL2ck,6887
69
67
  metaflow_extensions/outerbounds/plugins/snowpark/snowpark_service_spec.py,sha256=AI_kcm1hZV3JRxJkookcH6twiGnAYjk9Dx-MeoYz60Y,8511
70
68
  metaflow_extensions/outerbounds/plugins/tensorboard/__init__.py,sha256=9lUM4Cqi5RjrHBRfG6AQMRz8-R96eZC8Ih0KD2lv22Y,1858
71
- metaflow_extensions/outerbounds/plugins/torchtune/__init__.py,sha256=TOXNeyhcgd8VxplXO_oEuryFEsbk0tikn5GL0-44SU8,5853
69
+ metaflow_extensions/outerbounds/plugins/torchtune/__init__.py,sha256=Psj2ybj_E1qp5KK2inon9e4ZecaRxnPtW3ngcirbO2g,6094
72
70
  metaflow_extensions/outerbounds/plugins/vllm/__init__.py,sha256=O04DPVoEdCZhPbvdldaE4ztoAxJNXU-ExosBCqe43v8,6463
73
71
  metaflow_extensions/outerbounds/plugins/vllm/constants.py,sha256=ODX_uM5iYrzpVltsAdSf9Jo0DAOMiZ3647DcKdCnlS0,24
74
72
  metaflow_extensions/outerbounds/plugins/vllm/exceptions.py,sha256=8m65k2L17zXgSkgU299DWqxr1wGUMsZgSJw0hBRizJ0,49
@@ -78,7 +76,7 @@ metaflow_extensions/outerbounds/profilers/__init__.py,sha256=wa_jhnCBr82TBxoS0e8
78
76
  metaflow_extensions/outerbounds/profilers/gpu.py,sha256=3Er8uKQzfm_082uadg4yn_D4Y-iSCgzUfFmguYxZsz4,27485
79
77
  metaflow_extensions/outerbounds/toplevel/__init__.py,sha256=qWUJSv_r5hXJ7jV_On4nEasKIfUCm6_UjkjXWA_A1Ts,90
80
78
  metaflow_extensions/outerbounds/toplevel/global_aliases_for_metaflow_package.py,sha256=_fKWv_-O1k5Nk5A1q05Ioh-PSsFXGL-jiAt7zfl8pIE,2999
81
- metaflow_extensions/outerbounds/toplevel/ob_internal.py,sha256=VdoPHis20NUkasUiM8d6JEq-X8QVO4eMDaMBAJgKKLg,105
79
+ metaflow_extensions/outerbounds/toplevel/ob_internal.py,sha256=RtF7t9EIyGjWi36jAHbRaLo_MNOxF5Jo8X9QWzOr0co,54
82
80
  metaflow_extensions/outerbounds/toplevel/plugins/azure/__init__.py,sha256=WUuhz2YQfI4fz7nIcipwwWq781eaoHEk7n4GAn1npDg,63
83
81
  metaflow_extensions/outerbounds/toplevel/plugins/gcp/__init__.py,sha256=BbZiaH3uILlEZ6ntBLKeNyqn3If8nIXZFq_Apd7Dhco,70
84
82
  metaflow_extensions/outerbounds/toplevel/plugins/kubernetes/__init__.py,sha256=5zG8gShSj8m7rgF4xgWBZFuY3GDP5n1T0ktjRpGJLHA,69
@@ -86,7 +84,7 @@ metaflow_extensions/outerbounds/toplevel/plugins/ollama/__init__.py,sha256=GRSz2
86
84
  metaflow_extensions/outerbounds/toplevel/plugins/snowflake/__init__.py,sha256=LptpH-ziXHrednMYUjIaosS1SXD3sOtF_9_eRqd8SJw,50
87
85
  metaflow_extensions/outerbounds/toplevel/plugins/torchtune/__init__.py,sha256=uTVkdSk3xZ7hEKYfdlyVteWj5KeDwaM1hU9WT-_YKfI,50
88
86
  metaflow_extensions/outerbounds/toplevel/plugins/vllm/__init__.py,sha256=ekcgD3KVydf-a0xMI60P4uy6ePkSEoFHiGnDq1JM940,45
89
- ob_metaflow_extensions-1.1.171rc1.dist-info/METADATA,sha256=ScWvbwRaG5a5c-EsnlwmjkDBmF3rlV5QonIg8JOkiJI,524
90
- ob_metaflow_extensions-1.1.171rc1.dist-info/WHEEL,sha256=bb2Ot9scclHKMOLDEHY6B2sicWOgugjFKaJsT7vwMQo,110
91
- ob_metaflow_extensions-1.1.171rc1.dist-info/top_level.txt,sha256=NwG0ukwjygtanDETyp_BUdtYtqIA_lOjzFFh1TsnxvI,20
92
- ob_metaflow_extensions-1.1.171rc1.dist-info/RECORD,,
87
+ ob_metaflow_extensions-1.1.173.dist-info/METADATA,sha256=um9vZD7UiAvC1J4Jysgp84JvWxtEj8Z7KNyqVGhVdDQ,521
88
+ ob_metaflow_extensions-1.1.173.dist-info/WHEEL,sha256=bb2Ot9scclHKMOLDEHY6B2sicWOgugjFKaJsT7vwMQo,110
89
+ ob_metaflow_extensions-1.1.173.dist-info/top_level.txt,sha256=NwG0ukwjygtanDETyp_BUdtYtqIA_lOjzFFh1TsnxvI,20
90
+ ob_metaflow_extensions-1.1.173.dist-info/RECORD,,
@@ -1,29 +0,0 @@
1
- from metaflow._vendor import click
2
- import os
3
-
4
- os.environ["APPS_CLI_LOADING_IN_METAFLOW"] = "true"
5
- OUTERBOUNDS_APP_CLI_AVAILABLE = True
6
-
7
- try:
8
- import outerbounds.apps.app_cli as ob_apps_cli
9
- except ImportError:
10
- OUTERBOUNDS_APP_CLI_AVAILABLE = False
11
-
12
-
13
- if not OUTERBOUNDS_APP_CLI_AVAILABLE:
14
-
15
- @click.group()
16
- def _cli():
17
- pass
18
-
19
- @_cli.group(help="Dummy Group to append to CLI for Safety")
20
- def app():
21
- pass
22
-
23
- @app.command(help="Dummy Command to append to CLI for Safety")
24
- def cannot_deploy():
25
- raise Exception("Outerbounds App CLI not available")
26
-
27
- cli = _cli
28
- else:
29
- cli = ob_apps_cli.cli
@@ -1,110 +0,0 @@
1
- import threading
2
- import time
3
- import sys
4
- from typing import Dict, Optional, Any, Callable
5
- from functools import partial
6
- from metaflow.exception import MetaflowException
7
- from metaflow.metaflow_config import FAST_BAKERY_URL
8
-
9
- from .fast_bakery import FastBakery, FastBakeryApiResponse, FastBakeryException
10
- from .docker_environment import cache_request
11
-
12
- BAKERY_METAFILE = ".imagebakery-cache"
13
-
14
-
15
- class BakerException(MetaflowException):
16
- headline = "Ran into an error while baking image"
17
-
18
- def __init__(self, msg):
19
- super(BakerException, self).__init__(msg)
20
-
21
-
22
- def bake_image(
23
- cache_file_path: str,
24
- ref: Optional[str] = None,
25
- python: Optional[str] = None,
26
- pypi_packages: Optional[Dict[str, str]] = None,
27
- conda_packages: Optional[Dict[str, str]] = None,
28
- base_image: Optional[str] = None,
29
- logger: Optional[Callable[[str], Any]] = None,
30
- ) -> FastBakeryApiResponse:
31
- """
32
- Bakes a Docker image with the specified dependencies.
33
-
34
- Args:
35
- cache_file_path: Path to the cache file
36
- ref: Reference identifier for this bake (for logging purposes)
37
- python: Python version to use
38
- pypi_packages: Dictionary of PyPI packages and versions
39
- conda_packages: Dictionary of Conda packages and versions
40
- base_image: Base Docker image to use
41
- logger: Optional logger function to output progress
42
-
43
- Returns:
44
- FastBakeryApiResponse: The response from the bakery service
45
-
46
- Raises:
47
- BakerException: If the baking process fails
48
- """
49
- # Default logger if none provided
50
- if logger is None:
51
- logger = partial(print, file=sys.stderr)
52
-
53
- # Thread lock for logging
54
- logger_lock = threading.Lock()
55
- images_baked = 0
56
-
57
- @cache_request(cache_file_path)
58
- def _cached_bake(
59
- ref=None,
60
- python=None,
61
- pypi_packages=None,
62
- conda_packages=None,
63
- base_image=None,
64
- ):
65
- try:
66
- bakery = FastBakery(url=FAST_BAKERY_URL)
67
- bakery._reset_payload()
68
- bakery.python_version(python)
69
- bakery.pypi_packages(pypi_packages)
70
- bakery.conda_packages(conda_packages)
71
- bakery.base_image(base_image)
72
- # bakery.ignore_cache()
73
-
74
- with logger_lock:
75
- logger(f"🍳 Baking [{ref}] ...")
76
- logger(f" 🐍 Python: {python}")
77
-
78
- if pypi_packages:
79
- logger(f" 📦 PyPI packages:")
80
- for package, version in pypi_packages.items():
81
- logger(f" 🔧 {package}: {version}")
82
-
83
- if conda_packages:
84
- logger(f" 📦 Conda packages:")
85
- for package, version in conda_packages.items():
86
- logger(f" 🔧 {package}: {version}")
87
-
88
- logger(f" 🏗️ Base image: {base_image}")
89
-
90
- start_time = time.time()
91
- res = bakery.bake()
92
- # TODO: Get actual bake time from bakery
93
- bake_time = time.time() - start_time
94
-
95
- with logger_lock:
96
- logger(f"🏁 Baked [{ref}] in {bake_time:.2f} seconds!")
97
- nonlocal images_baked
98
- images_baked += 1
99
- return res
100
- except FastBakeryException as ex:
101
- raise BakerException(f"Bake [{ref}] failed: {str(ex)}")
102
-
103
- # Call the cached bake function with the provided parameters
104
- return _cached_bake(
105
- ref=ref,
106
- python=python,
107
- pypi_packages=pypi_packages,
108
- conda_packages=conda_packages,
109
- base_image=base_image,
110
- )