PyPI - ob-metaflow - Versions diffs - 2.12.32.1__py2.py3-none-any.whl → 2.12.35.1__py2.py3-none-any.whl - Mend

ob-metaflow 2.12.32.1__py2.py3-none-any.whl → 2.12.35.1__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of ob-metaflow might be problematic. Click here for more details.

Files changed (27)

metaflow/flowspec.py +1 -0
metaflow/metaflow_config.py +2 -0
metaflow/multicore_utils.py +31 -14
metaflow/plugins/airflow/airflow.py +18 -17
metaflow/plugins/argo/argo_workflows.py +22 -13
metaflow/plugins/argo/argo_workflows_deployer_objects.py +4 -8
metaflow/plugins/aws/step_functions/step_functions_deployer_objects.py +4 -8
metaflow/plugins/datatools/s3/s3.py +12 -4
metaflow/plugins/events_decorator.py +1 -1
metaflow/plugins/kubernetes/kube_utils.py +29 -0
metaflow/plugins/kubernetes/kubernetes.py +4 -0
metaflow/plugins/kubernetes/kubernetes_cli.py +8 -0
metaflow/plugins/kubernetes/kubernetes_decorator.py +17 -0
metaflow/plugins/kubernetes/kubernetes_job.py +19 -13
metaflow/plugins/kubernetes/kubernetes_jobsets.py +19 -15
metaflow/runner/deployer_impl.py +4 -8
metaflow/runner/metaflow_runner.py +33 -29
metaflow/runner/subprocess_manager.py +58 -9
metaflow/runner/utils.py +185 -43
metaflow/util.py +5 -0
metaflow/version.py +1 -1
{ob_metaflow-2.12.32.1.dist-info → ob_metaflow-2.12.35.1.dist-info}/METADATA +2 -2
{ob_metaflow-2.12.32.1.dist-info → ob_metaflow-2.12.35.1.dist-info}/RECORD +27 -27
{ob_metaflow-2.12.32.1.dist-info → ob_metaflow-2.12.35.1.dist-info}/LICENSE +0 -0
{ob_metaflow-2.12.32.1.dist-info → ob_metaflow-2.12.35.1.dist-info}/WHEEL +0 -0
{ob_metaflow-2.12.32.1.dist-info → ob_metaflow-2.12.35.1.dist-info}/entry_points.txt +0 -0
{ob_metaflow-2.12.32.1.dist-info → ob_metaflow-2.12.35.1.dist-info}/top_level.txt +0 -0

metaflow/flowspec.py CHANGED Viewed

@@ -38,6 +38,7 @@ INTERNAL_ARTIFACTS_SET = set(
         "_unbounded_foreach",
         "_control_mapper_tasks",
         "_control_task_is_mapper_zero",
+        "_parallel_ubf_iter",
     ]
 )

metaflow/metaflow_config.py CHANGED Viewed

@@ -378,6 +378,8 @@ KUBERNETES_PORT = from_conf("KUBERNETES_PORT", None)
 KUBERNETES_CPU = from_conf("KUBERNETES_CPU", None)
 KUBERNETES_MEMORY = from_conf("KUBERNETES_MEMORY", None)
 KUBERNETES_DISK = from_conf("KUBERNETES_DISK", None)
+# Default kubernetes QoS class
+KUBERNETES_QOS = from_conf("KUBERNETES_QOS", "burstable")
 ARGO_WORKFLOWS_KUBERNETES_SECRETS = from_conf("ARGO_WORKFLOWS_KUBERNETES_SECRETS", "")
 ARGO_WORKFLOWS_ENV_VARS_TO_SKIP = from_conf("ARGO_WORKFLOWS_ENV_VARS_TO_SKIP", "")

metaflow/multicore_utils.py CHANGED Viewed

@@ -6,7 +6,18 @@ from tempfile import NamedTemporaryFile
 import time
 import metaflow.tracing as tracing
-from typing import Any, Callable, Iterable, Iterator, List, Optional
+from typing import (
+    Any,
+    Callable,
+    Iterable,
+    Iterator,
+    List,
+    Optional,
+    NoReturn,
+    Tuple,
+    TypeVar,
+    Union,
+)
 try:
     # Python 2
@@ -30,7 +41,13 @@ class MulticoreException(Exception):
     pass
-def _spawn(func, arg, dir):
+_A = TypeVar("_A")
+_R = TypeVar("_R")
+def _spawn(
+    func: Callable[[_A], _R], arg: _A, dir: Optional[str]
+) -> Union[Tuple[int, str], NoReturn]:
     with NamedTemporaryFile(prefix="parallel_map_", dir=dir, delete=False) as tmpfile:
         output_file = tmpfile.name
@@ -63,11 +80,11 @@ def _spawn(func, arg, dir):
 def parallel_imap_unordered(
-    func: Callable[[Any], Any],
-    iterable: Iterable[Any],
+    func: Callable[[_A], _R],
+    iterable: Iterable[_A],
     max_parallel: Optional[int] = None,
     dir: Optional[str] = None,
-) -> Iterator[Any]:
+) -> Iterator[_R]:
     """
     Parallelizes execution of a function using multiprocessing. The result
     order is not guaranteed.
@@ -79,9 +96,9 @@ def parallel_imap_unordered(
     iterable : Iterable[Any]
         Iterable over arguments to pass to fun
     max_parallel int, optional, default None
-        Maximum parallelism. If not specified, uses the number of CPUs
+        Maximum parallelism. If not specified, it uses the number of CPUs
     dir : str, optional, default None
-        If specified, directory where temporary files are created
+        If specified, it's the directory where temporary files are created
     Yields
     ------
@@ -121,14 +138,14 @@ def parallel_imap_unordered(
 def parallel_map(
-    func: Callable[[Any], Any],
-    iterable: Iterable[Any],
+    func: Callable[[_A], _R],
+    iterable: Iterable[_A],
     max_parallel: Optional[int] = None,
     dir: Optional[str] = None,
-) -> List[Any]:
+) -> List[_R]:
     """
     Parallelizes execution of a function using multiprocessing. The result
-    order is that of the arguments in `iterable`
+    order is that of the arguments in `iterable`.
     Parameters
     ----------
@@ -137,9 +154,9 @@ def parallel_map(
     iterable : Iterable[Any]
         Iterable over arguments to pass to fun
     max_parallel int, optional, default None
-        Maximum parallelism. If not specified, uses the number of CPUs
+        Maximum parallelism. If not specified, it uses the number of CPUs
     dir : str, optional, default None
-        If specified, directory where temporary files are created
+        If specified, it's the directory where temporary files are created
     Returns
     -------
@@ -155,4 +172,4 @@ def parallel_map(
     res = parallel_imap_unordered(
         wrapper, enumerate(iterable), max_parallel=max_parallel, dir=dir
     )
-    return [r for idx, r in sorted(res)]
+    return [r for _, r in sorted(res)]

metaflow/plugins/airflow/airflow.py CHANGED Viewed

@@ -46,6 +46,7 @@ from metaflow.parameters import (
 # TODO: Move chevron to _vendor
 from metaflow.plugins.cards.card_modules import chevron
 from metaflow.plugins.kubernetes.kubernetes import Kubernetes
+from metaflow.plugins.kubernetes.kube_utils import qos_requests_and_limits
 from metaflow.plugins.timeout_decorator import get_run_time_limit_for_task
 from metaflow.util import compress_list, dict_to_cli_options, get_username
@@ -428,25 +429,25 @@ class Airflow(object):
             if k8s_deco.attributes["namespace"] is not None
             else "default"
         )
+        qos_requests, qos_limits = qos_requests_and_limits(
+            k8s_deco.attributes["qos"],
+            k8s_deco.attributes["cpu"],
+            k8s_deco.attributes["memory"],
+            k8s_deco.attributes["disk"],
+        )
         resources = dict(
-            requests={
-                "cpu": k8s_deco.attributes["cpu"],
-                "memory": "%sM" % str(k8s_deco.attributes["memory"]),
-                "ephemeral-storage": str(k8s_deco.attributes["disk"]),
-            }
+            requests=qos_requests,
+            limits={
+                **qos_limits,
+                **{
+                    "%s.com/gpu".lower()
+                    % k8s_deco.attributes["gpu_vendor"]: str(k8s_deco.attributes["gpu"])
+                    for k in [0]
+                    # Don't set GPU limits if gpu isn't specified.
+                    if k8s_deco.attributes["gpu"] is not None
+                },
+            },
         )
-        if k8s_deco.attributes["gpu"] is not None:
-            resources.update(
-                dict(
-                    limits={
-                        "%s.com/gpu".lower()
-                        % k8s_deco.attributes["gpu_vendor"]: str(
-                            k8s_deco.attributes["gpu"]
-                        )
-                    }
-                )
-            )
         annotations = {
             "metaflow/production_token": self.production_token,

metaflow/plugins/argo/argo_workflows.py CHANGED Viewed

@@ -54,6 +54,7 @@ from metaflow.metaflow_config import (
 from metaflow.metaflow_config_funcs import config_values, init_config
 from metaflow.mflog import BASH_SAVE_LOGS, bash_capture_logs, export_mflog_env_vars
 from metaflow.parameters import deploy_time_eval
+from metaflow.plugins.kubernetes.kube_utils import qos_requests_and_limits
 from metaflow.plugins.kubernetes.kubernetes import (
     parse_kube_keyvalue_list,
     validate_kube_labels,
@@ -1858,6 +1859,13 @@ class ArgoWorkflows(object):
             if tmpfs_enabled and tmpfs_tempdir:
                 env["METAFLOW_TEMPDIR"] = tmpfs_path
+            qos_requests, qos_limits = qos_requests_and_limits(
+                resources["qos"],
+                resources["cpu"],
+                resources["memory"],
+                resources["disk"],
+            )
             # Create a ContainerTemplate for this node. Ideally, we would have
             # liked to inline this ContainerTemplate and avoid scanning the workflow
             # twice, but due to issues with variable substitution, we will have to
@@ -1921,6 +1929,7 @@ class ArgoWorkflows(object):
                     persistent_volume_claims=resources["persistent_volume_claims"],
                     shared_memory=shared_memory,
                     port=port,
+                    qos=resources["qos"],
                 )
                 for k, v in env.items():
@@ -2113,17 +2122,17 @@ class ArgoWorkflows(object):
                                 image=resources["image"],
                                 image_pull_policy=resources["image_pull_policy"],
                                 resources=kubernetes_sdk.V1ResourceRequirements(
-                                    requests={
-                                        "cpu": str(resources["cpu"]),
-                                        "memory": "%sM" % str(resources["memory"]),
-                                        "ephemeral-storage": "%sM"
-                                        % str(resources["disk"]),
-                                    },
+                                    requests=qos_requests,
                                     limits={
-                                        "%s.com/gpu".lower()
-                                        % resources["gpu_vendor"]: str(resources["gpu"])
-                                        for k in [0]
-                                        if resources["gpu"] is not None
+                                        **qos_limits,
+                                        **{
+                                            "%s.com/gpu".lower()
+                                            % resources["gpu_vendor"]: str(
+                                                resources["gpu"]
+                                            )
+                                            for k in [0]
+                                            if resources["gpu"] is not None
+                                        },
                                     },
                                 ),
                                 # Configure secrets
@@ -2360,7 +2369,7 @@ class ArgoWorkflows(object):
                                 "memory": "500Mi",
                             },
                         ),
-                    )
+                    ).to_dict()
                 )
             ),
             Template("capture-error-hook-fn-preflight").steps(
@@ -2719,7 +2728,7 @@ class ArgoWorkflows(object):
                             },
                         ),
                     )
-                )
+                ).to_dict()
             )
         )
@@ -2889,7 +2898,7 @@ class ArgoWorkflows(object):
                                         "memory": "250Mi",
                                     },
                                 ),
-                            )
+                            ).to_dict()
                         )
                     )
                     .service_account_name(ARGO_EVENTS_SERVICE_ACCOUNT)

metaflow/plugins/argo/argo_workflows_deployer_objects.py CHANGED Viewed

@@ -10,7 +10,7 @@ from metaflow.metaflow_config import KUBERNETES_NAMESPACE
 from metaflow.plugins.argo.argo_workflows import ArgoWorkflows
 from metaflow.runner.deployer import Deployer, DeployedFlow, TriggeredRun
-from metaflow.runner.utils import get_lower_level_group, handle_timeout
+from metaflow.runner.utils import get_lower_level_group, handle_timeout, temporary_fifo
 def generate_fake_flow_file_contents(
@@ -341,18 +341,14 @@ class ArgoWorkflowsDeployedFlow(DeployedFlow):
         Exception
             If there is an error during the trigger process.
         """
-        with tempfile.TemporaryDirectory() as temp_dir:
-            tfp_runner_attribute = tempfile.NamedTemporaryFile(
-                dir=temp_dir, delete=False
-            )
+        with temporary_fifo() as (attribute_file_path, attribute_file_fd):
             # every subclass needs to have `self.deployer_kwargs`
             command = get_lower_level_group(
                 self.deployer.api,
                 self.deployer.top_level_kwargs,
                 self.deployer.TYPE,
                 self.deployer.deployer_kwargs,
-            ).trigger(deployer_attribute_file=tfp_runner_attribute.name, **kwargs)
+            ).trigger(deployer_attribute_file=attribute_file_path, **kwargs)
             pid = self.deployer.spm.run_command(
                 [sys.executable, *command],
@@ -363,7 +359,7 @@ class ArgoWorkflowsDeployedFlow(DeployedFlow):
             command_obj = self.deployer.spm.get(pid)
             content = handle_timeout(
-                tfp_runner_attribute, command_obj, self.deployer.file_read_timeout
+                attribute_file_fd, command_obj, self.deployer.file_read_timeout
             )
             if command_obj.process.returncode == 0:

metaflow/plugins/aws/step_functions/step_functions_deployer_objects.py CHANGED Viewed

@@ -6,7 +6,7 @@ from typing import ClassVar, Optional, List
 from metaflow.plugins.aws.step_functions.step_functions import StepFunctions
 from metaflow.runner.deployer import DeployedFlow, TriggeredRun
-from metaflow.runner.utils import get_lower_level_group, handle_timeout
+from metaflow.runner.utils import get_lower_level_group, handle_timeout, temporary_fifo
 class StepFunctionsTriggeredRun(TriggeredRun):
@@ -196,18 +196,14 @@ class StepFunctionsDeployedFlow(DeployedFlow):
         Exception
             If there is an error during the trigger process.
         """
-        with tempfile.TemporaryDirectory() as temp_dir:
-            tfp_runner_attribute = tempfile.NamedTemporaryFile(
-                dir=temp_dir, delete=False
-            )
+        with temporary_fifo() as (attribute_file_path, attribute_file_fd):
             # every subclass needs to have `self.deployer_kwargs`
             command = get_lower_level_group(
                 self.deployer.api,
                 self.deployer.top_level_kwargs,
                 self.deployer.TYPE,
                 self.deployer.deployer_kwargs,
-            ).trigger(deployer_attribute_file=tfp_runner_attribute.name, **kwargs)
+            ).trigger(deployer_attribute_file=attribute_file_path, **kwargs)
             pid = self.deployer.spm.run_command(
                 [sys.executable, *command],
@@ -218,7 +214,7 @@ class StepFunctionsDeployedFlow(DeployedFlow):
             command_obj = self.deployer.spm.get(pid)
             content = handle_timeout(
-                tfp_runner_attribute, command_obj, self.deployer.file_read_timeout
+                attribute_file_fd, command_obj, self.deployer.file_read_timeout
             )
             if command_obj.process.returncode == 0:

metaflow/plugins/datatools/s3/s3.py CHANGED Viewed

@@ -600,7 +600,9 @@ class S3(object):
         # returned are Unicode.
         key = getattr(key_value, "key", key_value)
         if self._s3root is None:
-            parsed = urlparse(to_unicode(key))
+            # NOTE: S3 allows fragments as part of object names, e.g. /dataset #1/data.txt
+            # Without allow_fragments=False the parsed.path for an object name with fragments is incomplete.
+            parsed = urlparse(to_unicode(key), allow_fragments=False)
             if parsed.scheme == "s3" and parsed.path:
                 return key
             else:
@@ -765,7 +767,9 @@ class S3(object):
         """
         url = self._url(key)
-        src = urlparse(url)
+        # NOTE: S3 allows fragments as part of object names, e.g. /dataset #1/data.txt
+        # Without allow_fragments=False the parsed src.path for an object name with fragments is incomplete.
+        src = urlparse(url, allow_fragments=False)
         def _info(s3, tmp):
             resp = s3.head_object(Bucket=src.netloc, Key=src.path.lstrip('/"'))
@@ -891,7 +895,9 @@ class S3(object):
         DOWNLOAD_MAX_CHUNK = 2 * 1024 * 1024 * 1024 - 1
         url, r = self._url_and_range(key)
-        src = urlparse(url)
+        # NOTE: S3 allows fragments as part of object names, e.g. /dataset #1/data.txt
+        # Without allow_fragments=False the parsed src.path for an object name with fragments is incomplete.
+        src = urlparse(url, allow_fragments=False)
         def _download(s3, tmp):
             if r:
@@ -1173,7 +1179,9 @@ class S3(object):
         blob.close = lambda: None
         url = self._url(key)
-        src = urlparse(url)
+        # NOTE: S3 allows fragments as part of object names, e.g. /dataset #1/data.txt
+        # Without allow_fragments=False the parsed src.path for an object name with fragments is incomplete.
+        src = urlparse(url, allow_fragments=False)
         extra_args = None
         if content_type or metadata or self._encryption:
             extra_args = {}

metaflow/plugins/events_decorator.py CHANGED Viewed

@@ -170,7 +170,7 @@ class TriggerDecorator(FlowDecorator):
                 # process every event in events
                 for event in self.attributes["events"]:
                     processed_event = self.process_event_name(event)
-                    self.triggers.append("processed event", processed_event)
+                    self.triggers.append(processed_event)
             elif callable(self.attributes["events"]) and not isinstance(
                 self.attributes["events"], DeployTimeField
             ):

metaflow/plugins/kubernetes/kube_utils.py CHANGED Viewed

@@ -23,3 +23,32 @@ def parse_cli_options(flow_name, run_id, user, my_runs, echo):
             raise CommandException("A previous run id was not found. Specify --run-id.")
     return flow_name, run_id, user
+def qos_requests_and_limits(qos: str, cpu: int, memory: int, storage: int):
+    "return resource requests and limits for the kubernetes pod based on the given QoS Class"
+    # case insensitive matching for QoS class
+    qos = qos.lower()
+    # Determine the requests and limits to define chosen QoS class
+    qos_limits = {}
+    qos_requests = {}
+    if qos == "guaranteed":
+        # Guaranteed - has both cpu/memory limits. requests not required, as these will be inferred.
+        qos_limits = {
+            "cpu": str(cpu),
+            "memory": "%sM" % str(memory),
+            "ephemeral-storage": "%sM" % str(storage),
+        }
+        # NOTE: Even though Kubernetes will produce matching requests for the specified limits, this happens late in the lifecycle.
+        # We specify them explicitly here to make some K8S tooling happy, in case they rely on .resources.requests being present at time of submitting the job.
+        qos_requests = qos_limits
+    else:
+        # Burstable - not Guaranteed, and has a memory/cpu limit or request
+        qos_requests = {
+            "cpu": str(cpu),
+            "memory": "%sM" % str(memory),
+            "ephemeral-storage": "%sM" % str(storage),
+        }
+    # TODO: Add support for BestEffort once there is a use case for it.
+    # BestEffort - no limit or requests for cpu/memory
+    return qos_requests, qos_limits

metaflow/plugins/kubernetes/kubernetes.py CHANGED Viewed

@@ -196,6 +196,7 @@ class Kubernetes(object):
         shared_memory=None,
         port=None,
         num_parallel=None,
+        qos=None,
     ):
         name = "js-%s" % str(uuid4())[:6]
         jobset = (
@@ -228,6 +229,7 @@ class Kubernetes(object):
                 shared_memory=shared_memory,
                 port=port,
                 num_parallel=num_parallel,
+                qos=qos,
             )
             .environment_variable("METAFLOW_CODE_SHA", code_package_sha)
             .environment_variable("METAFLOW_CODE_URL", code_package_url)
@@ -504,6 +506,7 @@ class Kubernetes(object):
         shared_memory=None,
         port=None,
         name_pattern=None,
+        qos=None,
     ):
         if env is None:
             env = {}
@@ -544,6 +547,7 @@ class Kubernetes(object):
                 persistent_volume_claims=persistent_volume_claims,
                 shared_memory=shared_memory,
                 port=port,
+                qos=qos,
             )
             .environment_variable("METAFLOW_CODE_SHA", code_package_sha)
             .environment_variable("METAFLOW_CODE_URL", code_package_url)

metaflow/plugins/kubernetes/kubernetes_cli.py CHANGED Viewed

@@ -126,6 +126,12 @@ def kubernetes():
     type=int,
     help="Number of parallel nodes to run as a multi-node job.",
 )
+@click.option(
+    "--qos",
+    default=None,
+    type=str,
+    help="Quality of Service class for the Kubernetes pod",
+)
 @click.pass_context
 def step(
     ctx,
@@ -154,6 +160,7 @@ def step(
     shared_memory=None,
     port=None,
     num_parallel=None,
+    qos=None,
     **kwargs
 ):
     def echo(msg, stream="stderr", job_id=None, **kwargs):
@@ -294,6 +301,7 @@ def step(
                 shared_memory=shared_memory,
                 port=port,
                 num_parallel=num_parallel,
+                qos=qos,
             )
     except Exception as e:
         traceback.print_exc(chain=False)

metaflow/plugins/kubernetes/kubernetes_decorator.py CHANGED Viewed

@@ -26,6 +26,7 @@ from metaflow.metaflow_config import (
     KUBERNETES_SERVICE_ACCOUNT,
     KUBERNETES_SHARED_MEMORY,
     KUBERNETES_TOLERATIONS,
+    KUBERNETES_QOS,
 )
 from metaflow.plugins.resources_decorator import ResourcesDecorator
 from metaflow.plugins.timeout_decorator import get_run_time_limit_for_task
@@ -43,6 +44,8 @@ except NameError:
     unicode = str
     basestring = str
+SUPPORTED_KUBERNETES_QOS_CLASSES = ["Guaranteed", "Burstable"]
 class KubernetesDecorator(StepDecorator):
     """
@@ -111,6 +114,8 @@ class KubernetesDecorator(StepDecorator):
     hostname_resolution_timeout: int, default 10 * 60
         Timeout in seconds for the workers tasks in the gang scheduled cluster to resolve the hostname of control task.
         Only applicable when @parallel is used.
+    qos: str, default: Burstable
+        Quality of Service class to assign to the pod. Supported values are: Guaranteed, Burstable, BestEffort
     """
     name = "kubernetes"
@@ -138,6 +143,7 @@ class KubernetesDecorator(StepDecorator):
         "compute_pool": None,
         "executable": None,
         "hostname_resolution_timeout": 10 * 60,
+        "qos": KUBERNETES_QOS,
     }
     package_url = None
     package_sha = None
@@ -261,6 +267,17 @@ class KubernetesDecorator(StepDecorator):
         self.step = step
         self.flow_datastore = flow_datastore
+        if (
+            self.attributes["qos"] is not None
+            # case insensitive matching.
+            and self.attributes["qos"].lower()
+            not in [c.lower() for c in SUPPORTED_KUBERNETES_QOS_CLASSES]
+        ):
+            raise MetaflowException(
+                "*%s* is not a valid Kubernetes QoS class. Choose one of the following: %s"
+                % (self.attributes["qos"], ", ".join(SUPPORTED_KUBERNETES_QOS_CLASSES))
+            )
         if any([deco.name == "batch" for deco in decos]):
             raise MetaflowException(
                 "Step *{step}* is marked for execution both on AWS Batch and "

metaflow/plugins/kubernetes/kubernetes_job.py CHANGED Viewed

@@ -16,6 +16,8 @@ from .kubernetes_jobsets import (
     KubernetesJobSet,
 )  # We need this import for Kubernetes Client.
+from .kube_utils import qos_requests_and_limits
 class KubernetesJobException(MetaflowException):
     headline = "Kubernetes job error"
@@ -75,6 +77,12 @@ class KubernetesJob(object):
             if self._kwargs["shared_memory"]
             else None
         )
+        qos_requests, qos_limits = qos_requests_and_limits(
+            self._kwargs["qos"],
+            self._kwargs["cpu"],
+            self._kwargs["memory"],
+            self._kwargs["disk"],
+        )
         initial_configs = init_config()
         for entry in ["OBP_PERIMETER", "OBP_INTEGRATIONS_SECRETS_METADATA_URL"]:
             if entry not in initial_configs:
@@ -176,20 +184,18 @@ class KubernetesJob(object):
                             image_pull_policy=self._kwargs["image_pull_policy"],
                             name=self._kwargs["step_name"].replace("_", "-"),
                             resources=client.V1ResourceRequirements(
-                                requests={
-                                    "cpu": str(self._kwargs["cpu"]),
-                                    "memory": "%sM" % str(self._kwargs["memory"]),
-                                    "ephemeral-storage": "%sM"
-                                    % str(self._kwargs["disk"]),
-                                },
+                                requests=qos_requests,
                                 limits={
-                                    "%s.com/gpu".lower()
-                                    % self._kwargs["gpu_vendor"]: str(
-                                        self._kwargs["gpu"]
-                                    )
-                                    for k in [0]
-                                    # Don't set GPU limits if gpu isn't specified.
-                                    if self._kwargs["gpu"] is not None
+                                    **qos_limits,
+                                    **{
+                                        "%s.com/gpu".lower()
+                                        % self._kwargs["gpu_vendor"]: str(
+                                            self._kwargs["gpu"]
+                                        )
+                                        for k in [0]
+                                        # Don't set GPU limits if gpu isn't specified.
+                                        if self._kwargs["gpu"] is not None
+                                    },
                                 },
                             ),
                             volume_mounts=(

metaflow/plugins/kubernetes/kubernetes_jobsets.py CHANGED Viewed

@@ -9,6 +9,8 @@ from metaflow.metaflow_config import KUBERNETES_JOBSET_GROUP, KUBERNETES_JOBSET_
 from metaflow.tracing import inject_tracing_vars
 from metaflow.metaflow_config import KUBERNETES_SECRETS
+from .kube_utils import qos_requests_and_limits
 class KubernetesJobsetException(MetaflowException):
     headline = "Kubernetes jobset error"
@@ -554,7 +556,12 @@ class JobSetSpec(object):
             if self._kwargs["shared_memory"]
             else None
         )
+        qos_requests, qos_limits = qos_requests_and_limits(
+            self._kwargs["qos"],
+            self._kwargs["cpu"],
+            self._kwargs["memory"],
+            self._kwargs["disk"],
+        )
         return dict(
             name=self.name,
             template=client.api_client.ApiClient().sanitize_for_serialization(
@@ -653,21 +660,18 @@ class JobSetSpec(object):
                                             "_", "-"
                                         ),
                                         resources=client.V1ResourceRequirements(
-                                            requests={
-                                                "cpu": str(self._kwargs["cpu"]),
-                                                "memory": "%sM"
-                                                % str(self._kwargs["memory"]),
-                                                "ephemeral-storage": "%sM"
-                                                % str(self._kwargs["disk"]),
-                                            },
+                                            requests=qos_requests,
                                             limits={
-                                                "%s.com/gpu".lower()
-                                                % self._kwargs["gpu_vendor"]: str(
-                                                    self._kwargs["gpu"]
-                                                )
-                                                for k in [0]
-                                                # Don't set GPU limits if gpu isn't specified.
-                                                if self._kwargs["gpu"] is not None
+                                                **qos_limits,
+                                                **{
+                                                    "%s.com/gpu".lower()
+                                                    % self._kwargs["gpu_vendor"]: str(
+                                                        self._kwargs["gpu"]
+                                                    )
+                                                    for k in [0]
+                                                    # Don't set GPU limits if gpu isn't specified.
+                                                    if self._kwargs["gpu"] is not None
+                                                },
                                             },
                                         ),
                                         volume_mounts=(

ob-metaflow 2.12.32.1__py2.py3-none-any.whl → 2.12.35.1__py2.py3-none-any.whl

Potentially problematic release.

ob-metaflow 2.12.32.1__py2.py3-none-any.whl → 2.12.35.1__py2.py3-none-any.whl