ob-metaflow 2.12.36.3__py2.py3-none-any.whl → 2.12.39.1__py2.py3-none-any.whl
This diff shows the changes between two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
Potentially problematic release: this version of ob-metaflow might be problematic.
- metaflow/__init__.py +3 -0
- metaflow/cli.py +84 -697
- metaflow/cli_args.py +17 -0
- metaflow/cli_components/__init__.py +0 -0
- metaflow/cli_components/dump_cmd.py +96 -0
- metaflow/cli_components/init_cmd.py +51 -0
- metaflow/cli_components/run_cmds.py +358 -0
- metaflow/cli_components/step_cmd.py +189 -0
- metaflow/cli_components/utils.py +140 -0
- metaflow/cmd/develop/stub_generator.py +9 -2
- metaflow/decorators.py +63 -2
- metaflow/extension_support/plugins.py +41 -27
- metaflow/flowspec.py +156 -16
- metaflow/includefile.py +50 -22
- metaflow/metaflow_config.py +1 -1
- metaflow/package.py +17 -3
- metaflow/parameters.py +80 -23
- metaflow/plugins/__init__.py +4 -0
- metaflow/plugins/airflow/airflow_cli.py +1 -0
- metaflow/plugins/argo/argo_workflows.py +41 -1
- metaflow/plugins/argo/argo_workflows_cli.py +1 -0
- metaflow/plugins/argo/argo_workflows_deployer_objects.py +47 -1
- metaflow/plugins/aws/batch/batch_decorator.py +2 -2
- metaflow/plugins/aws/step_functions/step_functions.py +32 -0
- metaflow/plugins/aws/step_functions/step_functions_cli.py +1 -0
- metaflow/plugins/aws/step_functions/step_functions_deployer_objects.py +3 -0
- metaflow/plugins/datatools/s3/s3op.py +3 -3
- metaflow/plugins/kubernetes/kubernetes_cli.py +1 -1
- metaflow/plugins/kubernetes/kubernetes_decorator.py +2 -2
- metaflow/plugins/pypi/bootstrap.py +196 -61
- metaflow/plugins/pypi/conda_decorator.py +20 -10
- metaflow/plugins/pypi/conda_environment.py +76 -21
- metaflow/plugins/pypi/micromamba.py +42 -15
- metaflow/plugins/pypi/pip.py +8 -3
- metaflow/plugins/pypi/pypi_decorator.py +11 -9
- metaflow/plugins/timeout_decorator.py +2 -2
- metaflow/runner/click_api.py +73 -19
- metaflow/runner/deployer.py +1 -1
- metaflow/runner/deployer_impl.py +2 -2
- metaflow/runner/metaflow_runner.py +4 -1
- metaflow/runner/nbdeploy.py +2 -0
- metaflow/runner/nbrun.py +1 -1
- metaflow/runner/subprocess_manager.py +3 -1
- metaflow/runner/utils.py +41 -19
- metaflow/runtime.py +111 -73
- metaflow/sidecar/sidecar_worker.py +1 -1
- metaflow/user_configs/__init__.py +0 -0
- metaflow/user_configs/config_decorators.py +563 -0
- metaflow/user_configs/config_options.py +495 -0
- metaflow/user_configs/config_parameters.py +386 -0
- metaflow/util.py +17 -0
- metaflow/version.py +1 -1
- {ob_metaflow-2.12.36.3.dist-info → ob_metaflow-2.12.39.1.dist-info}/METADATA +3 -2
- {ob_metaflow-2.12.36.3.dist-info → ob_metaflow-2.12.39.1.dist-info}/RECORD +58 -48
- {ob_metaflow-2.12.36.3.dist-info → ob_metaflow-2.12.39.1.dist-info}/LICENSE +0 -0
- {ob_metaflow-2.12.36.3.dist-info → ob_metaflow-2.12.39.1.dist-info}/WHEEL +0 -0
- {ob_metaflow-2.12.36.3.dist-info → ob_metaflow-2.12.39.1.dist-info}/entry_points.txt +0 -0
- {ob_metaflow-2.12.36.3.dist-info → ob_metaflow-2.12.39.1.dist-info}/top_level.txt +0 -0
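The most visible addition in this release is the new metaflow/user_configs package (config_decorators.py, config_options.py, config_parameters.py), which the Argo Workflows and Step Functions changes below build on. A minimal sketch of how a flow might declare a config-backed value follows; it assumes the Config object in config_parameters.py behaves like a deploy-time-resolved Parameter, and the flow, file name, and keys are hypothetical, not taken from this diff.

# Hypothetical flow illustrating the new user_configs machinery; the import
# path exists per the file list above, but the exact Config signature and
# access pattern are assumptions.
from metaflow import FlowSpec, step
from metaflow.user_configs.config_parameters import Config


class TrainFlow(FlowSpec):
    # Resolved from a JSON file at deploy time rather than per run, which is
    # why the orchestrator code below skips IS_CONFIG_PARAMETER entries when
    # building run-time parameters.
    cfg = Config("cfg", default="config.json")

    @step
    def start(self):
        print("learning rate:", self.cfg["learning_rate"])  # hypothetical key
        self.next(self.end)

    @step
    def end(self):
        pass


if __name__ == "__main__":
    TrainFlow()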

metaflow/plugins/argo/argo_workflows.py

@@ -61,6 +61,7 @@ from metaflow.plugins.kubernetes.kubernetes import (
 )
 from metaflow.plugins.kubernetes.kubernetes_jobsets import KubernetesArgoJobSet
 from metaflow.unbounded_foreach import UBF_CONTROL, UBF_TASK
+from metaflow.user_configs.config_options import ConfigInput
 from metaflow.util import (
     compress_list,
     dict_to_cli_options,
@@ -169,6 +170,7 @@ class ArgoWorkflows(object):
         self.enable_heartbeat_daemon = enable_heartbeat_daemon
         self.enable_error_msg_capture = enable_error_msg_capture
         self.parameters = self._process_parameters()
+        self.config_parameters = self._process_config_parameters()
         self.triggers, self.trigger_options = self._process_triggers()
         self._schedule, self._timezone = self._get_schedule()

@@ -456,6 +458,10 @@ class ArgoWorkflows(object):
                     "case-insensitive." % param.name
                 )
             seen.add(norm)
+            # NOTE: We skip config parameters as these do not have dynamic values,
+            # and need to be treated differently.
+            if param.IS_CONFIG_PARAMETER:
+                continue

             extra_attrs = {}
             if param.kwargs.get("type") == JSONType:
@@ -489,6 +495,7 @@ class ArgoWorkflows(object):
             # execution - which needs to be fixed imminently.
             if not is_required or default_value is not None:
                 default_value = json.dumps(default_value)
+
             parameters[param.name] = dict(
                 name=param.name,
                 value=default_value,
@@ -499,6 +506,27 @@ class ArgoWorkflows(object):
             )
         return parameters

+    def _process_config_parameters(self):
+        parameters = []
+        seen = set()
+        for var, param in self.flow._get_parameters():
+            if not param.IS_CONFIG_PARAMETER:
+                continue
+            # Throw an exception if the parameter is specified twice.
+            norm = param.name.lower()
+            if norm in seen:
+                raise MetaflowException(
+                    "Parameter *%s* is specified twice. "
+                    "Note that parameter names are "
+                    "case-insensitive." % param.name
+                )
+            seen.add(norm)
+
+            parameters.append(
+                dict(name=param.name, kv_name=ConfigInput.make_key_name(param.name))
+            )
+        return parameters
+
     def _process_triggers(self):
         # Impute triggers for Argo Workflow Template specified through @trigger and
         # @trigger_on_finish decorators
@@ -521,8 +549,13 @@ class ArgoWorkflows(object):
         # convert them to lower case since Metaflow parameters are case
         # insensitive.
         seen = set()
+        # NOTE: We skip config parameters as their values can not be set through event payloads
         params = set(
-            [param.name.lower() for var, param in self.flow._get_parameters()]
+            [
+                param.name.lower()
+                for var, param in self.flow._get_parameters()
+                if not param.IS_CONFIG_PARAMETER
+            ]
         )
         trigger_deco = self.flow._flow_decorators.get("trigger")[0]
         trigger_deco.format_deploytime_value()
@@ -1721,6 +1754,13 @@ class ArgoWorkflows(object):
             metaflow_version["production_token"] = self.production_token
             env["METAFLOW_VERSION"] = json.dumps(metaflow_version)

+            # map config values
+            cfg_env = {
+                param["name"]: param["kv_name"] for param in self.config_parameters
+            }
+            if cfg_env:
+                env["METAFLOW_FLOW_CONFIG_VALUE"] = json.dumps(cfg_env)
+
             # Set the template inputs and outputs for passing state. Very simply,
             # the container template takes in input-paths as input and outputs
             # the task-id (which feeds in as input-paths to the subsequent task).
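For orientation, the METAFLOW_FLOW_CONFIG_VALUE environment variable added above carries a JSON mapping from each config parameter's name to the key under which its resolved value is stored, as produced by ConfigInput.make_key_name. A hedged illustration of how that mapping is assembled; the parameter name and key string are made-up placeholders, only the dict comprehension mirrors the code above.

import json

# Shape of self.config_parameters as built by _process_config_parameters();
# "cfg" and its kv_name are placeholders, not real make_key_name output.
config_parameters = [{"name": "cfg", "kv_name": "<key-from-make_key_name>"}]

cfg_env = {param["name"]: param["kv_name"] for param in config_parameters}
if cfg_env:
    env = {"METAFLOW_FLOW_CONFIG_VALUE": json.dumps(cfg_env)}
    print(env)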

metaflow/plugins/argo/argo_workflows_cli.py

@@ -470,6 +470,7 @@ def make_flow(
     decorators._attach_decorators(
         obj.flow, [KubernetesDecorator.name, EnvironmentDecorator.name]
     )
+    decorators._init(obj.flow)

     decorators._init_step_decorators(
         obj.flow, obj.graph, obj.environment, obj.flow_datastore, obj.logger

metaflow/plugins/argo/argo_workflows_deployer_objects.py

@@ -1,5 +1,6 @@
 import sys
 import json
+import time
 import tempfile
 from typing import ClassVar, Optional

@@ -97,6 +98,7 @@ class ArgoWorkflowsTriggeredRun(TriggeredRun):
         )

         command_obj = self.deployer.spm.get(pid)
+        command_obj.sync_wait()
         return command_obj.process.returncode == 0

     def unsuspend(self, **kwargs) -> bool:
@@ -131,6 +133,7 @@ class ArgoWorkflowsTriggeredRun(TriggeredRun):
         )

         command_obj = self.deployer.spm.get(pid)
+        command_obj.sync_wait()
         return command_obj.process.returncode == 0

     def terminate(self, **kwargs) -> bool:
@@ -165,8 +168,50 @@ class ArgoWorkflowsTriggeredRun(TriggeredRun):
         )

         command_obj = self.deployer.spm.get(pid)
+        command_obj.sync_wait()
         return command_obj.process.returncode == 0

+    def wait_for_completion(self, timeout: Optional[int] = None):
+        """
+        Wait for the workflow to complete or timeout.
+
+        Parameters
+        ----------
+        timeout : int, optional, default None
+            Maximum time in seconds to wait for workflow completion.
+            If None, waits indefinitely.
+
+        Raises
+        ------
+        TimeoutError
+            If the workflow does not complete within the specified timeout period.
+        """
+        start_time = time.time()
+        check_interval = 5
+        while self.is_running:
+            if timeout is not None and (time.time() - start_time) > timeout:
+                raise TimeoutError(
+                    "Workflow did not complete within specified timeout."
+                )
+            time.sleep(check_interval)
+
+    @property
+    def is_running(self):
+        """
+        Check if the workflow is currently running.
+
+        Returns
+        -------
+        bool
+            True if the workflow status is either 'Pending' or 'Running',
+            False otherwise.
+        """
+        workflow_status = self.status
+        # full list of all states present here:
+        # https://github.com/argoproj/argo-workflows/blob/main/pkg/apis/workflow/v1alpha1/workflow_types.go#L54
+        # we only consider non-terminal states to determine if the workflow has not finished
+        return workflow_status is not None and workflow_status in ["Pending", "Running"]
+
     @property
     def status(self) -> Optional[str]:
         """
@@ -319,6 +364,7 @@ class ArgoWorkflowsDeployedFlow(DeployedFlow):
         )

         command_obj = self.deployer.spm.get(pid)
+        command_obj.sync_wait()
         return command_obj.process.returncode == 0

     def trigger(self, **kwargs) -> ArgoWorkflowsTriggeredRun:
@@ -361,7 +407,7 @@ class ArgoWorkflowsDeployedFlow(DeployedFlow):
             content = handle_timeout(
                 attribute_file_fd, command_obj, self.deployer.file_read_timeout
             )
-
+            command_obj.sync_wait()
             if command_obj.process.returncode == 0:
                 return ArgoWorkflowsTriggeredRun(
                     deployer=self.deployer, content=content
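The new wait_for_completion and is_running additions, together with the sync_wait() calls, make the programmatic deployer easier to drive end to end. A sketch of the intended usage, assuming the Deployer API from metaflow.runner.deployer; the flow file name and timeout are placeholders.

from metaflow import Deployer

# "flow.py" is a placeholder for a real flow file.
deployed = Deployer("flow.py").argo_workflows().create()
run = deployed.trigger()

# New in this release: block until the Argo workflow leaves the
# Pending/Running states, or raise TimeoutError after an hour.
run.wait_for_completion(timeout=3600)
print("final status:", run.status)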

metaflow/plugins/aws/batch/batch_decorator.py

@@ -138,8 +138,8 @@ class BatchDecorator(StepDecorator):
     supports_conda_environment = True
     target_platform = "linux-64"

-    def __init__(self, attributes=None, statically_defined=False):
-        super(BatchDecorator, self).__init__(attributes, statically_defined)
+    def init(self):
+        super(BatchDecorator, self).init()

         # If no docker image is explicitly specified, impute a default image.
         if not self.attributes["image"]:

metaflow/plugins/aws/step_functions/step_functions.py

@@ -18,6 +18,7 @@ from metaflow.metaflow_config import (
     SFN_S3_DISTRIBUTED_MAP_OUTPUT_PATH,
 )
 from metaflow.parameters import deploy_time_eval
+from metaflow.user_configs.config_options import ConfigInput
 from metaflow.util import dict_to_cli_options, to_pascalcase

 from ..batch.batch import Batch
@@ -71,6 +72,7 @@ class StepFunctions(object):
         self.username = username
         self.max_workers = max_workers
         self.workflow_timeout = workflow_timeout
+        self.config_parameters = self._process_config_parameters()

         # https://aws.amazon.com/blogs/aws/step-functions-distributed-map-a-serverless-solution-for-large-scale-parallel-data-processing/
         self.use_distributed_map = use_distributed_map
@@ -485,6 +487,10 @@ class StepFunctions(object):
                     "case-insensitive." % param.name
                 )
             seen.add(norm)
+            # NOTE: We skip config parameters as these do not have dynamic values,
+            # and need to be treated differently.
+            if param.IS_CONFIG_PARAMETER:
+                continue

             is_required = param.kwargs.get("required", False)
             # Throw an exception if a schedule is set for a flow with required
@@ -501,6 +507,27 @@ class StepFunctions(object):
             parameters.append(dict(name=param.name, value=value))
         return parameters

+    def _process_config_parameters(self):
+        parameters = []
+        seen = set()
+        for var, param in self.flow._get_parameters():
+            if not param.IS_CONFIG_PARAMETER:
+                continue
+            # Throw an exception if the parameter is specified twice.
+            norm = param.name.lower()
+            if norm in seen:
+                raise MetaflowException(
+                    "Parameter *%s* is specified twice. "
+                    "Note that parameter names are "
+                    "case-insensitive." % param.name
+                )
+            seen.add(norm)
+
+            parameters.append(
+                dict(name=param.name, kv_name=ConfigInput.make_key_name(param.name))
+            )
+        return parameters
+
     def _batch(self, node):
         attrs = {
             # metaflow.user is only used for setting the AWS Job Name.
@@ -747,6 +774,11 @@ class StepFunctions(object):
             metaflow_version["production_token"] = self.production_token
             env["METAFLOW_VERSION"] = json.dumps(metaflow_version)

+            # map config values
+            cfg_env = {param["name"]: param["kv_name"] for param in self.config_parameters}
+            if cfg_env:
+                env["METAFLOW_FLOW_CONFIG_VALUE"] = json.dumps(cfg_env)
+
             # Set AWS DynamoDb Table Name for state tracking for for-eaches.
             # There are three instances when metaflow runtime directly interacts
             # with AWS DynamoDB.

metaflow/plugins/aws/step_functions/step_functions_cli.py

@@ -326,6 +326,7 @@ def make_flow(

     # Attach AWS Batch decorator to the flow
     decorators._attach_decorators(obj.flow, [BatchDecorator.name])
+    decorators._init(obj.flow)
     decorators._init_step_decorators(
         obj.flow, obj.graph, obj.environment, obj.flow_datastore, obj.logger
     )

metaflow/plugins/aws/step_functions/step_functions_deployer_objects.py

@@ -46,6 +46,7 @@ class StepFunctionsTriggeredRun(TriggeredRun):
         )

         command_obj = self.deployer.spm.get(pid)
+        command_obj.sync_wait()
         return command_obj.process.returncode == 0


@@ -174,6 +175,7 @@ class StepFunctionsDeployedFlow(DeployedFlow):
         )

         command_obj = self.deployer.spm.get(pid)
+        command_obj.sync_wait()
         return command_obj.process.returncode == 0

     def trigger(self, **kwargs) -> StepFunctionsTriggeredRun:
@@ -217,6 +219,7 @@ class StepFunctionsDeployedFlow(DeployedFlow):
                 attribute_file_fd, command_obj, self.deployer.file_read_timeout
             )

+            command_obj.sync_wait()
             if command_obj.process.returncode == 0:
                 return StepFunctionsTriggeredRun(
                     deployer=self.deployer, content=content

metaflow/plugins/datatools/s3/s3op.py

@@ -722,8 +722,8 @@ def cli():
     pass


-@tracing.cli_entrypoint("s3op/list")
 @cli.command("list", help="List S3 objects")
+@tracing.cli_entrypoint("s3op/list")
 @click.option(
     "--recursive/--no-recursive",
     default=False,
@@ -782,8 +782,8 @@ def lst(
         print(format_result_line(idx, url.prefix, url.url, str(size)))


-@tracing.cli_entrypoint("s3op/put")
 @cli.command(help="Upload files to S3")
+@tracing.cli_entrypoint("s3op/put")
 @click.option(
     "--file",
     "files",
@@ -977,8 +977,8 @@ def _populate_prefixes(prefixes, inputs):
     return prefixes, is_transient_retry


-@tracing.cli_entrypoint("s3op/get")
 @cli.command(help="Download files from S3")
+@tracing.cli_entrypoint("s3op/get")
 @click.option(
     "--recursive/--no-recursive",
     default=False,

metaflow/plugins/kubernetes/kubernetes_cli.py

@@ -33,12 +33,12 @@ def kubernetes():
     pass


-@tracing.cli_entrypoint("kubernetes/step")
 @kubernetes.command(
     help="Execute a single task on Kubernetes. This command calls the top-level step "
     "command inside a Kubernetes pod with the given options. Typically you do not call "
     "this command directly; it is used internally by Metaflow."
 )
+@tracing.cli_entrypoint("kubernetes/step")
 @click.argument("step-name")
 @click.argument("code-package-sha")
 @click.argument("code-package-url")

metaflow/plugins/kubernetes/kubernetes_decorator.py

@@ -153,8 +153,8 @@ class KubernetesDecorator(StepDecorator):
     supports_conda_environment = True
     target_platform = "linux-64"

-    def __init__(self, attributes=None, statically_defined=False):
-        super(KubernetesDecorator, self).__init__(attributes, statically_defined)
+    def init(self):
+        super(KubernetesDecorator, self).init()

         if not self.attributes["namespace"]:
             self.attributes["namespace"] = KUBERNETES_NAMESPACE
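Both decorator changes follow the same refactor: attribute fix-ups move out of __init__ and into an init() hook, which the new decorators._init(obj.flow) calls added to the CLI make_flow functions invoke once config values are known. A rough sketch of the pattern for a custom step decorator; the decorator name and attribute are invented, and the exact hook contract is an assumption drawn from this diff.

from metaflow.decorators import StepDecorator


class MyImageDecorator(StepDecorator):
    # Hypothetical decorator shown only to illustrate the init() pattern.
    name = "my_image"
    defaults = {"image": None}

    def init(self):
        # Runs via decorators._init(flow), i.e. after config values are
        # resolved, instead of imputing defaults eagerly in __init__.
        super().init()
        if not self.attributes["image"]:
            self.attributes["image"] = "registry.example.com/default:latest"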

metaflow/plugins/pypi/bootstrap.py

@@ -1,4 +1,5 @@
 import bz2
+import concurrent.futures
 import io
 import json
 import os
@@ -6,6 +7,9 @@ import shutil
 import subprocess
 import sys
 import tarfile
+import time
+
+import requests

 from metaflow.metaflow_config import DATASTORE_LOCAL_DIR
 from metaflow.plugins import DATASTORES
@@ -15,6 +19,18 @@ from . import MAGIC_FILE, _datastore_packageroot

 # Bootstraps a valid conda virtual environment composed of conda and pypi packages

+
+def timer(func):
+    def wrapper(*args, **kwargs):
+        start_time = time.time()
+        result = func(*args, **kwargs)
+        duration = time.time() - start_time
+        # print(f"Time taken for {func.__name__}: {duration:.2f} seconds")
+        return result
+
+    return wrapper
+
+
 if __name__ == "__main__":
     if len(sys.argv) != 5:
         print("Usage: bootstrap.py <flow_name> <id> <datastore_type> <architecture>")
@@ -47,6 +63,8 @@ if __name__ == "__main__":

     prefix = os.path.join(os.getcwd(), architecture, id_)
     pkgs_dir = os.path.join(os.getcwd(), ".pkgs")
+    conda_pkgs_dir = os.path.join(pkgs_dir, "conda")
+    pypi_pkgs_dir = os.path.join(pkgs_dir, "pypi")
     manifest_dir = os.path.join(os.getcwd(), DATASTORE_LOCAL_DIR, flow_name)

     datastores = [d for d in DATASTORES if d.TYPE == datastore_type]
@@ -64,77 +82,194 @@ if __name__ == "__main__":
         os.path.join(os.getcwd(), MAGIC_FILE),
         os.path.join(manifest_dir, MAGIC_FILE),
     )
-
     with open(os.path.join(manifest_dir, MAGIC_FILE)) as f:
         env = json.load(f)[id_][architecture]

-
-
-
-
+    def run_cmd(cmd):
+        result = subprocess.run(
+            cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
+        )
+        if result.returncode != 0:
+            print(f"Bootstrap failed while executing: {cmd}")
+            print("Stdout:", result.stdout)
+            print("Stderr:", result.stderr)
+            sys.exit(1)
+
+    @timer
+    def install_micromamba(architecture):
+        micromamba_dir = os.path.join(os.getcwd(), "micromamba")
+        micromamba_path = os.path.join(micromamba_dir, "bin", "micromamba")
+
+        if which("micromamba"):
+            return which("micromamba")
+        if os.path.exists(micromamba_path):
+            os.environ["PATH"] += os.pathsep + os.path.dirname(micromamba_path)
+            return micromamba_path
+
+        # Download and extract in one go
+        # TODO: Serve from cloudflare
+        url = f"https://micro.mamba.pm/api/micromamba/{architecture}/2.0.4"
+
+        # Prepare directory once
+        os.makedirs(os.path.dirname(micromamba_path), exist_ok=True)
+
+        # Stream and process directly to file
+        with requests.get(url, stream=True, timeout=30) as response:
+            if response.status_code != 200:
+                raise Exception(
+                    f"Failed to download micromamba: HTTP {response.status_code}"
+                )
+
+            decompressor = bz2.BZ2Decompressor()
+
+            # Process in memory without temporary files
+            tar_content = decompressor.decompress(response.raw.read())
+
+            with tarfile.open(fileobj=io.BytesIO(tar_content), mode="r:") as tar:
+                member = tar.getmember("bin/micromamba")
+                # Extract directly to final location
+                with open(micromamba_path, "wb") as f:
+                    f.write(tar.extractfile(member).read())
+
+        # Set executable permission
+        os.chmod(micromamba_path, 0o755)
+
+        # Update PATH only once at the end
+        os.environ["PATH"] += os.pathsep + os.path.dirname(micromamba_path)
+        return micromamba_path
+
+    @timer
+    def download_conda_packages(storage, packages, dest_dir):
+
+        def process_conda_package(args):
             # Ensure that conda packages go into architecture specific folders.
             # The path looks like REPO/CHANNEL/CONDA_SUBDIR/PACKAGE. We trick
             # Micromamba into believing that all packages are coming from a local
             # channel - the only hurdle is ensuring that packages are organised
             # properly.
-
-
-            dest = os.path.join(conda_pkgs_dir, "/".join(key.split("/")[-2:]))
+            key, tmpfile, dest_dir = args
+            dest = os.path.join(dest_dir, "/".join(key.split("/")[-2:]))
             os.makedirs(os.path.dirname(dest), exist_ok=True)
             shutil.move(tmpfile, dest)

-
-
-
-
-
-
-
-
+        os.makedirs(dest_dir, exist_ok=True)
+        with storage.load_bytes([package["path"] for package in packages]) as results:
+            with concurrent.futures.ThreadPoolExecutor() as executor:
+                executor.map(
+                    process_conda_package,
+                    [(key, tmpfile, dest_dir) for key, tmpfile, _ in results],
+                )
+        # for key, tmpfile, _ in results:
+
+        #     # TODO: consider RAM disk
+        #     dest = os.path.join(dest_dir, "/".join(key.split("/")[-2:]))
+        #     os.makedirs(os.path.dirname(dest), exist_ok=True)
+        #     shutil.move(tmpfile, dest)
+        return dest_dir
+
+    @timer
+    def download_pypi_packages(storage, packages, dest_dir):
+
+        def process_pypi_package(args):
+            key, tmpfile, dest_dir = args
+            dest = os.path.join(dest_dir, os.path.basename(key))
+            shutil.move(tmpfile, dest)
+
+        os.makedirs(dest_dir, exist_ok=True)
+        with storage.load_bytes([package["path"] for package in packages]) as results:
+            with concurrent.futures.ThreadPoolExecutor() as executor:
+                executor.map(
+                    process_pypi_package,
+                    [(key, tmpfile, dest_dir) for key, tmpfile, _ in results],
+                )
+        # for key, tmpfile, _ in results:
+        #     dest = os.path.join(dest_dir, os.path.basename(key))
+        #     shutil.move(tmpfile, dest)
+        return dest_dir
+
+    @timer
+    def create_conda_environment(prefix, conda_pkgs_dir):
+        cmd = f'''set -e;
+            tmpfile=$(mktemp);
+            echo "@EXPLICIT" > "$tmpfile";
+            ls -d {conda_pkgs_dir}/*/* >> "$tmpfile";
            export PATH=$PATH:$(pwd)/micromamba;
-
-
-
-
-
-
-
-
-
-
-
-            export CONDA_PKGS_DIRS=$(pwd)/micromamba/pkgs;
-            micromamba create --yes --offline --no-deps --safety-checks=disabled --no-extra-safety-checks --prefix {prefix} --file "$tmpfile";
-            rm "$tmpfile"''',
-    ]
-
-    # Download PyPI packages.
-    if "pypi" in env:
-        pypi_pkgs_dir = os.path.join(pkgs_dir, "pypi")
-        with storage.load_bytes(
-            [package["path"] for package in env["pypi"]]
-        ) as results:
-            for key, tmpfile, _ in results:
-                dest = os.path.join(pypi_pkgs_dir, os.path.basename(key))
-                os.makedirs(os.path.dirname(dest), exist_ok=True)
-                shutil.move(tmpfile, dest)
-
-        # Install PyPI packages.
-        cmds.extend(
-            [
-                f"""set -e;
-                export PATH=$PATH:$(pwd)/micromamba;
-                export CONDA_PKGS_DIRS=$(pwd)/micromamba/pkgs;
-                micromamba run --prefix {prefix} python -m pip --disable-pip-version-check install --root-user-action=ignore --no-compile {pypi_pkgs_dir}/*.whl --no-user"""
-            ]
-        )
+            export CONDA_PKGS_DIRS=$(pwd)/micromamba/pkgs;
+            export MAMBA_NO_LOW_SPEED_LIMIT=1;
+            export MAMBA_USE_INDEX_CACHE=1;
+            export MAMBA_NO_PROGRESS_BARS=1;
+            export CONDA_FETCH_THREADS=1;
+            micromamba create --yes --offline --no-deps \
+                --safety-checks=disabled --no-extra-safety-checks \
+                --prefix {prefix} --file "$tmpfile" \
+                --no-pyc --no-rc --always-copy;
+            rm "$tmpfile"'''
+        run_cmd(cmd)

-
-
-
-
-
-
-
-
-
+    @timer
+    def install_pypi_packages(prefix, pypi_pkgs_dir):
+
+        cmd = f"""set -e;
+            export PATH=$PATH:$(pwd)/micromamba;
+            export CONDA_PKGS_DIRS=$(pwd)/micromamba/pkgs;
+            micromamba run --prefix {prefix} python -m pip --disable-pip-version-check \
+                install --root-user-action=ignore --no-compile --no-index \
+                --no-cache-dir --no-deps --prefer-binary \
+                --find-links={pypi_pkgs_dir} --no-user \
+                --no-warn-script-location --no-input \
+                {pypi_pkgs_dir}/*.whl
+        """
+        run_cmd(cmd)
+
+    @timer
+    def setup_environment(
+        architecture, storage, env, prefix, conda_pkgs_dir, pypi_pkgs_dir
+    ):
+        with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
+            # install micromamba, download conda and pypi packages in parallel
+            futures = {
+                "micromamba": executor.submit(install_micromamba, architecture),
+                "conda_pkgs": executor.submit(
+                    download_conda_packages, storage, env["conda"], conda_pkgs_dir
+                ),
+            }
+            if "pypi" in env:
+                futures["pypi_pkgs"] = executor.submit(
+                    download_pypi_packages, storage, env["pypi"], pypi_pkgs_dir
+                )
+
+            # create conda environment after micromamba is installed and conda packages are downloaded
+            done, _ = concurrent.futures.wait(
+                [futures["micromamba"], futures["conda_pkgs"]],
+                return_when=concurrent.futures.ALL_COMPLETED,
+            )
+
+            for future in done:
+                future.result()
+
+            # start conda environment creation
+            futures["conda_env"] = executor.submit(
+                create_conda_environment, prefix, conda_pkgs_dir
+            )
+
+            if "pypi" in env:
+                # install pypi packages after conda environment is created and pypi packages are downloaded
+                done, _ = concurrent.futures.wait(
+                    [futures["conda_env"], futures["pypi_pkgs"]],
+                    return_when=concurrent.futures.ALL_COMPLETED,
+                )
+
+                for future in done:
+                    future.result()
+
+                # install pypi packages
+                futures["pypi_install"] = executor.submit(
+                    install_pypi_packages, prefix, pypi_pkgs_dir
+                )
+                # wait for pypi packages to be installed
+                futures["pypi_install"].result()
+            else:
+                # wait for conda environment to be created
+                futures["conda_env"].result()
+
+    setup_environment(architecture, storage, env, prefix, conda_pkgs_dir, pypi_pkgs_dir)
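Stripped of the Metaflow specifics, the new setup_environment orchestration is a plain concurrent.futures dependency graph: start the independent downloads in parallel, then gate each build step on exactly the futures it needs. A self-contained sketch of the same scheduling pattern; the worker functions are stand-ins, not Metaflow code.

import concurrent.futures
import time


def fetch(name, seconds):
    # Stand-in for install_micromamba / download_*_packages.
    time.sleep(seconds)
    return name


def build(*inputs):
    # Stand-in for create_conda_environment / install_pypi_packages.
    return f"built from {inputs}"


with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
    futures = {
        "tool": executor.submit(fetch, "tool", 0.1),
        "pkgs": executor.submit(fetch, "pkgs", 0.2),
    }
    # Gate the build on its dependencies; .result() re-raises any exception
    # from a worker, mirroring run_cmd's fail-fast behavior above.
    deps = [futures["tool"].result(), futures["pkgs"].result()]
    futures["env"] = executor.submit(build, *deps)
    print(futures["env"].result())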