metaflow 2.12.8__py2.py3-none-any.whl → 2.12.9__py2.py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to their respective public registries. It is provided for informational purposes only.
- metaflow/__init__.py +2 -0
- metaflow/cli.py +12 -4
- metaflow/extension_support/plugins.py +1 -0
- metaflow/flowspec.py +8 -1
- metaflow/lint.py +13 -0
- metaflow/metaflow_current.py +0 -8
- metaflow/plugins/__init__.py +12 -0
- metaflow/plugins/argo/argo_workflows.py +462 -42
- metaflow/plugins/argo/argo_workflows_cli.py +60 -3
- metaflow/plugins/argo/argo_workflows_decorator.py +38 -7
- metaflow/plugins/argo/argo_workflows_deployer.py +290 -0
- metaflow/plugins/argo/jobset_input_paths.py +16 -0
- metaflow/plugins/aws/batch/batch_decorator.py +16 -13
- metaflow/plugins/aws/step_functions/step_functions_cli.py +45 -3
- metaflow/plugins/aws/step_functions/step_functions_deployer.py +251 -0
- metaflow/plugins/cards/card_cli.py +1 -1
- metaflow/plugins/kubernetes/kubernetes.py +279 -52
- metaflow/plugins/kubernetes/kubernetes_cli.py +26 -8
- metaflow/plugins/kubernetes/kubernetes_client.py +0 -1
- metaflow/plugins/kubernetes/kubernetes_decorator.py +56 -44
- metaflow/plugins/kubernetes/kubernetes_job.py +6 -6
- metaflow/plugins/kubernetes/kubernetes_jobsets.py +510 -272
- metaflow/plugins/parallel_decorator.py +108 -8
- metaflow/plugins/secrets/secrets_decorator.py +12 -3
- metaflow/plugins/test_unbounded_foreach_decorator.py +39 -4
- metaflow/runner/deployer.py +386 -0
- metaflow/runner/metaflow_runner.py +1 -20
- metaflow/runner/nbdeploy.py +130 -0
- metaflow/runner/nbrun.py +4 -28
- metaflow/runner/utils.py +49 -0
- metaflow/runtime.py +246 -134
- metaflow/version.py +1 -1
- {metaflow-2.12.8.dist-info → metaflow-2.12.9.dist-info}/METADATA +2 -2
- {metaflow-2.12.8.dist-info → metaflow-2.12.9.dist-info}/RECORD +38 -32
- {metaflow-2.12.8.dist-info → metaflow-2.12.9.dist-info}/WHEEL +1 -1
- {metaflow-2.12.8.dist-info → metaflow-2.12.9.dist-info}/LICENSE +0 -0
- {metaflow-2.12.8.dist-info → metaflow-2.12.9.dist-info}/entry_points.txt +0 -0
- {metaflow-2.12.8.dist-info → metaflow-2.12.9.dist-info}/top_level.txt +0 -0
metaflow/plugins/argo/argo_workflows_cli.py

@@ -5,7 +5,9 @@ import re
 import sys
 from hashlib import sha1
 
-from metaflow import JSONType, current, decorators, parameters
+from metaflow import Run, JSONType, current, decorators, parameters
+from metaflow.client.core import get_metadata
+from metaflow.exception import MetaflowNotFound
 from metaflow._vendor import click
 from metaflow.exception import MetaflowException, MetaflowInternalError
 from metaflow.metaflow_config import (
@@ -165,6 +167,14 @@ def argo_workflows(obj, name=None):
     default="",
     help="PagerDuty Events API V2 Integration key for workflow success/failure notifications.",
 )
+@click.option(
+    "--deployer-attribute-file",
+    default=None,
+    show_default=True,
+    type=str,
+    help="Write the workflow name to the file specified. Used internally for Metaflow's Deployer API.",
+    hidden=True,
+)
 @click.pass_obj
 def create(
     obj,
@@ -182,9 +192,21 @@ def create(
     notify_on_success=False,
     notify_slack_webhook_url=None,
     notify_pager_duty_integration_key=None,
+    deployer_attribute_file=None,
 ):
     validate_tags(tags)
 
+    if deployer_attribute_file:
+        with open(deployer_attribute_file, "w") as f:
+            json.dump(
+                {
+                    "name": obj.workflow_name,
+                    "flow_name": obj.flow.name,
+                    "metadata": get_metadata(),
+                },
+                f,
+            )
+
     obj.echo("Deploying *%s* to Argo Workflows..." % obj.workflow_name, bold=True)
 
     if SERVICE_VERSION_CHECK:
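For context, the file written by the hidden --deployer-attribute-file option on `create` is a small JSON document that the Deployer API reads back. A sketch of its shape, with made-up values (the exact workflow name and metadata string depend on the deployment):

{
    "name": "helloflow.prod.helloflow",
    "flow_name": "HelloFlow",
    "metadata": "service@https://metadata.example.com"
}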
@@ -563,8 +585,16 @@ def resolve_token(
     type=str,
     help="Write the ID of this run to the file specified.",
 )
+@click.option(
+    "--deployer-attribute-file",
+    default=None,
+    show_default=True,
+    type=str,
+    help="Write the metadata and pathspec of this run to the file specified.\nUsed internally for Metaflow's Deployer API.",
+    hidden=True,
+)
 @click.pass_obj
-def trigger(obj, run_id_file=None, **kwargs):
+def trigger(obj, run_id_file=None, deployer_attribute_file=None, **kwargs):
     def _convert_value(param):
         # Swap `-` with `_` in parameter name to match click's behavior
         val = kwargs.get(param.name.replace("-", "_").lower())
@@ -587,6 +617,17 @@ def trigger(obj, run_id_file=None, **kwargs):
         with open(run_id_file, "w") as f:
             f.write(str(run_id))
 
+    if deployer_attribute_file:
+        with open(deployer_attribute_file, "w") as f:
+            json.dump(
+                {
+                    "name": obj.workflow_name,
+                    "metadata": get_metadata(),
+                    "pathspec": "/".join((obj.flow.name, run_id)),
+                },
+                f,
+            )
+
     obj.echo(
         "Workflow *{name}* triggered on Argo Workflows "
         "(run-id *{run_id}*).".format(name=obj.workflow_name, run_id=run_id),
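The trigger-side file has the same shape but swaps "flow_name" for a "pathspec" ("<flow name>/<run id>") that can be handed to the Metaflow client. Illustrative contents, values made up:

{
    "name": "helloflow",
    "metadata": "service@https://metadata.example.com",
    "pathspec": "HelloFlow/argo-helloflow-xyz123"
}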
@@ -786,6 +827,20 @@ def validate_token(name, token_prefix, authorize, instructions_fn=None):
     return True
 
 
+def get_run_object(pathspec: str):
+    try:
+        return Run(pathspec, _namespace_check=False)
+    except MetaflowNotFound:
+        return None
+
+
+def get_status_considering_run_object(status, run_obj):
+    remapped_status = remap_status(status)
+    if remapped_status == "Running" and run_obj is None:
+        return "Pending"
+    return remapped_status
+
+
 @argo_workflows.command(help="Fetch flow execution status on Argo Workflows.")
 @click.argument("run-id", required=True, type=str)
 @click.pass_obj
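A quick sketch of how these two helpers compose (the flow name and run id are made up): when Argo reports the workflow as running but the run has not yet been registered with the metadata service, the CLI now reports it as pending instead.

run_obj = get_run_object("HelloFlow/argo-helloflow-xyz123")   # None until the run is registered
print(get_status_considering_run_object("Running", run_obj))
# -> "Pending" while run_obj is None, otherwise the remapped Argo status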
@@ -803,8 +858,10 @@ def status(obj, run_id):
     # Trim prefix from run_id
     name = run_id[5:]
     status = ArgoWorkflows.get_workflow_status(obj.flow.name, name)
+    run_obj = get_run_object("/".join((obj.flow.name, run_id)))
     if status is not None:
-        obj.echo_always(status)
+        status = get_status_considering_run_object(status, run_obj)
+        obj.echo_always(status)
 
 
 @argo_workflows.command(help="Terminate flow execution on Argo Workflows.")
metaflow/plugins/argo/argo_workflows_decorator.py

@@ -2,12 +2,14 @@ import json
 import os
 import time
 
+
 from metaflow import current
 from metaflow.decorators import StepDecorator
 from metaflow.events import Trigger
 from metaflow.metadata import MetaDatum
 from metaflow.metaflow_config import ARGO_EVENTS_WEBHOOK_URL
-
+from metaflow.graph import DAGNode, FlowGraph
+from metaflow.flowspec import FlowSpec
 from .argo_events import ArgoEvent
 
 
@@ -83,7 +85,13 @@ class ArgoWorkflowsInternalDecorator(StepDecorator):
         metadata.register_metadata(run_id, step_name, task_id, entries)
 
     def task_finished(
-        self, step_name, flow, graph, is_task_ok, retry_count, max_user_code_retries
+        self,
+        step_name,
+        flow: FlowSpec,
+        graph: FlowGraph,
+        is_task_ok,
+        retry_count,
+        max_user_code_retries,
     ):
         if not is_task_ok:
             # The task finished with an exception - execution won't
@@ -100,16 +108,39 @@ class ArgoWorkflowsInternalDecorator(StepDecorator):
         # we run pods with a security context. We work around this constraint by
         # mounting an emptyDir volume.
         if graph[step_name].type == "foreach":
+            # A DAGNode is considered a `parallel_step` if it is annotated by the @parallel decorator.
+            # A DAGNode is considered a `parallel_foreach` if it contains a `num_parallel` kwarg provided to the
+            # `next` method of that DAGNode.
+            # At this moment in the code we care if a node is marked as a `parallel_foreach` so that we can pass down the
+            # value of `num_parallel` to the subsequent steps.
+            # For @parallel, the implmentation uses 1 jobset object. That one jobset
+            # object internally creates 'num_parallel' jobs. So, we set foreach_num_splits
+            # to 1 here for @parallel. The parallelism of jobset is handled in
+            # kubernetes_job.py.
+            if graph[step_name].parallel_foreach:
+                with open("/mnt/out/num_parallel", "w") as f:
+                    json.dump(flow._parallel_ubf_iter.num_parallel, f)
+                flow._foreach_num_splits = 1
+                with open("/mnt/out/task_id_entropy", "w") as file:
+                    import uuid
+
+                    file.write(uuid.uuid4().hex[:6])
+
             with open("/mnt/out/splits", "w") as file:
                 json.dump(list(range(flow._foreach_num_splits)), file)
             with open("/mnt/out/split_cardinality", "w") as file:
                 json.dump(flow._foreach_num_splits, file)
 
-        # Unfortunately, we can't always use pod names as task-ids since the pod names
-        # are not static across retries. We write the task-id to a file that is read
-        # by the next task here.
-        with open("/mnt/out/task_id", "w") as file:
-            file.write(self.task_id)
+        # for steps that have a `@parallel` decorator set to them, we will be relying on Jobsets
+        # to run the task. In this case, we cannot set anything in the
+        # `/mnt/out` directory, since such form of output mounts are not available to jobset execution as
+        # argo just treats it like A K8s resource that it throws in the cluster.
+        if not graph[step_name].parallel_step:
+            # Unfortunately, we can't always use pod names as task-ids since the pod names
+            # are not static across retries. We write the task-id to a file that is read
+            # by the next task here.
+            with open("/mnt/out/task_id", "w") as file:
+                file.write(self.task_id)
 
         # Emit Argo Events given that the flow has succeeded. Given that we only
         # emit events when the task succeeds, we can piggy back on this decorator
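A minimal local sketch of the /mnt/out handshake the block above performs for a `num_parallel` foreach; this is not Metaflow code, and the directory and values are illustrative:

import json, os, tempfile, uuid

out_dir = tempfile.mkdtemp()   # stands in for /mnt/out
num_parallel = 4               # value of the `num_parallel` kwarg passed to self.next()

with open(os.path.join(out_dir, "num_parallel"), "w") as f:
    json.dump(num_parallel, f)
with open(os.path.join(out_dir, "task_id_entropy"), "w") as f:
    f.write(uuid.uuid4().hex[:6])
# @parallel maps onto a single jobset, so the foreach fans out to one split
with open(os.path.join(out_dir, "splits"), "w") as f:
    json.dump(list(range(1)), f)
with open(os.path.join(out_dir, "split_cardinality"), "w") as f:
    json.dump(1, f)

print(sorted(os.listdir(out_dir)))
# ['num_parallel', 'split_cardinality', 'splits', 'task_id_entropy']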
metaflow/plugins/argo/argo_workflows_deployer.py (new file)

@@ -0,0 +1,290 @@
+import sys
+import tempfile
+from typing import Optional, ClassVar
+
+from metaflow.plugins.argo.argo_workflows import ArgoWorkflows
+from metaflow.runner.deployer import (
+    DeployerImpl,
+    DeployedFlow,
+    TriggeredRun,
+    get_lower_level_group,
+    handle_timeout,
+)
+
+
+def suspend(instance: TriggeredRun, **kwargs):
+    """
+    Suspend the running workflow.
+
+    Parameters
+    ----------
+    **kwargs : Any
+        Additional arguments to pass to the suspend command.
+
+    Returns
+    -------
+    bool
+        True if the command was successful, False otherwise.
+    """
+    _, run_id = instance.pathspec.split("/")
+
+    # every subclass needs to have `self.deployer_kwargs`
+    command = get_lower_level_group(
+        instance.deployer.api,
+        instance.deployer.top_level_kwargs,
+        instance.deployer.TYPE,
+        instance.deployer.deployer_kwargs,
+    ).suspend(run_id=run_id, **kwargs)
+
+    pid = instance.deployer.spm.run_command(
+        [sys.executable, *command],
+        env=instance.deployer.env_vars,
+        cwd=instance.deployer.cwd,
+        show_output=instance.deployer.show_output,
+    )
+
+    command_obj = instance.deployer.spm.get(pid)
+    return command_obj.process.returncode == 0
+
+
+def unsuspend(instance: TriggeredRun, **kwargs):
+    """
+    Unsuspend the suspended workflow.
+
+    Parameters
+    ----------
+    **kwargs : Any
+        Additional arguments to pass to the unsuspend command.
+
+    Returns
+    -------
+    bool
+        True if the command was successful, False otherwise.
+    """
+    _, run_id = instance.pathspec.split("/")
+
+    # every subclass needs to have `self.deployer_kwargs`
+    command = get_lower_level_group(
+        instance.deployer.api,
+        instance.deployer.top_level_kwargs,
+        instance.deployer.TYPE,
+        instance.deployer.deployer_kwargs,
+    ).unsuspend(run_id=run_id, **kwargs)
+
+    pid = instance.deployer.spm.run_command(
+        [sys.executable, *command],
+        env=instance.deployer.env_vars,
+        cwd=instance.deployer.cwd,
+        show_output=instance.deployer.show_output,
+    )
+
+    command_obj = instance.deployer.spm.get(pid)
+    return command_obj.process.returncode == 0
+
+
+def terminate(instance: TriggeredRun, **kwargs):
+    """
+    Terminate the running workflow.
+
+    Parameters
+    ----------
+    **kwargs : Any
+        Additional arguments to pass to the terminate command.
+
+    Returns
+    -------
+    bool
+        True if the command was successful, False otherwise.
+    """
+    _, run_id = instance.pathspec.split("/")
+
+    # every subclass needs to have `self.deployer_kwargs`
+    command = get_lower_level_group(
+        instance.deployer.api,
+        instance.deployer.top_level_kwargs,
+        instance.deployer.TYPE,
+        instance.deployer.deployer_kwargs,
+    ).terminate(run_id=run_id, **kwargs)
+
+    pid = instance.deployer.spm.run_command(
+        [sys.executable, *command],
+        env=instance.deployer.env_vars,
+        cwd=instance.deployer.cwd,
+        show_output=instance.deployer.show_output,
+    )
+
+    command_obj = instance.deployer.spm.get(pid)
+    return command_obj.process.returncode == 0
+
+
+def status(instance: TriggeredRun):
+    """
+    Get the status of the triggered run.
+
+    Returns
+    -------
+    str, optional
+        The status of the workflow considering the run object, or None if the status could not be retrieved.
+    """
+    from metaflow.plugins.argo.argo_workflows_cli import (
+        get_status_considering_run_object,
+    )
+
+    flow_name, run_id = instance.pathspec.split("/")
+    name = run_id[5:]
+    status = ArgoWorkflows.get_workflow_status(flow_name, name)
+    if status is not None:
+        return get_status_considering_run_object(status, instance.run)
+    return None
+
+
+def production_token(instance: DeployedFlow):
+    """
+    Get the production token for the deployed flow.
+
+    Returns
+    -------
+    str, optional
+        The production token, None if it cannot be retrieved.
+    """
+    try:
+        _, production_token = ArgoWorkflows.get_existing_deployment(
+            instance.deployer.name
+        )
+        return production_token
+    except TypeError:
+        return None
+
+
+def delete(instance: DeployedFlow, **kwargs):
+    """
+    Delete the deployed flow.
+
+    Parameters
+    ----------
+    **kwargs : Any
+        Additional arguments to pass to the delete command.
+
+    Returns
+    -------
+    bool
+        True if the command was successful, False otherwise.
+    """
+    command = get_lower_level_group(
+        instance.deployer.api,
+        instance.deployer.top_level_kwargs,
+        instance.deployer.TYPE,
+        instance.deployer.deployer_kwargs,
+    ).delete(**kwargs)
+
+    pid = instance.deployer.spm.run_command(
+        [sys.executable, *command],
+        env=instance.deployer.env_vars,
+        cwd=instance.deployer.cwd,
+        show_output=instance.deployer.show_output,
+    )
+
+    command_obj = instance.deployer.spm.get(pid)
+    return command_obj.process.returncode == 0
+
+
+def trigger(instance: DeployedFlow, **kwargs):
+    """
+    Trigger a new run for the deployed flow.
+
+    Parameters
+    ----------
+    **kwargs : Any
+        Additional arguments to pass to the trigger command, `Parameters` in particular
+
+    Returns
+    -------
+    ArgoWorkflowsTriggeredRun
+        The triggered run instance.
+
+    Raises
+    ------
+    Exception
+        If there is an error during the trigger process.
+    """
+    with tempfile.TemporaryDirectory() as temp_dir:
+        tfp_runner_attribute = tempfile.NamedTemporaryFile(dir=temp_dir, delete=False)
+
+        # every subclass needs to have `self.deployer_kwargs`
+        command = get_lower_level_group(
+            instance.deployer.api,
+            instance.deployer.top_level_kwargs,
+            instance.deployer.TYPE,
+            instance.deployer.deployer_kwargs,
+        ).trigger(deployer_attribute_file=tfp_runner_attribute.name, **kwargs)
+
+        pid = instance.deployer.spm.run_command(
+            [sys.executable, *command],
+            env=instance.deployer.env_vars,
+            cwd=instance.deployer.cwd,
+            show_output=instance.deployer.show_output,
+        )
+
+        command_obj = instance.deployer.spm.get(pid)
+        content = handle_timeout(tfp_runner_attribute, command_obj)
+
+        if command_obj.process.returncode == 0:
+            triggered_run = TriggeredRun(deployer=instance.deployer, content=content)
+            triggered_run._enrich_object(
+                {
+                    "status": property(status),
+                    "terminate": terminate,
+                    "suspend": suspend,
+                    "unsuspend": unsuspend,
+                }
+            )
+            return triggered_run
+
+    raise Exception(
+        "Error triggering %s on %s for %s"
+        % (instance.deployer.name, instance.deployer.TYPE, instance.deployer.flow_file)
+    )
+
+
+class ArgoWorkflowsDeployer(DeployerImpl):
+    """
+    Deployer implementation for Argo Workflows.
+
+    Attributes
+    ----------
+    TYPE : ClassVar[Optional[str]]
+        The type of the deployer, which is "argo-workflows".
+    """
+
+    TYPE: ClassVar[Optional[str]] = "argo-workflows"
+
+    def __init__(self, deployer_kwargs, **kwargs):
+        """
+        Initialize the ArgoWorkflowsDeployer.
+
+        Parameters
+        ----------
+        deployer_kwargs : dict
+            The deployer-specific keyword arguments.
+        **kwargs : Any
+            Additional arguments to pass to the superclass constructor.
+        """
+        self.deployer_kwargs = deployer_kwargs
+        super().__init__(**kwargs)
+
+    def _enrich_deployed_flow(self, deployed_flow: DeployedFlow):
+        """
+        Enrich the DeployedFlow object with additional properties and methods.
+
+        Parameters
+        ----------
+        deployed_flow : DeployedFlow
+            The deployed flow object to enrich.
+        """
+        deployed_flow._enrich_object(
+            {
+                "production_token": property(production_token),
+                "trigger": trigger,
+                "delete": delete,
+            }
+        )
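A minimal usage sketch of the Deployer API this module plugs into. It assumes the `Deployer` entry point is exported from the top-level `metaflow` package (consistent with the `metaflow/__init__.py` change in this release); the flow file and parameter names are made up:

from metaflow import Deployer

# Deploy the flow to Argo Workflows, then drive the deployment programmatically.
deployed = Deployer("hello_flow.py").argo_workflows().create()
print(deployed.production_token)   # property attached by _enrich_deployed_flow

run = deployed.trigger(alpha=0.5)  # returns a TriggeredRun enriched with the helpers above
print(run.status)                  # "Pending" until the Run is registered
run.suspend()
run.unsuspend()
run.terminate()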
metaflow/plugins/argo/jobset_input_paths.py (new file)

@@ -0,0 +1,16 @@
+import sys
+from hashlib import md5
+
+
+def generate_input_paths(run_id, step_name, task_id_entropy, num_parallel):
+    # => run_id/step/:foo,bar
+    control_id = "control-{}-0".format(task_id_entropy)
+    worker_ids = [
+        "worker-{}-{}".format(task_id_entropy, i) for i in range(int(num_parallel) - 1)
+    ]
+    ids = [control_id] + worker_ids
+    return "{}/{}/:{}".format(run_id, step_name, ",".join(ids))
+
+
+if __name__ == "__main__":
+    print(generate_input_paths(sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4]))
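For reference, what generate_input_paths produces (the arguments here are illustrative):

from metaflow.plugins.argo.jobset_input_paths import generate_input_paths

print(generate_input_paths("argo-hello-abc", "train", "a1b2c3", 4))
# -> "argo-hello-abc/train/:control-a1b2c3-0,worker-a1b2c3-0,worker-a1b2c3-1,worker-a1b2c3-2"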
metaflow/plugins/aws/batch/batch_decorator.py

@@ -261,8 +261,8 @@ class BatchDecorator(StepDecorator):
         # metadata. A rudimentary way to detect non-local execution is to
         # check for the existence of AWS_BATCH_JOB_ID environment variable.
 
+        meta = {}
         if "AWS_BATCH_JOB_ID" in os.environ:
-            meta = {}
             meta["aws-batch-job-id"] = os.environ["AWS_BATCH_JOB_ID"]
             meta["aws-batch-job-attempt"] = os.environ["AWS_BATCH_JOB_ATTEMPT"]
             meta["aws-batch-ce-name"] = os.environ["AWS_BATCH_CE_NAME"]
@@ -290,18 +290,6 @@ class BatchDecorator(StepDecorator):
             instance_meta = get_ec2_instance_metadata()
             meta.update(instance_meta)
 
-            entries = [
-                MetaDatum(
-                    field=k,
-                    value=v,
-                    type=k,
-                    tags=["attempt_id:{0}".format(retry_count)],
-                )
-                for k, v in meta.items()
-            ]
-            # Register book-keeping metadata for debugging.
-            metadata.register_metadata(run_id, step_name, task_id, entries)
-
         self._save_logs_sidecar = Sidecar("save_logs_periodically")
         self._save_logs_sidecar.start()
 
@@ -322,6 +310,21 @@ class BatchDecorator(StepDecorator):
 
         if num_parallel >= 1:
             _setup_multinode_environment()
+            # current.parallel.node_index will be correctly available over here.
+            meta.update({"parallel-node-index": current.parallel.node_index})
+
+        if len(meta) > 0:
+            entries = [
+                MetaDatum(
+                    field=k,
+                    value=v,
+                    type=k,
+                    tags=["attempt_id:{0}".format(retry_count)],
+                )
+                for k, v in meta.items()
+            ]
+            # Register book-keeping metadata for debugging.
+            metadata.register_metadata(run_id, step_name, task_id, entries)
 
     def task_finished(
         self, step_name, flow, graph, is_task_ok, retry_count, max_retries
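Net effect of the three batch_decorator hunks above: the `meta` dict is built unconditionally, the node index is added once the multi-node environment is set up, and the MetaDatum entries are registered in a single place. Illustrative shape of the dict for a parallel AWS Batch task (values are made up):

meta = {
    "aws-batch-job-id": "d3c8a1e0-1234-5678-9abc-def012345678",
    "aws-batch-job-attempt": "1",
    "aws-batch-ce-name": "my-compute-env",
    "parallel-node-index": 0,
}
# each key/value pair becomes one MetaDatum tagged with the current attempt id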
metaflow/plugins/aws/step_functions/step_functions_cli.py

@@ -4,6 +4,7 @@ import re
 from hashlib import sha1
 
 from metaflow import JSONType, current, decorators, parameters
+from metaflow.client.core import get_metadata
 from metaflow._vendor import click
 from metaflow.exception import MetaflowException, MetaflowInternalError
 from metaflow.metaflow_config import (
@@ -130,6 +131,14 @@ def step_functions(obj, name=None):
     help="Use AWS Step Functions Distributed Map instead of Inline Map for "
     "defining foreach tasks in Amazon State Language.",
 )
+@click.option(
+    "--deployer-attribute-file",
+    default=None,
+    show_default=True,
+    type=str,
+    help="Write the workflow name to the file specified. Used internally for Metaflow's Deployer API.",
+    hidden=True,
+)
 @click.pass_obj
 def create(
     obj,
@@ -143,9 +152,21 @@ def create(
     workflow_timeout=None,
     log_execution_history=False,
     use_distributed_map=False,
+    deployer_attribute_file=None,
 ):
     validate_tags(tags)
 
+    if deployer_attribute_file:
+        with open(deployer_attribute_file, "w") as f:
+            json.dump(
+                {
+                    "name": obj.state_machine_name,
+                    "flow_name": obj.flow.name,
+                    "metadata": get_metadata(),
+                },
+                f,
+            )
+
     obj.echo(
         "Deploying *%s* to AWS Step Functions..." % obj.state_machine_name, bold=True
     )
@@ -231,8 +252,10 @@ def check_metadata_service_version(obj):
 
 
 def resolve_state_machine_name(obj, name):
-    def attach_prefix(name):
-        if SFN_STATE_MACHINE_PREFIX is not None:
+    def attach_prefix(name: str):
+        if SFN_STATE_MACHINE_PREFIX is not None and (
+            not name.startswith(SFN_STATE_MACHINE_PREFIX)
+        ):
             return SFN_STATE_MACHINE_PREFIX + "_" + name
         return name
 
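The attach_prefix change prevents the prefix from being applied twice. Assuming SFN_STATE_MACHINE_PREFIX is set to "prod" (illustrative):

attach_prefix("myflow")        # -> "prod_myflow"
attach_prefix("prod_myflow")   # -> "prod_myflow"  (previously "prod_prod_myflow")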
@@ -440,8 +463,16 @@ def resolve_token(
     type=str,
     help="Write the ID of this run to the file specified.",
 )
+@click.option(
+    "--deployer-attribute-file",
+    default=None,
+    show_default=True,
+    type=str,
+    help="Write the metadata and pathspec of this run to the file specified.\nUsed internally for Metaflow's Deployer API.",
+    hidden=True,
+)
 @click.pass_obj
-def trigger(obj, run_id_file=None, **kwargs):
+def trigger(obj, run_id_file=None, deployer_attribute_file=None, **kwargs):
     def _convert_value(param):
         # Swap `-` with `_` in parameter name to match click's behavior
         val = kwargs.get(param.name.replace("-", "_").lower())
@@ -466,6 +497,17 @@ def trigger(obj, run_id_file=None, **kwargs):
         with open(run_id_file, "w") as f:
             f.write(str(run_id))
 
+    if deployer_attribute_file:
+        with open(deployer_attribute_file, "w") as f:
+            json.dump(
+                {
+                    "name": obj.state_machine_name,
+                    "metadata": get_metadata(),
+                    "pathspec": "/".join((obj.flow.name, run_id)),
+                },
+                f,
+            )
+
     obj.echo(
         "Workflow *{name}* triggered on AWS Step Functions "
         "(run-id *{run_id}*).".format(name=obj.state_machine_name, run_id=run_id),
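For completeness, a sketch of how a caller such as the Deployer machinery might read back the attribute file written above; in practice a temporary file is passed via --deployer-attribute-file, and the path here is illustrative:

import json

with open("/tmp/sfn_deployer_attributes.json") as f:
    attrs = json.load(f)

print(attrs["name"])       # the state machine name
print(attrs["pathspec"])   # "<flow name>/<run id>", usable with the Metaflow client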