ob-metaflow 2.12.36.1__py2.py3-none-any.whl → 2.12.36.2__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of ob-metaflow might be problematic.
Files changed (56)
  1. metaflow/__init__.py +3 -0
  2. metaflow/cli.py +84 -697
  3. metaflow/cli_args.py +17 -0
  4. metaflow/cli_components/__init__.py +0 -0
  5. metaflow/cli_components/dump_cmd.py +96 -0
  6. metaflow/cli_components/init_cmd.py +51 -0
  7. metaflow/cli_components/run_cmds.py +358 -0
  8. metaflow/cli_components/step_cmd.py +189 -0
  9. metaflow/cli_components/utils.py +140 -0
  10. metaflow/cmd/develop/stub_generator.py +9 -2
  11. metaflow/decorators.py +63 -2
  12. metaflow/extension_support/plugins.py +41 -27
  13. metaflow/flowspec.py +156 -16
  14. metaflow/includefile.py +50 -22
  15. metaflow/metaflow_config.py +1 -1
  16. metaflow/package.py +17 -3
  17. metaflow/parameters.py +80 -23
  18. metaflow/plugins/__init__.py +4 -0
  19. metaflow/plugins/airflow/airflow_cli.py +1 -0
  20. metaflow/plugins/argo/argo_workflows.py +44 -4
  21. metaflow/plugins/argo/argo_workflows_cli.py +1 -0
  22. metaflow/plugins/argo/argo_workflows_deployer_objects.py +5 -1
  23. metaflow/plugins/aws/batch/batch_decorator.py +2 -2
  24. metaflow/plugins/aws/step_functions/step_functions.py +32 -0
  25. metaflow/plugins/aws/step_functions/step_functions_cli.py +1 -0
  26. metaflow/plugins/aws/step_functions/step_functions_deployer_objects.py +3 -0
  27. metaflow/plugins/datatools/s3/s3op.py +3 -3
  28. metaflow/plugins/kubernetes/kubernetes.py +3 -3
  29. metaflow/plugins/kubernetes/kubernetes_cli.py +1 -1
  30. metaflow/plugins/kubernetes/kubernetes_decorator.py +2 -2
  31. metaflow/plugins/kubernetes/kubernetes_job.py +3 -3
  32. metaflow/plugins/pypi/conda_decorator.py +20 -10
  33. metaflow/plugins/pypi/pypi_decorator.py +11 -9
  34. metaflow/plugins/timeout_decorator.py +2 -2
  35. metaflow/runner/click_api.py +73 -19
  36. metaflow/runner/deployer.py +1 -1
  37. metaflow/runner/deployer_impl.py +2 -2
  38. metaflow/runner/metaflow_runner.py +4 -1
  39. metaflow/runner/nbdeploy.py +2 -0
  40. metaflow/runner/nbrun.py +1 -1
  41. metaflow/runner/subprocess_manager.py +3 -1
  42. metaflow/runner/utils.py +37 -20
  43. metaflow/runtime.py +111 -73
  44. metaflow/sidecar/sidecar_worker.py +1 -1
  45. metaflow/user_configs/__init__.py +0 -0
  46. metaflow/user_configs/config_decorators.py +563 -0
  47. metaflow/user_configs/config_options.py +495 -0
  48. metaflow/user_configs/config_parameters.py +386 -0
  49. metaflow/util.py +17 -0
  50. metaflow/version.py +1 -1
  51. {ob_metaflow-2.12.36.1.dist-info → ob_metaflow-2.12.36.2.dist-info}/METADATA +3 -2
  52. {ob_metaflow-2.12.36.1.dist-info → ob_metaflow-2.12.36.2.dist-info}/RECORD +56 -46
  53. {ob_metaflow-2.12.36.1.dist-info → ob_metaflow-2.12.36.2.dist-info}/LICENSE +0 -0
  54. {ob_metaflow-2.12.36.1.dist-info → ob_metaflow-2.12.36.2.dist-info}/WHEEL +0 -0
  55. {ob_metaflow-2.12.36.1.dist-info → ob_metaflow-2.12.36.2.dist-info}/entry_points.txt +0 -0
  56. {ob_metaflow-2.12.36.1.dist-info → ob_metaflow-2.12.36.2.dist-info}/top_level.txt +0 -0
metaflow/runner/deployer.py CHANGED
@@ -64,7 +64,7 @@ class Deployer(metaclass=DeployerMeta):
  The directory to run the subprocess in; if not specified, the current
  directory is used.
  file_read_timeout : int, default 3600
- The timeout until which we try to read the deployer attribute file.
+ The timeout until which we try to read the deployer attribute file (in seconds).
  **kwargs : Any
  Additional arguments that you would pass to `python myflow.py` before
  the deployment command.
metaflow/runner/deployer_impl.py CHANGED
@@ -37,7 +37,7 @@ class DeployerImpl(object):
  The directory to run the subprocess in; if not specified, the current
  directory is used.
  file_read_timeout : int, default 3600
- The timeout until which we try to read the deployer attribute file.
+ The timeout until which we try to read the deployer attribute file (in seconds).
  **kwargs : Any
  Additional arguments that you would pass to `python myflow.py` before
  the deployment command.
@@ -144,7 +144,7 @@ class DeployerImpl(object):
  # Additional info is used to pass additional deployer specific information.
  # It is used in non-OSS deployers (extensions).
  self.additional_info = content.get("additional_info", {})
-
+ command_obj.sync_wait()
  if command_obj.process.returncode == 0:
  return create_class(deployer=self)
 
metaflow/runner/metaflow_runner.py CHANGED
@@ -221,7 +221,7 @@ class Runner(object):
  The directory to run the subprocess in; if not specified, the current
  directory is used.
  file_read_timeout : int, default 3600
- The timeout until which we try to read the runner attribute file.
+ The timeout until which we try to read the runner attribute file (in seconds).
  **kwargs : Any
  Additional arguments that you would pass to `python myflow.py` before
  the `run` command.
@@ -272,6 +272,9 @@ class Runner(object):
 
  def __get_executing_run(self, attribute_file_fd, command_obj):
  content = handle_timeout(attribute_file_fd, command_obj, self.file_read_timeout)
+
+ command_obj.sync_wait()
+
  content = json.loads(content)
  pathspec = "%s/%s" % (content.get("flow_name"), content.get("run_id"))
 
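All of the docstring fixes in this group make the same clarification: file_read_timeout is expressed in seconds, not milliseconds. A minimal usage sketch (flow file name invented for illustration; Runner is Metaflow's programmatic entry point for runs):

    from metaflow import Runner

    # file_read_timeout is in seconds (default 3600), not milliseconds
    with Runner("myflow.py", file_read_timeout=600).run() as running:
        print(running.run)  # Run object, available once the attribute file is read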
metaflow/runner/nbdeploy.py CHANGED
@@ -46,6 +46,8 @@ class NBDeployer(object):
  base_dir : str, optional, default None
  The directory to run the subprocess in; if not specified, the current
  working directory is used.
+ file_read_timeout : int, default 3600
+ The timeout until which we try to read the deployer attribute file (in seconds).
  **kwargs : Any
  Additional arguments that you would pass to `python myflow.py` i.e. options
  listed in `python myflow.py --help`
metaflow/runner/nbrun.py CHANGED
@@ -44,7 +44,7 @@ class NBRunner(object):
  The directory to run the subprocess in; if not specified, the current
  working directory is used.
  file_read_timeout : int, default 3600
- The timeout until which we try to read the runner attribute file.
+ The timeout until which we try to read the runner attribute file (in seconds).
  **kwargs : Any
  Additional arguments that you would pass to `python myflow.py` before
  the `run` command.
metaflow/runner/subprocess_manager.py CHANGED
@@ -120,6 +120,9 @@ class SubprocessManager(object):
  """
  Run a command synchronously and return its process ID.
 
+ Note: in no case does this wait for the process to *finish*. Use sync_wait()
+ to wait for the command to finish.
+
  Parameters
  ----------
  command : List[str]
@@ -145,7 +148,6 @@
  command_obj = CommandManager(command, env, cwd)
  pid = command_obj.run(show_output=show_output)
  self.commands[pid] = command_obj
- command_obj.sync_wait()
  return pid
 
  async def async_run_command(
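Taken together, these hunks move the blocking wait out of run_command(): the call now returns as soon as the subprocess is spawned, and callers such as DeployerImpl and Runner above invoke sync_wait() themselves after the attribute file has been read. A sketch of the new calling pattern, using only the APIs visible in this diff:

    from metaflow.runner.subprocess_manager import SubprocessManager

    spm = SubprocessManager()
    # Returns the pid as soon as the process is spawned; does NOT wait for it
    pid = spm.run_command(["python", "myflow.py", "run"])
    command_obj = spm.commands[pid]
    # ... read the attribute file / FIFO here while the process is running ...
    command_obj.sync_wait()  # now block until the subprocess finishes
    assert command_obj.process.returncode == 0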
metaflow/runner/utils.py CHANGED
@@ -91,7 +91,7 @@ def read_from_fifo_when_ready(
  encoding : str, optional
  Encoding to use while reading the file, by default "utf-8".
  timeout : int, optional
- Timeout for reading the file in milliseconds, by default 3600.
+ Timeout for reading the file in seconds, by default 3600.
 
  Returns
  -------
@@ -107,30 +107,47 @@
  content to the FIFO.
  """
  content = bytearray()
-
  poll = select.poll()
  poll.register(fifo_fd, select.POLLIN)
-
+ max_timeout = 3 # Wait for 10 * 3 = 30 ms after last write
  while True:
- poll_begin = time.time()
- poll.poll(timeout)
- timeout -= 1000 * (time.time() - poll_begin)
-
- if timeout <= 0:
+ if timeout < 0:
  raise TimeoutError("Timeout while waiting for the file content")
 
+ poll_begin = time.time()
+ # We poll for a very short time to be also able to check if the file was closed
+ # If the file is closed, we assume that we only have one writer so if we have
+ # data, we break out. This is to work around issues in macos
+ events = poll.poll(min(10, timeout * 1000))
+ timeout -= time.time() - poll_begin
+
  try:
- data = os.read(fifo_fd, 128)
- while data:
+ data = os.read(fifo_fd, 8192)
+ if data:
  content += data
- data = os.read(fifo_fd, 128)
-
- # Read from a non-blocking closed FIFO returns an empty byte array
- break
-
+ else:
+ if len(events):
+ # We read an EOF -- consider the file done
+ break
+ else:
+ # We had no events (just a timeout) and the read didn't return
+ # an exception so the file is still open; we continue waiting for data
+ # Unfortunately, on MacOS, it seems that even *after* the file is
+ # closed on the other end, we still don't get a BlockingIOError so
+ # we hack our way and timeout if there is no write in 30ms which is
+ # a relative eternity for file writes.
+ if content:
+ if max_timeout <= 0:
+ break
+ max_timeout -= 1
+ continue
  except BlockingIOError:
- # FIFO is open but no data is available yet
- continue
+ has_blocking_error = True
+ if content:
+ # The file was closed
+ break
+ # else, if we have no content, we continue waiting for the file to be open
+ # and written to.
 
  if not content and check_process_exited(command_obj):
  raise CalledProcessError(command_obj.process.returncode, command_obj.command)
@@ -156,7 +173,7 @@ async def async_read_from_fifo_when_ready(
  encoding : str, optional
  Encoding to use while reading the file, by default "utf-8".
  timeout : int, optional
- Timeout for reading the file in milliseconds, by default 3600.
+ Timeout for reading the file in seconds, by default 3600.
 
  Returns
  -------
@@ -206,7 +223,7 @@ def handle_timeout(
  command_obj : CommandManager
  Command manager object that encapsulates the running command details.
  file_read_timeout : int
- Timeout for reading the file.
+ Timeout for reading the file, in seconds
 
  Returns
  -------
@@ -243,7 +260,7 @@ async def async_handle_timeout(
  command_obj : CommandManager
  Command manager object that encapsulates the running command details.
  file_read_timeout : int
- Timeout for reading the file.
+ Timeout for reading the file, in seconds
 
  Returns
  -------
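The read_from_fifo_when_ready rework above replaces one long poll with short 10 ms slices so the loop can tell "writer closed the FIFO" apart from "no data yet", reads 8 KB chunks instead of 128 bytes, and, because macOS does not reliably raise BlockingIOError after the writer closes, gives up after roughly 30 ms of silence (three empty polls) once some content has arrived. A self-contained sketch of the same technique (the function name and scaffolding are illustrative, not the library API):

    import os
    import select
    import time

    def read_fifo_nonblocking(fifo_fd, timeout=3600):
        # fifo_fd must be opened with os.O_NONBLOCK so os.read raises
        # BlockingIOError instead of blocking
        content = bytearray()
        poll = select.poll()
        poll.register(fifo_fd, select.POLLIN)
        quiet_polls = 3  # 3 polls x 10 ms = ~30 ms grace period after last write
        while True:
            if timeout < 0:
                raise TimeoutError("Timeout while waiting for the file content")
            start = time.time()
            # Poll in short slices so EOF / writer-close is noticed quickly
            events = poll.poll(min(10, timeout * 1000))
            timeout -= time.time() - start
            try:
                data = os.read(fifo_fd, 8192)
                if data:
                    content += data
                elif events:
                    break  # empty read after POLLIN: the writer closed the FIFO
                elif content:
                    quiet_polls -= 1
                    if quiet_polls <= 0:
                        break  # macOS workaround: silence after data means done
            except BlockingIOError:
                if content:
                    break  # FIFO closed and fully drained
                # no content yet: keep waiting for a writer to appear
        return bytes(content)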
metaflow/runtime.py CHANGED
@@ -6,10 +6,12 @@ using local / remote processes
  """
 
  from __future__ import print_function
+ import json
  import os
  import sys
  import fcntl
  import re
+ import tempfile
  import time
  import subprocess
  from datetime import datetime
@@ -32,6 +34,7 @@ from . import procpoll
  from .datastore import TaskDataStoreSet
  from .debug import debug
  from .decorators import flow_decorators
+ from .flowspec import _FlowState
  from .mflog import mflog, RUNTIME_LOG_SOURCE
  from .util import to_unicode, compress_list, unicode_type
  from .clone_util import clone_task_helper
@@ -40,6 +43,10 @@ from .unbounded_foreach import (
  UBF_CONTROL,
  UBF_TASK,
  )
+
+ from .user_configs.config_options import ConfigInput
+ from .user_configs.config_parameters import dump_config_values
+
  import metaflow.tracing as tracing
 
  MAX_WORKERS = 16
@@ -471,82 +478,95 @@ class NativeRuntime(object):
  else:
  self._queue_push("start", {})
  progress_tstamp = time.time()
- try:
- # main scheduling loop
- exception = None
- while self._run_queue or self._active_tasks[0] > 0 or self._cloned_tasks:
- # 1. are any of the current workers finished?
- if self._cloned_tasks:
- finished_tasks = self._cloned_tasks
- # reset the list of cloned tasks and let poll_workers handle
- # the remaining transition
- self._cloned_tasks = []
- else:
- finished_tasks = list(self._poll_workers())
- # 2. push new tasks triggered by the finished tasks to the queue
- self._queue_tasks(finished_tasks)
- # 3. if there are available worker slots, pop and start tasks
- # from the queue.
- self._launch_workers()
-
- if time.time() - progress_tstamp > PROGRESS_INTERVAL:
- progress_tstamp = time.time()
- tasks_print = ", ".join(
- [
- "%s (%d running; %d done)" % (k, v[0], v[1])
- for k, v in self._active_tasks.items()
- if k != 0 and v[0] > 0
- ]
- )
- if self._active_tasks[0] == 0:
- msg = "No tasks are running."
+ with tempfile.NamedTemporaryFile(mode="w", encoding="utf-8") as config_file:
+ # Configurations are passed through a file to avoid overloading the
+ # command-line. We only need to create this file once and it can be reused
+ # for any task launch
+ config_value = dump_config_values(self._flow)
+ if config_value:
+ json.dump(config_value, config_file)
+ config_file.flush()
+ self._config_file_name = config_file.name
+ else:
+ self._config_file_name = None
+ try:
+ # main scheduling loop
+ exception = None
+ while (
+ self._run_queue or self._active_tasks[0] > 0 or self._cloned_tasks
+ ):
+ # 1. are any of the current workers finished?
+ if self._cloned_tasks:
+ finished_tasks = self._cloned_tasks
+ # reset the list of cloned tasks and let poll_workers handle
+ # the remaining transition
+ self._cloned_tasks = []
  else:
- if self._active_tasks[0] == 1:
- msg = "1 task is running: "
+ finished_tasks = list(self._poll_workers())
+ # 2. push new tasks triggered by the finished tasks to the queue
+ self._queue_tasks(finished_tasks)
+ # 3. if there are available worker slots, pop and start tasks
+ # from the queue.
+ self._launch_workers()
+
+ if time.time() - progress_tstamp > PROGRESS_INTERVAL:
+ progress_tstamp = time.time()
+ tasks_print = ", ".join(
+ [
+ "%s (%d running; %d done)" % (k, v[0], v[1])
+ for k, v in self._active_tasks.items()
+ if k != 0 and v[0] > 0
+ ]
+ )
+ if self._active_tasks[0] == 0:
+ msg = "No tasks are running."
  else:
- msg = "%d tasks are running: " % self._active_tasks[0]
- msg += "%s." % tasks_print
+ if self._active_tasks[0] == 1:
+ msg = "1 task is running: "
+ else:
+ msg = "%d tasks are running: " % self._active_tasks[0]
+ msg += "%s." % tasks_print
 
- self._logger(msg, system_msg=True)
+ self._logger(msg, system_msg=True)
 
- if len(self._run_queue) == 0:
- msg = "No tasks are waiting in the queue."
- else:
- if len(self._run_queue) == 1:
- msg = "1 task is waiting in the queue: "
+ if len(self._run_queue) == 0:
+ msg = "No tasks are waiting in the queue."
  else:
- msg = "%d tasks are waiting in the queue." % len(
- self._run_queue
- )
+ if len(self._run_queue) == 1:
+ msg = "1 task is waiting in the queue: "
+ else:
+ msg = "%d tasks are waiting in the queue." % len(
+ self._run_queue
+ )
 
- self._logger(msg, system_msg=True)
- if len(self._unprocessed_steps) > 0:
- if len(self._unprocessed_steps) == 1:
- msg = "%s step has not started" % (
- next(iter(self._unprocessed_steps)),
- )
- else:
- msg = "%d steps have not started: " % len(
- self._unprocessed_steps
- )
- msg += "%s." % ", ".join(self._unprocessed_steps)
  self._logger(msg, system_msg=True)
-
- except KeyboardInterrupt as ex:
- self._logger("Workflow interrupted.", system_msg=True, bad=True)
- self._killall()
- exception = ex
- raise
- except Exception as ex:
- self._logger("Workflow failed.", system_msg=True, bad=True)
- self._killall()
- exception = ex
- raise
- finally:
- # on finish clean tasks
- for step in self._flow:
- for deco in step.decorators:
- deco.runtime_finished(exception)
+ if len(self._unprocessed_steps) > 0:
+ if len(self._unprocessed_steps) == 1:
+ msg = "%s step has not started" % (
+ next(iter(self._unprocessed_steps)),
+ )
+ else:
+ msg = "%d steps have not started: " % len(
+ self._unprocessed_steps
+ )
+ msg += "%s." % ", ".join(self._unprocessed_steps)
+ self._logger(msg, system_msg=True)
+
+ except KeyboardInterrupt as ex:
+ self._logger("Workflow interrupted.", system_msg=True, bad=True)
+ self._killall()
+ exception = ex
+ raise
+ except Exception as ex:
+ self._logger("Workflow failed.", system_msg=True, bad=True)
+ self._killall()
+ exception = ex
+ raise
+ finally:
+ # on finish clean tasks
+ for step in self._flow:
+ for deco in step.decorators:
+ deco.runtime_finished(exception)
 
  # assert that end was executed and it was successful
  if ("end", ()) in self._finished:
@@ -957,7 +977,7 @@
  )
  return
 
- worker = Worker(task, self._max_log_size)
+ worker = Worker(task, self._max_log_size, self._config_file_name)
  for fd in worker.fds():
  self._workers[fd] = worker
  self._poll.add(fd)
@@ -1237,7 +1257,6 @@
  # Open the output datastore only if the task is not being cloned.
  if not self._is_cloned:
  self.new_attempt()
-
  for deco in decos:
  deco.runtime_task_created(
  self._ds,
@@ -1504,6 +1523,15 @@
  for deco in flow_decorators(self.task.flow):
  self.top_level_options.update(deco.get_top_level_options())
 
+ # We also pass configuration options using the kv.<name> syntax which will cause
+ # the configuration options to be loaded from the CONFIG file (or local-config-file
+ # in the case of the local runtime)
+ configs = self.task.flow._flow_state.get(_FlowState.CONFIGS)
+ if configs:
+ self.top_level_options["config-value"] = [
+ (k, ConfigInput.make_key_name(k)) for k in configs
+ ]
+
  self.commands = ["step"]
  self.command_args = [self.task.step]
  self.command_options = {
@@ -1537,12 +1565,15 @@
  for value in v:
  yield "--%s" % k
  if not isinstance(value, bool):
- yield to_unicode(value)
+ value = value if isinstance(value, tuple) else (value,)
+ for vv in value:
+ yield to_unicode(vv)
 
  args = list(self.entrypoint)
  args.extend(_options(self.top_level_options))
  args.extend(self.commands)
  args.extend(self.command_args)
+
  args.extend(_options(self.command_options))
  return args
 
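These two hunks work together: each config is registered under the single --config-value option as a (name, key) tuple, and _options() now unpacks tuples into consecutive command-line tokens. A simplified, runnable replica of the generator (config name invented; assuming make_key_name produces the kv.<name> form described in the comment above):

    def _options(mapping):
        # simplified version of CLIArgs._options after this change
        for k, v in mapping.items():
            for value in v:
                yield "--%s" % k
                if not isinstance(value, bool):
                    value = value if isinstance(value, tuple) else (value,)
                    for vv in value:
                        yield str(vv)

    print(list(_options({"config-value": [("my_cfg", "kv.my_cfg")]})))
    # -> ['--config-value', 'my_cfg', 'kv.my_cfg']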
@@ -1554,8 +1585,9 @@
 
 
  class Worker(object):
- def __init__(self, task, max_logs_size):
+ def __init__(self, task, max_logs_size, config_file_name):
  self.task = task
+ self._config_file_name = config_file_name
  self._proc = self._launch()
 
  if task.retries > task.user_code_retries:
@@ -1607,6 +1639,12 @@ class Worker(object):
  self.task.user_code_retries,
  self.task.ubf_context,
  )
+
+ # Add user configurations using a file to avoid using up too much space on the
+ # command line
+ if self._config_file_name:
+ args.top_level_options["local-config-file"] = self._config_file_name
+ # Pass configuration options
  env.update(args.get_env())
  env["PYTHONUNBUFFERED"] = "x"
  tracing.inject_tracing_vars(env)
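Putting the Worker and CLIArgs changes together, a locally launched step subprocess ends up with a command line of roughly this shape (flow, config name, and temp path invented for illustration):

    python myflow.py --config-value my_cfg kv.my_cfg \
        --local-config-file /tmp/tmp1a2b3c step start ...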
metaflow/sidecar/sidecar_worker.py CHANGED
@@ -48,8 +48,8 @@ def process_messages(worker_type, worker):
  pass
 
 
- @tracing.cli_entrypoint("sidecar")
  @click.command(help="Initialize workers")
+ @tracing.cli_entrypoint("sidecar")
  @click.argument("worker-type")
  def main(worker_type):
  sidecar_type = SIDECARS.get(worker_type)
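The swap matters because stacked decorators apply bottom-up: previously @tracing.cli_entrypoint sat on top and therefore wrapped the click.Command object that @click.command had already produced; with @click.command outermost, click wraps the traced callback instead, so the tracing span opens when the command body actually runs:

    @click.command(help="Initialize workers")  # applied last: wraps the traced function
    @tracing.cli_entrypoint("sidecar")         # applied second: wraps main itself
    @click.argument("worker-type")
    def main(worker_type):
        ...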