ob-metaflow 2.12.36.1__py2.py3-none-any.whl → 2.12.36.2__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of ob-metaflow might be problematic.
Files changed (56)
  1. metaflow/__init__.py +3 -0
  2. metaflow/cli.py +84 -697
  3. metaflow/cli_args.py +17 -0
  4. metaflow/cli_components/__init__.py +0 -0
  5. metaflow/cli_components/dump_cmd.py +96 -0
  6. metaflow/cli_components/init_cmd.py +51 -0
  7. metaflow/cli_components/run_cmds.py +358 -0
  8. metaflow/cli_components/step_cmd.py +189 -0
  9. metaflow/cli_components/utils.py +140 -0
  10. metaflow/cmd/develop/stub_generator.py +9 -2
  11. metaflow/decorators.py +63 -2
  12. metaflow/extension_support/plugins.py +41 -27
  13. metaflow/flowspec.py +156 -16
  14. metaflow/includefile.py +50 -22
  15. metaflow/metaflow_config.py +1 -1
  16. metaflow/package.py +17 -3
  17. metaflow/parameters.py +80 -23
  18. metaflow/plugins/__init__.py +4 -0
  19. metaflow/plugins/airflow/airflow_cli.py +1 -0
  20. metaflow/plugins/argo/argo_workflows.py +44 -4
  21. metaflow/plugins/argo/argo_workflows_cli.py +1 -0
  22. metaflow/plugins/argo/argo_workflows_deployer_objects.py +5 -1
  23. metaflow/plugins/aws/batch/batch_decorator.py +2 -2
  24. metaflow/plugins/aws/step_functions/step_functions.py +32 -0
  25. metaflow/plugins/aws/step_functions/step_functions_cli.py +1 -0
  26. metaflow/plugins/aws/step_functions/step_functions_deployer_objects.py +3 -0
  27. metaflow/plugins/datatools/s3/s3op.py +3 -3
  28. metaflow/plugins/kubernetes/kubernetes.py +3 -3
  29. metaflow/plugins/kubernetes/kubernetes_cli.py +1 -1
  30. metaflow/plugins/kubernetes/kubernetes_decorator.py +2 -2
  31. metaflow/plugins/kubernetes/kubernetes_job.py +3 -3
  32. metaflow/plugins/pypi/conda_decorator.py +20 -10
  33. metaflow/plugins/pypi/pypi_decorator.py +11 -9
  34. metaflow/plugins/timeout_decorator.py +2 -2
  35. metaflow/runner/click_api.py +73 -19
  36. metaflow/runner/deployer.py +1 -1
  37. metaflow/runner/deployer_impl.py +2 -2
  38. metaflow/runner/metaflow_runner.py +4 -1
  39. metaflow/runner/nbdeploy.py +2 -0
  40. metaflow/runner/nbrun.py +1 -1
  41. metaflow/runner/subprocess_manager.py +3 -1
  42. metaflow/runner/utils.py +37 -20
  43. metaflow/runtime.py +111 -73
  44. metaflow/sidecar/sidecar_worker.py +1 -1
  45. metaflow/user_configs/__init__.py +0 -0
  46. metaflow/user_configs/config_decorators.py +563 -0
  47. metaflow/user_configs/config_options.py +495 -0
  48. metaflow/user_configs/config_parameters.py +386 -0
  49. metaflow/util.py +17 -0
  50. metaflow/version.py +1 -1
  51. {ob_metaflow-2.12.36.1.dist-info → ob_metaflow-2.12.36.2.dist-info}/METADATA +3 -2
  52. {ob_metaflow-2.12.36.1.dist-info → ob_metaflow-2.12.36.2.dist-info}/RECORD +56 -46
  53. {ob_metaflow-2.12.36.1.dist-info → ob_metaflow-2.12.36.2.dist-info}/LICENSE +0 -0
  54. {ob_metaflow-2.12.36.1.dist-info → ob_metaflow-2.12.36.2.dist-info}/WHEEL +0 -0
  55. {ob_metaflow-2.12.36.1.dist-info → ob_metaflow-2.12.36.2.dist-info}/entry_points.txt +0 -0
  56. {ob_metaflow-2.12.36.1.dist-info → ob_metaflow-2.12.36.2.dist-info}/top_level.txt +0 -0
metaflow/runner/deployer.py CHANGED
@@ -64,7 +64,7 @@ class Deployer(metaclass=DeployerMeta):
  The directory to run the subprocess in; if not specified, the current
  directory is used.
  file_read_timeout : int, default 3600
- The timeout until which we try to read the deployer attribute file.
+ The timeout until which we try to read the deployer attribute file (in seconds).
  **kwargs : Any
  Additional arguments that you would pass to `python myflow.py` before
  the deployment command.
metaflow/runner/deployer_impl.py CHANGED
@@ -37,7 +37,7 @@ class DeployerImpl(object):
  The directory to run the subprocess in; if not specified, the current
  directory is used.
  file_read_timeout : int, default 3600
- The timeout until which we try to read the deployer attribute file.
+ The timeout until which we try to read the deployer attribute file (in seconds).
  **kwargs : Any
  Additional arguments that you would pass to `python myflow.py` before
  the deployment command.
@@ -144,7 +144,7 @@ class DeployerImpl(object):
  # Additional info is used to pass additional deployer specific information.
  # It is used in non-OSS deployers (extensions).
  self.additional_info = content.get("additional_info", {})
-
+ command_obj.sync_wait()
  if command_obj.process.returncode == 0:
  return create_class(deployer=self)
 
metaflow/runner/metaflow_runner.py CHANGED
@@ -221,7 +221,7 @@ class Runner(object):
  The directory to run the subprocess in; if not specified, the current
  directory is used.
  file_read_timeout : int, default 3600
- The timeout until which we try to read the runner attribute file.
+ The timeout until which we try to read the runner attribute file (in seconds).
  **kwargs : Any
  Additional arguments that you would pass to `python myflow.py` before
  the `run` command.
@@ -272,6 +272,9 @@ class Runner(object):
 
  def __get_executing_run(self, attribute_file_fd, command_obj):
  content = handle_timeout(attribute_file_fd, command_obj, self.file_read_timeout)
+
+ command_obj.sync_wait()
+
  content = json.loads(content)
  pathspec = "%s/%s" % (content.get("flow_name"), content.get("run_id"))
 
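All of the docstring fixes in this group make the same clarification: file_read_timeout is expressed in seconds, not milliseconds. A minimal usage sketch (flow file name invented for illustration; Runner is Metaflow's programmatic entry point for runs):

    from metaflow import Runner

    # file_read_timeout is in seconds (default 3600), not milliseconds
    with Runner("myflow.py", file_read_timeout=600).run() as running:
        print(running.run)  # Run object, available once the attribute file is read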
metaflow/runner/nbdeploy.py CHANGED
@@ -46,6 +46,8 @@ class NBDeployer(object):
  base_dir : str, optional, default None
  The directory to run the subprocess in; if not specified, the current
  working directory is used.
+ file_read_timeout : int, default 3600
+ The timeout until which we try to read the deployer attribute file (in seconds).
  **kwargs : Any
  Additional arguments that you would pass to `python myflow.py` i.e. options
  listed in `python myflow.py --help`
metaflow/runner/nbrun.py CHANGED
@@ -44,7 +44,7 @@ class NBRunner(object):
  The directory to run the subprocess in; if not specified, the current
  working directory is used.
  file_read_timeout : int, default 3600
- The timeout until which we try to read the runner attribute file.
+ The timeout until which we try to read the runner attribute file (in seconds).
  **kwargs : Any
  Additional arguments that you would pass to `python myflow.py` before
  the `run` command.
metaflow/runner/subprocess_manager.py CHANGED
@@ -120,6 +120,9 @@ class SubprocessManager(object):
  """
  Run a command synchronously and return its process ID.
 
+ Note: in no case does this wait for the process to *finish*. Use sync_wait()
+ to wait for the command to finish.
+
  Parameters
  ----------
  command : List[str]
@@ -145,7 +148,6 @@
  command_obj = CommandManager(command, env, cwd)
  pid = command_obj.run(show_output=show_output)
  self.commands[pid] = command_obj
- command_obj.sync_wait()
  return pid
 
  async def async_run_command(
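Taken together, these hunks move the blocking wait out of run_command(): the call now returns as soon as the subprocess is spawned, and callers such as DeployerImpl and Runner above invoke sync_wait() themselves after the attribute file has been read. A sketch of the new calling pattern, using only the APIs visible in this diff:

    from metaflow.runner.subprocess_manager import SubprocessManager

    spm = SubprocessManager()
    # Returns the pid as soon as the process is spawned; does NOT wait for it
    pid = spm.run_command(["python", "myflow.py", "run"])
    command_obj = spm.commands[pid]
    # ... read the attribute file / FIFO here while the process is running ...
    command_obj.sync_wait()  # now block until the subprocess finishes
    assert command_obj.process.returncode == 0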
metaflow/runner/utils.py CHANGED
@@ -91,7 +91,7 @@ def read_from_fifo_when_ready(
  encoding : str, optional
  Encoding to use while reading the file, by default "utf-8".
  timeout : int, optional
- Timeout for reading the file in milliseconds, by default 3600.
+ Timeout for reading the file in seconds, by default 3600.
 
  Returns
  -------
@@ -107,30 +107,47 @@
  content to the FIFO.
  """
  content = bytearray()
-
  poll = select.poll()
  poll.register(fifo_fd, select.POLLIN)
-
+ max_timeout = 3 # Wait for 10 * 3 = 30 ms after last write
  while True:
- poll_begin = time.time()
- poll.poll(timeout)
- timeout -= 1000 * (time.time() - poll_begin)
-
- if timeout <= 0:
+ if timeout < 0:
  raise TimeoutError("Timeout while waiting for the file content")
 
+ poll_begin = time.time()
+ # We poll for a very short time to be also able to check if the file was closed
+ # If the file is closed, we assume that we only have one writer so if we have
+ # data, we break out. This is to work around issues in macos
+ events = poll.poll(min(10, timeout * 1000))
+ timeout -= time.time() - poll_begin
+
  try:
- data = os.read(fifo_fd, 128)
- while data:
+ data = os.read(fifo_fd, 8192)
+ if data:
  content += data
- data = os.read(fifo_fd, 128)
-
- # Read from a non-blocking closed FIFO returns an empty byte array
- break
-
+ else:
+ if len(events):
+ # We read an EOF -- consider the file done
+ break
+ else:
+ # We had no events (just a timeout) and the read didn't return
+ # an exception so the file is still open; we continue waiting for data
+ # Unfortunately, on MacOS, it seems that even *after* the file is
+ # closed on the other end, we still don't get a BlockingIOError so
+ # we hack our way and timeout if there is no write in 30ms which is
+ # a relative eternity for file writes.
+ if content:
+ if max_timeout <= 0:
+ break
+ max_timeout -= 1
+ continue
  except BlockingIOError:
- # FIFO is open but no data is available yet
- continue
+ has_blocking_error = True
+ if content:
+ # The file was closed
+ break
+ # else, if we have no content, we continue waiting for the file to be open
+ # and written to.
 
  if not content and check_process_exited(command_obj):
  raise CalledProcessError(command_obj.process.returncode, command_obj.command)
@@ -156,7 +173,7 @@ async def async_read_from_fifo_when_ready(
  encoding : str, optional
  Encoding to use while reading the file, by default "utf-8".
  timeout : int, optional
- Timeout for reading the file in milliseconds, by default 3600.
+ Timeout for reading the file in seconds, by default 3600.
 
  Returns
  -------
@@ -206,7 +223,7 @@ def handle_timeout(
  command_obj : CommandManager
  Command manager object that encapsulates the running command details.
  file_read_timeout : int
- Timeout for reading the file.
+ Timeout for reading the file, in seconds
 
  Returns
  -------
@@ -243,7 +260,7 @@ async def async_handle_timeout(
  command_obj : CommandManager
  Command manager object that encapsulates the running command details.
  file_read_timeout : int
- Timeout for reading the file.
+ Timeout for reading the file, in seconds
 
  Returns
  -------
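The read_from_fifo_when_ready rework above replaces one long poll with short 10 ms slices so the loop can tell "writer closed the FIFO" apart from "no data yet", reads 8 KB chunks instead of 128 bytes, and, because macOS does not reliably raise BlockingIOError after the writer closes, gives up after roughly 30 ms of silence (three empty polls) once some content has arrived. A self-contained sketch of the same technique (the function name and scaffolding are illustrative, not the library API):

    import os
    import select
    import time

    def read_fifo_nonblocking(fifo_fd, timeout=3600):
        # fifo_fd must be opened with os.O_NONBLOCK so os.read raises
        # BlockingIOError instead of blocking
        content = bytearray()
        poll = select.poll()
        poll.register(fifo_fd, select.POLLIN)
        quiet_polls = 3  # 3 polls x 10 ms = ~30 ms grace period after last write
        while True:
            if timeout < 0:
                raise TimeoutError("Timeout while waiting for the file content")
            start = time.time()
            # Poll in short slices so EOF / writer-close is noticed quickly
            events = poll.poll(min(10, timeout * 1000))
            timeout -= time.time() - start
            try:
                data = os.read(fifo_fd, 8192)
                if data:
                    content += data
                elif events:
                    break  # empty read after POLLIN: the writer closed the FIFO
                elif content:
                    quiet_polls -= 1
                    if quiet_polls <= 0:
                        break  # macOS workaround: silence after data means done
            except BlockingIOError:
                if content:
                    break  # FIFO closed and fully drained
                # no content yet: keep waiting for a writer to appear
        return bytes(content)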
metaflow/runtime.py CHANGED
@@ -6,10 +6,12 @@ using local / remote processes
  """
 
  from __future__ import print_function
+ import json
  import os
  import sys
  import fcntl
  import re
+ import tempfile
  import time
  import subprocess
  from datetime import datetime
@@ -32,6 +34,7 @@ from . import procpoll
  from .datastore import TaskDataStoreSet
  from .debug import debug
  from .decorators import flow_decorators
+ from .flowspec import _FlowState
  from .mflog import mflog, RUNTIME_LOG_SOURCE
  from .util import to_unicode, compress_list, unicode_type
  from .clone_util import clone_task_helper
@@ -40,6 +43,10 @@ from .unbounded_foreach import (
  UBF_CONTROL,
  UBF_TASK,
  )
+
+ from .user_configs.config_options import ConfigInput
+ from .user_configs.config_parameters import dump_config_values
+
  import metaflow.tracing as tracing
 
  MAX_WORKERS = 16
@@ -471,82 +478,95 @@ class NativeRuntime(object):
  else:
  self._queue_push("start", {})
  progress_tstamp = time.time()
- try:
- # main scheduling loop
- exception = None
- while self._run_queue or self._active_tasks[0] > 0 or self._cloned_tasks:
- # 1. are any of the current workers finished?
- if self._cloned_tasks:
- finished_tasks = self._cloned_tasks
- # reset the list of cloned tasks and let poll_workers handle
- # the remaining transition
- self._cloned_tasks = []
- else:
- finished_tasks = list(self._poll_workers())
- # 2. push new tasks triggered by the finished tasks to the queue
- self._queue_tasks(finished_tasks)
- # 3. if there are available worker slots, pop and start tasks
- # from the queue.
- self._launch_workers()
-
- if time.time() - progress_tstamp > PROGRESS_INTERVAL:
- progress_tstamp = time.time()
- tasks_print = ", ".join(
- [
- "%s (%d running; %d done)" % (k, v[0], v[1])
- for k, v in self._active_tasks.items()
- if k != 0 and v[0] > 0
- ]
- )
- if self._active_tasks[0] == 0:
- msg = "No tasks are running."
+ with tempfile.NamedTemporaryFile(mode="w", encoding="utf-8") as config_file:
+ # Configurations are passed through a file to avoid overloading the
+ # command-line. We only need to create this file once and it can be reused
+ # for any task launch
+ config_value = dump_config_values(self._flow)
+ if config_value:
+ json.dump(config_value, config_file)
+ config_file.flush()
+ self._config_file_name = config_file.name
+ else:
+ self._config_file_name = None
+ try:
+ # main scheduling loop
+ exception = None
+ while (
+ self._run_queue or self._active_tasks[0] > 0 or self._cloned_tasks
+ ):
+ # 1. are any of the current workers finished?
+ if self._cloned_tasks:
+ finished_tasks = self._cloned_tasks
+ # reset the list of cloned tasks and let poll_workers handle
+ # the remaining transition
+ self._cloned_tasks = []
  else:
- if self._active_tasks[0] == 1:
- msg = "1 task is running: "
+ finished_tasks = list(self._poll_workers())
+ # 2. push new tasks triggered by the finished tasks to the queue
+ self._queue_tasks(finished_tasks)
+ # 3. if there are available worker slots, pop and start tasks
+ # from the queue.
+ self._launch_workers()
+
+ if time.time() - progress_tstamp > PROGRESS_INTERVAL:
+ progress_tstamp = time.time()
+ tasks_print = ", ".join(
+ [
+ "%s (%d running; %d done)" % (k, v[0], v[1])
+ for k, v in self._active_tasks.items()
+ if k != 0 and v[0] > 0
+ ]
+ )
+ if self._active_tasks[0] == 0:
+ msg = "No tasks are running."
  else:
- msg = "%d tasks are running: " % self._active_tasks[0]
- msg += "%s." % tasks_print
+ if self._active_tasks[0] == 1:
+ msg = "1 task is running: "
+ else:
+ msg = "%d tasks are running: " % self._active_tasks[0]
+ msg += "%s." % tasks_print
 
- self._logger(msg, system_msg=True)
+ self._logger(msg, system_msg=True)
 
- if len(self._run_queue) == 0:
- msg = "No tasks are waiting in the queue."
- else:
- if len(self._run_queue) == 1:
- msg = "1 task is waiting in the queue: "
+ if len(self._run_queue) == 0:
+ msg = "No tasks are waiting in the queue."
  else:
- msg = "%d tasks are waiting in the queue." % len(
- self._run_queue
- )
+ if len(self._run_queue) == 1:
+ msg = "1 task is waiting in the queue: "
+ else:
+ msg = "%d tasks are waiting in the queue." % len(
+ self._run_queue
+ )
 
- self._logger(msg, system_msg=True)
- if len(self._unprocessed_steps) > 0:
- if len(self._unprocessed_steps) == 1:
- msg = "%s step has not started" % (
- next(iter(self._unprocessed_steps)),
- )
- else:
- msg = "%d steps have not started: " % len(
- self._unprocessed_steps
- )
- msg += "%s." % ", ".join(self._unprocessed_steps)
  self._logger(msg, system_msg=True)
-
- except KeyboardInterrupt as ex:
- self._logger("Workflow interrupted.", system_msg=True, bad=True)
- self._killall()
- exception = ex
- raise
- except Exception as ex:
- self._logger("Workflow failed.", system_msg=True, bad=True)
- self._killall()
- exception = ex
- raise
- finally:
- # on finish clean tasks
- for step in self._flow:
- for deco in step.decorators:
- deco.runtime_finished(exception)
+ if len(self._unprocessed_steps) > 0:
+ if len(self._unprocessed_steps) == 1:
+ msg = "%s step has not started" % (
+ next(iter(self._unprocessed_steps)),
+ )
+ else:
+ msg = "%d steps have not started: " % len(
+ self._unprocessed_steps
+ )
+ msg += "%s." % ", ".join(self._unprocessed_steps)
+ self._logger(msg, system_msg=True)
+
+ except KeyboardInterrupt as ex:
+ self._logger("Workflow interrupted.", system_msg=True, bad=True)
+ self._killall()
+ exception = ex
+ raise
+ except Exception as ex:
+ self._logger("Workflow failed.", system_msg=True, bad=True)
+ self._killall()
+ exception = ex
+ raise
+ finally:
+ # on finish clean tasks
+ for step in self._flow:
+ for deco in step.decorators:
+ deco.runtime_finished(exception)
 
  # assert that end was executed and it was successful
  if ("end", ()) in self._finished:
@@ -957,7 +977,7 @@
  )
  return
 
- worker = Worker(task, self._max_log_size)
+ worker = Worker(task, self._max_log_size, self._config_file_name)
  for fd in worker.fds():
  self._workers[fd] = worker
  self._poll.add(fd)
@@ -1237,7 +1257,6 @@
  # Open the output datastore only if the task is not being cloned.
  if not self._is_cloned:
  self.new_attempt()
-
  for deco in decos:
  deco.runtime_task_created(
  self._ds,
@@ -1504,6 +1523,15 @@
  for deco in flow_decorators(self.task.flow):
  self.top_level_options.update(deco.get_top_level_options())
 
+ # We also pass configuration options using the kv.<name> syntax which will cause
+ # the configuration options to be loaded from the CONFIG file (or local-config-file
+ # in the case of the local runtime)
+ configs = self.task.flow._flow_state.get(_FlowState.CONFIGS)
+ if configs:
+ self.top_level_options["config-value"] = [
+ (k, ConfigInput.make_key_name(k)) for k in configs
+ ]
+
  self.commands = ["step"]
  self.command_args = [self.task.step]
  self.command_options = {
@@ -1537,12 +1565,15 @@
  for value in v:
  yield "--%s" % k
  if not isinstance(value, bool):
- yield to_unicode(value)
+ value = value if isinstance(value, tuple) else (value,)
+ for vv in value:
+ yield to_unicode(vv)
 
  args = list(self.entrypoint)
  args.extend(_options(self.top_level_options))
  args.extend(self.commands)
  args.extend(self.command_args)
+
  args.extend(_options(self.command_options))
  return args
 
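These two hunks work together: each config is registered under the single --config-value option as a (name, key) tuple, and _options() now unpacks tuples into consecutive command-line tokens. A simplified, runnable replica of the generator (config name invented; assuming make_key_name produces the kv.<name> form described in the comment above):

    def _options(mapping):
        # simplified version of CLIArgs._options after this change
        for k, v in mapping.items():
            for value in v:
                yield "--%s" % k
                if not isinstance(value, bool):
                    value = value if isinstance(value, tuple) else (value,)
                    for vv in value:
                        yield str(vv)

    print(list(_options({"config-value": [("my_cfg", "kv.my_cfg")]})))
    # -> ['--config-value', 'my_cfg', 'kv.my_cfg']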
@@ -1554,8 +1585,9 @@
 
 
  class Worker(object):
- def __init__(self, task, max_logs_size):
+ def __init__(self, task, max_logs_size, config_file_name):
  self.task = task
+ self._config_file_name = config_file_name
  self._proc = self._launch()
 
  if task.retries > task.user_code_retries:
@@ -1607,6 +1639,12 @@ class Worker(object):
  self.task.user_code_retries,
  self.task.ubf_context,
  )
+
+ # Add user configurations using a file to avoid using up too much space on the
+ # command line
+ if self._config_file_name:
+ args.top_level_options["local-config-file"] = self._config_file_name
+ # Pass configuration options
  env.update(args.get_env())
  env["PYTHONUNBUFFERED"] = "x"
  tracing.inject_tracing_vars(env)
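Putting the Worker and CLIArgs changes together, a locally launched step subprocess ends up with a command line of roughly this shape (flow, config name, and temp path invented for illustration):

    python myflow.py --config-value my_cfg kv.my_cfg \
        --local-config-file /tmp/tmp1a2b3c step start ...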
metaflow/sidecar/sidecar_worker.py CHANGED
@@ -48,8 +48,8 @@ def process_messages(worker_type, worker):
  pass
 
 
- @tracing.cli_entrypoint("sidecar")
  @click.command(help="Initialize workers")
+ @tracing.cli_entrypoint("sidecar")
  @click.argument("worker-type")
  def main(worker_type):
  sidecar_type = SIDECARS.get(worker_type)
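The swap matters because stacked decorators apply bottom-up: previously @tracing.cli_entrypoint sat on top and therefore wrapped the click.Command object that @click.command had already produced; with @click.command outermost, click wraps the traced callback instead, so the tracing span opens when the command body actually runs:

    @click.command(help="Initialize workers")  # applied last: wraps the traced function
    @tracing.cli_entrypoint("sidecar")         # applied second: wraps main itself
    @click.argument("worker-type")
    def main(worker_type):
        ...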