ob-metaflow 2.12.30.2__py2.py3-none-any.whl → 2.13.6.1__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ob-metaflow might be problematic. (The linked details are not included in this text capture.)

Files changed (96)
  1. metaflow/__init__.py +3 -0
  2. metaflow/cards.py +1 -0
  3. metaflow/cli.py +185 -717
  4. metaflow/cli_args.py +17 -0
  5. metaflow/cli_components/__init__.py +0 -0
  6. metaflow/cli_components/dump_cmd.py +96 -0
  7. metaflow/cli_components/init_cmd.py +51 -0
  8. metaflow/cli_components/run_cmds.py +362 -0
  9. metaflow/cli_components/step_cmd.py +176 -0
  10. metaflow/cli_components/utils.py +140 -0
  11. metaflow/cmd/develop/stub_generator.py +9 -2
  12. metaflow/datastore/flow_datastore.py +2 -2
  13. metaflow/decorators.py +63 -2
  14. metaflow/exception.py +8 -2
  15. metaflow/extension_support/plugins.py +42 -27
  16. metaflow/flowspec.py +176 -23
  17. metaflow/graph.py +28 -27
  18. metaflow/includefile.py +50 -22
  19. metaflow/lint.py +35 -20
  20. metaflow/metadata_provider/heartbeat.py +23 -8
  21. metaflow/metaflow_config.py +10 -1
  22. metaflow/multicore_utils.py +31 -14
  23. metaflow/package.py +17 -3
  24. metaflow/parameters.py +97 -25
  25. metaflow/plugins/__init__.py +22 -0
  26. metaflow/plugins/airflow/airflow.py +18 -17
  27. metaflow/plugins/airflow/airflow_cli.py +1 -0
  28. metaflow/plugins/argo/argo_client.py +0 -2
  29. metaflow/plugins/argo/argo_workflows.py +195 -132
  30. metaflow/plugins/argo/argo_workflows_cli.py +1 -1
  31. metaflow/plugins/argo/argo_workflows_decorator.py +2 -4
  32. metaflow/plugins/argo/argo_workflows_deployer_objects.py +51 -9
  33. metaflow/plugins/argo/jobset_input_paths.py +0 -1
  34. metaflow/plugins/aws/aws_utils.py +6 -1
  35. metaflow/plugins/aws/batch/batch_client.py +1 -3
  36. metaflow/plugins/aws/batch/batch_decorator.py +13 -13
  37. metaflow/plugins/aws/secrets_manager/aws_secrets_manager_secrets_provider.py +13 -10
  38. metaflow/plugins/aws/step_functions/dynamo_db_client.py +0 -3
  39. metaflow/plugins/aws/step_functions/production_token.py +1 -1
  40. metaflow/plugins/aws/step_functions/step_functions.py +33 -1
  41. metaflow/plugins/aws/step_functions/step_functions_cli.py +1 -1
  42. metaflow/plugins/aws/step_functions/step_functions_decorator.py +0 -1
  43. metaflow/plugins/aws/step_functions/step_functions_deployer_objects.py +7 -9
  44. metaflow/plugins/cards/card_cli.py +7 -2
  45. metaflow/plugins/cards/card_creator.py +1 -0
  46. metaflow/plugins/cards/card_decorator.py +79 -8
  47. metaflow/plugins/cards/card_modules/basic.py +56 -5
  48. metaflow/plugins/cards/card_modules/card.py +16 -1
  49. metaflow/plugins/cards/card_modules/components.py +64 -16
  50. metaflow/plugins/cards/card_modules/main.js +27 -25
  51. metaflow/plugins/cards/card_modules/test_cards.py +4 -4
  52. metaflow/plugins/cards/component_serializer.py +1 -1
  53. metaflow/plugins/datatools/s3/s3.py +12 -4
  54. metaflow/plugins/datatools/s3/s3op.py +3 -3
  55. metaflow/plugins/events_decorator.py +338 -186
  56. metaflow/plugins/kubernetes/kube_utils.py +84 -1
  57. metaflow/plugins/kubernetes/kubernetes.py +40 -92
  58. metaflow/plugins/kubernetes/kubernetes_cli.py +32 -7
  59. metaflow/plugins/kubernetes/kubernetes_decorator.py +76 -4
  60. metaflow/plugins/kubernetes/kubernetes_job.py +23 -20
  61. metaflow/plugins/kubernetes/kubernetes_jobsets.py +41 -20
  62. metaflow/plugins/kubernetes/spot_metadata_cli.py +69 -0
  63. metaflow/plugins/kubernetes/spot_monitor_sidecar.py +109 -0
  64. metaflow/plugins/parallel_decorator.py +4 -1
  65. metaflow/plugins/project_decorator.py +33 -5
  66. metaflow/plugins/pypi/bootstrap.py +249 -81
  67. metaflow/plugins/pypi/conda_decorator.py +20 -10
  68. metaflow/plugins/pypi/conda_environment.py +83 -27
  69. metaflow/plugins/pypi/micromamba.py +82 -37
  70. metaflow/plugins/pypi/pip.py +9 -6
  71. metaflow/plugins/pypi/pypi_decorator.py +11 -9
  72. metaflow/plugins/pypi/utils.py +4 -2
  73. metaflow/plugins/timeout_decorator.py +2 -2
  74. metaflow/runner/click_api.py +240 -50
  75. metaflow/runner/deployer.py +1 -1
  76. metaflow/runner/deployer_impl.py +12 -11
  77. metaflow/runner/metaflow_runner.py +68 -34
  78. metaflow/runner/nbdeploy.py +2 -0
  79. metaflow/runner/nbrun.py +1 -1
  80. metaflow/runner/subprocess_manager.py +61 -10
  81. metaflow/runner/utils.py +208 -44
  82. metaflow/runtime.py +216 -112
  83. metaflow/sidecar/sidecar_worker.py +1 -1
  84. metaflow/tracing/tracing_modules.py +4 -1
  85. metaflow/user_configs/__init__.py +0 -0
  86. metaflow/user_configs/config_decorators.py +563 -0
  87. metaflow/user_configs/config_options.py +548 -0
  88. metaflow/user_configs/config_parameters.py +436 -0
  89. metaflow/util.py +22 -0
  90. metaflow/version.py +1 -1
  91. {ob_metaflow-2.12.30.2.dist-info → ob_metaflow-2.13.6.1.dist-info}/METADATA +12 -3
  92. {ob_metaflow-2.12.30.2.dist-info → ob_metaflow-2.13.6.1.dist-info}/RECORD +96 -84
  93. {ob_metaflow-2.12.30.2.dist-info → ob_metaflow-2.13.6.1.dist-info}/WHEEL +1 -1
  94. {ob_metaflow-2.12.30.2.dist-info → ob_metaflow-2.13.6.1.dist-info}/LICENSE +0 -0
  95. {ob_metaflow-2.12.30.2.dist-info → ob_metaflow-2.13.6.1.dist-info}/entry_points.txt +0 -0
  96. {ob_metaflow-2.12.30.2.dist-info → ob_metaflow-2.13.6.1.dist-info}/top_level.txt +0 -0
metaflow/runner/nbrun.py CHANGED
@@ -44,7 +44,7 @@ class NBRunner(object):
44
44
  The directory to run the subprocess in; if not specified, the current
45
45
  working directory is used.
46
46
  file_read_timeout : int, default 3600
47
- The timeout until which we try to read the runner attribute file.
47
+ The timeout until which we try to read the runner attribute file (in seconds).
48
48
  **kwargs : Any
49
49
  Additional arguments that you would pass to `python myflow.py` before
50
50
  the `run` command.
metaflow/runner/subprocess_manager.py CHANGED
@@ -9,26 +9,61 @@ import tempfile
9
9
  import threading
10
10
  from typing import Callable, Dict, Iterator, List, Optional, Tuple
11
11
 
12
+ from .utils import check_process_exited
12
13
 
13
- def kill_process_and_descendants(pid, termination_timeout):
14
+
15
+ def kill_processes_and_descendants(pids: List[str], termination_timeout: float):
14
16
  # TODO: there's a race condition that new descendants might
15
17
  # spawn b/w the invocations of 'pkill' and 'kill'.
16
18
  # Needs to be fixed in future.
17
19
  try:
18
- subprocess.check_call(["pkill", "-TERM", "-P", str(pid)])
19
- subprocess.check_call(["kill", "-TERM", str(pid)])
20
+ subprocess.check_call(["pkill", "-TERM", "-P", *pids])
21
+ subprocess.check_call(["kill", "-TERM", *pids])
20
22
  except subprocess.CalledProcessError:
21
23
  pass
22
24
 
23
25
  time.sleep(termination_timeout)
24
26
 
25
27
  try:
26
- subprocess.check_call(["pkill", "-KILL", "-P", str(pid)])
27
- subprocess.check_call(["kill", "-KILL", str(pid)])
28
+ subprocess.check_call(["pkill", "-KILL", "-P", *pids])
29
+ subprocess.check_call(["kill", "-KILL", *pids])
28
30
  except subprocess.CalledProcessError:
29
31
  pass
30
32
 
31
33
 
34
+ async def async_kill_processes_and_descendants(
35
+ pids: List[str], termination_timeout: float
36
+ ):
37
+ # TODO: there's a race condition that new descendants might
38
+ # spawn b/w the invocations of 'pkill' and 'kill'.
39
+ # Needs to be fixed in future.
40
+ try:
41
+ sub_term = await asyncio.create_subprocess_exec("pkill", "-TERM", "-P", *pids)
42
+ await sub_term.wait()
43
+ except Exception:
44
+ pass
45
+
46
+ try:
47
+ main_term = await asyncio.create_subprocess_exec("kill", "-TERM", *pids)
48
+ await main_term.wait()
49
+ except Exception:
50
+ pass
51
+
52
+ await asyncio.sleep(termination_timeout)
53
+
54
+ try:
55
+ sub_kill = await asyncio.create_subprocess_exec("pkill", "-KILL", "-P", *pids)
56
+ await sub_kill.wait()
57
+ except Exception:
58
+ pass
59
+
60
+ try:
61
+ main_kill = await asyncio.create_subprocess_exec("kill", "-KILL", *pids)
62
+ await main_kill.wait()
63
+ except Exception:
64
+ pass
65
+
66
+
32
67
  class LogReadTimeoutError(Exception):
33
68
  """Exception raised when reading logs times out."""
34
69
 
@@ -46,14 +81,28 @@ class SubprocessManager(object):
46
81
  loop = asyncio.get_running_loop()
47
82
  loop.add_signal_handler(
48
83
  signal.SIGINT,
49
- lambda: self._handle_sigint(signum=signal.SIGINT, frame=None),
84
+ lambda: asyncio.create_task(self._async_handle_sigint()),
50
85
  )
51
86
  except RuntimeError:
52
87
  signal.signal(signal.SIGINT, self._handle_sigint)
53
88
 
89
+ async def _async_handle_sigint(self):
90
+ pids = [
91
+ str(command.process.pid)
92
+ for command in self.commands.values()
93
+ if command.process and not check_process_exited(command)
94
+ ]
95
+ if pids:
96
+ await async_kill_processes_and_descendants(pids, termination_timeout=2)
97
+
54
98
  def _handle_sigint(self, signum, frame):
55
- for each_command in self.commands.values():
56
- each_command.kill(termination_timeout=2)
99
+ pids = [
100
+ str(command.process.pid)
101
+ for command in self.commands.values()
102
+ if command.process and not check_process_exited(command)
103
+ ]
104
+ if pids:
105
+ kill_processes_and_descendants(pids, termination_timeout=2)
57
106
 
58
107
  async def __aenter__(self) -> "SubprocessManager":
59
108
  return self
@@ -71,6 +120,9 @@ class SubprocessManager(object):
71
120
  """
72
121
  Run a command synchronously and return its process ID.
73
122
 
123
+ Note: in no case does this wait for the process to *finish*. Use sync_wait()
124
+ to wait for the command to finish.
125
+
74
126
  Parameters
75
127
  ----------
76
128
  command : List[str]
@@ -96,7 +148,6 @@ class SubprocessManager(object):
96
148
  command_obj = CommandManager(command, env, cwd)
97
149
  pid = command_obj.run(show_output=show_output)
98
150
  self.commands[pid] = command_obj
99
- command_obj.sync_wait()
100
151
  return pid
101
152
 
102
153
  async def async_run_command(
@@ -472,7 +523,7 @@ class CommandManager(object):
472
523
  """
473
524
 
474
525
  if self.process is not None:
475
- kill_process_and_descendants(self.process.pid, termination_timeout)
526
+ kill_processes_and_descendants([str(self.process.pid)], termination_timeout)
476
527
  else:
477
528
  print("No process to kill.")
478
529
 
metaflow/runner/utils.py CHANGED
@@ -2,9 +2,11 @@ import os
2
2
  import ast
3
3
  import time
4
4
  import asyncio
5
-
5
+ import tempfile
6
+ import select
7
+ from contextlib import contextmanager
6
8
  from subprocess import CalledProcessError
7
- from typing import Any, Dict, TYPE_CHECKING
9
+ from typing import Any, Dict, TYPE_CHECKING, ContextManager, Tuple
8
10
 
9
11
  if TYPE_CHECKING:
10
12
  import tempfile
@@ -39,45 +41,216 @@ def format_flowfile(cell):
39
41
  return "\n".join(lines)
40
42
 
41
43
 
42
- def check_process_status(
44
+ def check_process_exited(
43
45
  command_obj: "metaflow.runner.subprocess_manager.CommandManager",
44
- ):
46
+ ) -> bool:
45
47
  if isinstance(command_obj.process, asyncio.subprocess.Process):
46
48
  return command_obj.process.returncode is not None
47
49
  else:
48
50
  return command_obj.process.poll() is not None
49
51
 
50
52
 
51
- def read_from_file_when_ready(
52
- file_path: str,
53
+ @contextmanager
54
+ def temporary_fifo() -> ContextManager[Tuple[str, int]]:
55
+ """
56
+ Create and open the read side of a temporary FIFO in a non-blocking mode.
57
+
58
+ Returns
59
+ -------
60
+ str
61
+ Path to the temporary FIFO.
62
+ int
63
+ File descriptor of the temporary FIFO.
64
+ """
65
+ with tempfile.TemporaryDirectory() as temp_dir:
66
+ path = os.path.join(temp_dir, "fifo")
67
+ os.mkfifo(path)
68
+ # Blocks until the write side is opened unless in non-blocking mode
69
+ fd = os.open(path, os.O_RDONLY | os.O_NONBLOCK)
70
+ try:
71
+ yield path, fd
72
+ finally:
73
+ os.close(fd)
74
+
75
+
76
+ def read_from_fifo_when_ready(
77
+ fifo_fd: int,
53
78
  command_obj: "metaflow.runner.subprocess_manager.CommandManager",
54
- timeout: float = 5,
55
- ):
56
- start_time = time.time()
57
- with open(file_path, "r", encoding="utf-8") as file_pointer:
58
- content = file_pointer.read()
59
- while not content:
60
- if check_process_status(command_obj):
61
- # Check to make sure the file hasn't been read yet to avoid a race
62
- # where the file is written between the end of this while loop and the
63
- # poll call above.
64
- content = file_pointer.read()
65
- if content:
79
+ encoding: str = "utf-8",
80
+ timeout: int = 3600,
81
+ ) -> str:
82
+ """
83
+ Read the content from the FIFO file descriptor when it is ready.
84
+
85
+ Parameters
86
+ ----------
87
+ fifo_fd : int
88
+ File descriptor of the FIFO.
89
+ command_obj : CommandManager
90
+ Command manager object that handles the write side of the FIFO.
91
+ encoding : str, optional
92
+ Encoding to use while reading the file, by default "utf-8".
93
+ timeout : int, optional
94
+ Timeout for reading the file in seconds, by default 3600.
95
+
96
+ Returns
97
+ -------
98
+ str
99
+ Content read from the FIFO.
100
+
101
+ Raises
102
+ ------
103
+ TimeoutError
104
+ If no event occurs on the FIFO within the timeout.
105
+ CalledProcessError
106
+ If the process managed by `command_obj` has exited without writing any
107
+ content to the FIFO.
108
+ """
109
+ content = bytearray()
110
+ poll = select.poll()
111
+ poll.register(fifo_fd, select.POLLIN)
112
+ max_timeout = 3 # Wait for 10 * 3 = 30 ms after last write
113
+ while True:
114
+ if check_process_exited(command_obj) and command_obj.process.returncode != 0:
115
+ raise CalledProcessError(
116
+ command_obj.process.returncode, command_obj.command
117
+ )
118
+
119
+ if timeout < 0:
120
+ raise TimeoutError("Timeout while waiting for the file content")
121
+
122
+ poll_begin = time.time()
123
+ # We poll for a very short time to be also able to check if the file was closed
124
+ # If the file is closed, we assume that we only have one writer so if we have
125
+ # data, we break out. This is to work around issues in macos
126
+ events = poll.poll(min(10, timeout * 1000))
127
+ timeout -= time.time() - poll_begin
128
+
129
+ try:
130
+ data = os.read(fifo_fd, 8192)
131
+ if data:
132
+ content += data
133
+ else:
134
+ if len(events):
135
+ # We read an EOF -- consider the file done
66
136
  break
67
- raise CalledProcessError(
68
- command_obj.process.returncode, command_obj.command
69
- )
70
- if time.time() - start_time > timeout:
71
- raise TimeoutError(
72
- "Timeout while waiting for file content from '%s'" % file_path
73
- )
74
- time.sleep(0.1)
75
- content = file_pointer.read()
76
- return content
137
+ else:
138
+ # We had no events (just a timeout) and the read didn't return
139
+ # an exception so the file is still open; we continue waiting for data
140
+ # Unfortunately, on MacOS, it seems that even *after* the file is
141
+ # closed on the other end, we still don't get a BlockingIOError so
142
+ # we hack our way and timeout if there is no write in 30ms which is
143
+ # a relative eternity for file writes.
144
+ if content:
145
+ if max_timeout <= 0:
146
+ break
147
+ max_timeout -= 1
148
+ continue
149
+ except BlockingIOError:
150
+ has_blocking_error = True
151
+ if content:
152
+ # The file was closed
153
+ break
154
+ # else, if we have no content, we continue waiting for the file to be open
155
+ # and written to.
156
+
157
+ if not content and check_process_exited(command_obj):
158
+ raise CalledProcessError(command_obj.process.returncode, command_obj.command)
159
+
160
+ return content.decode(encoding)
161
+
162
+
163
+ async def async_read_from_fifo_when_ready(
164
+ fifo_fd: int,
165
+ command_obj: "metaflow.runner.subprocess_manager.CommandManager",
166
+ encoding: str = "utf-8",
167
+ timeout: int = 3600,
168
+ ) -> str:
169
+ """
170
+ Read the content from the FIFO file descriptor when it is ready.
171
+
172
+ Parameters
173
+ ----------
174
+ fifo_fd : int
175
+ File descriptor of the FIFO.
176
+ command_obj : CommandManager
177
+ Command manager object that handles the write side of the FIFO.
178
+ encoding : str, optional
179
+ Encoding to use while reading the file, by default "utf-8".
180
+ timeout : int, optional
181
+ Timeout for reading the file in seconds, by default 3600.
182
+
183
+ Returns
184
+ -------
185
+ str
186
+ Content read from the FIFO.
187
+
188
+ Raises
189
+ ------
190
+ TimeoutError
191
+ If no event occurs on the FIFO within the timeout.
192
+ CalledProcessError
193
+ If the process managed by `command_obj` has exited without writing any
194
+ content to the FIFO.
195
+ """
196
+ return await asyncio.to_thread(
197
+ read_from_fifo_when_ready, fifo_fd, command_obj, encoding, timeout
198
+ )
199
+
200
+
201
+ def make_process_error_message(
202
+ command_obj: "metaflow.runner.subprocess_manager.CommandManager",
203
+ ):
204
+ stdout_log = open(command_obj.log_files["stdout"], encoding="utf-8").read()
205
+ stderr_log = open(command_obj.log_files["stderr"], encoding="utf-8").read()
206
+ command = " ".join(command_obj.command)
207
+ error_message = "Error executing: '%s':\n" % command
208
+ if stdout_log.strip():
209
+ error_message += "\nStdout:\n%s\n" % stdout_log
210
+ if stderr_log.strip():
211
+ error_message += "\nStderr:\n%s\n" % stderr_log
212
+ return error_message
77
213
 
78
214
 
79
215
  def handle_timeout(
80
- tfp_runner_attribute: "tempfile._TemporaryFileWrapper[str]",
216
+ attribute_file_fd: int,
217
+ command_obj: "metaflow.runner.subprocess_manager.CommandManager",
218
+ file_read_timeout: int,
219
+ ):
220
+ """
221
+ Handle the timeout for a running subprocess command that reads a file
222
+ and raises an error with appropriate logs if a TimeoutError occurs.
223
+
224
+ Parameters
225
+ ----------
226
+ attribute_file_fd : int
227
+ File descriptor belonging to the FIFO containing the attribute data.
228
+ command_obj : CommandManager
229
+ Command manager object that encapsulates the running command details.
230
+ file_read_timeout : int
231
+ Timeout for reading the file, in seconds
232
+
233
+ Returns
234
+ -------
235
+ str
236
+ Content read from the temporary file.
237
+
238
+ Raises
239
+ ------
240
+ RuntimeError
241
+ If a TimeoutError occurs, it raises a RuntimeError with the command's
242
+ stdout and stderr logs.
243
+ """
244
+ try:
245
+ return read_from_fifo_when_ready(
246
+ attribute_file_fd, command_obj=command_obj, timeout=file_read_timeout
247
+ )
248
+ except (CalledProcessError, TimeoutError) as e:
249
+ raise RuntimeError(make_process_error_message(command_obj)) from e
250
+
251
+
252
+ async def async_handle_timeout(
253
+ attribute_file_fd: "int",
81
254
  command_obj: "metaflow.runner.subprocess_manager.CommandManager",
82
255
  file_read_timeout: int,
83
256
  ):
@@ -87,12 +260,12 @@ def handle_timeout(
87
260
 
88
261
  Parameters
89
262
  ----------
90
- tfp_runner_attribute : NamedTemporaryFile
91
- Temporary file that stores runner attribute data.
263
+ attribute_file_fd : int
264
+ File descriptor belonging to the FIFO containing the attribute data.
92
265
  command_obj : CommandManager
93
266
  Command manager object that encapsulates the running command details.
94
267
  file_read_timeout : int
95
- Timeout for reading the file.
268
+ Timeout for reading the file, in seconds
96
269
 
97
270
  Returns
98
271
  -------
@@ -106,20 +279,11 @@ def handle_timeout(
106
279
  stdout and stderr logs.
107
280
  """
108
281
  try:
109
- content = read_from_file_when_ready(
110
- tfp_runner_attribute.name, command_obj, timeout=file_read_timeout
282
+ return await async_read_from_fifo_when_ready(
283
+ attribute_file_fd, command_obj=command_obj, timeout=file_read_timeout
111
284
  )
112
- return content
113
285
  except (CalledProcessError, TimeoutError) as e:
114
- stdout_log = open(command_obj.log_files["stdout"], encoding="utf-8").read()
115
- stderr_log = open(command_obj.log_files["stderr"], encoding="utf-8").read()
116
- command = " ".join(command_obj.command)
117
- error_message = "Error executing: '%s':\n" % command
118
- if stdout_log.strip():
119
- error_message += "\nStdout:\n%s\n" % stdout_log
120
- if stderr_log.strip():
121
- error_message += "\nStderr:\n%s\n" % stderr_log
122
- raise RuntimeError(error_message) from e
286
+ raise RuntimeError(make_process_error_message(command_obj)) from e
123
287
 
124
288
 
125
289
  def get_lower_level_group(