metaflow 2.12.33__py2.py3-none-any.whl → 2.12.35__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,12 +2,11 @@ import importlib
2
2
  import json
3
3
  import os
4
4
  import sys
5
- import tempfile
6
5
 
7
6
  from typing import Any, ClassVar, Dict, Optional, TYPE_CHECKING, Type
8
7
 
9
8
  from .subprocess_manager import SubprocessManager
10
- from .utils import get_lower_level_group, handle_timeout
9
+ from .utils import get_lower_level_group, handle_timeout, temporary_fifo
11
10
 
12
11
  if TYPE_CHECKING:
13
12
  import metaflow.runner.deployer
@@ -121,14 +120,11 @@ class DeployerImpl(object):
121
120
  def _create(
122
121
  self, create_class: Type["metaflow.runner.deployer.DeployedFlow"], **kwargs
123
122
  ) -> "metaflow.runner.deployer.DeployedFlow":
124
- with tempfile.TemporaryDirectory() as temp_dir:
125
- tfp_runner_attribute = tempfile.NamedTemporaryFile(
126
- dir=temp_dir, delete=False
127
- )
123
+ with temporary_fifo() as (attribute_file_path, attribute_file_fd):
128
124
  # every subclass needs to have `self.deployer_kwargs`
129
125
  command = get_lower_level_group(
130
126
  self.api, self.top_level_kwargs, self.TYPE, self.deployer_kwargs
131
- ).create(deployer_attribute_file=tfp_runner_attribute.name, **kwargs)
127
+ ).create(deployer_attribute_file=attribute_file_path, **kwargs)
132
128
 
133
129
  pid = self.spm.run_command(
134
130
  [sys.executable, *command],
@@ -139,7 +135,7 @@ class DeployerImpl(object):
139
135
 
140
136
  command_obj = self.spm.get(pid)
141
137
  content = handle_timeout(
142
- tfp_runner_attribute, command_obj, self.file_read_timeout
138
+ attribute_file_fd, command_obj, self.file_read_timeout
143
139
  )
144
140
  content = json.loads(content)
145
141
  self.name = content.get("name")
@@ -2,13 +2,16 @@ import importlib
2
2
  import os
3
3
  import sys
4
4
  import json
5
- import tempfile
6
5
 
7
6
  from typing import Dict, Iterator, Optional, Tuple
8
7
 
9
8
  from metaflow import Run
10
9
 
11
- from .utils import handle_timeout
10
+ from .utils import (
11
+ temporary_fifo,
12
+ handle_timeout,
13
+ async_handle_timeout,
14
+ )
12
15
  from .subprocess_manager import CommandManager, SubprocessManager
13
16
 
14
17
 
@@ -267,9 +270,22 @@ class Runner(object):
267
270
  async def __aenter__(self) -> "Runner":
268
271
  return self
269
272
 
270
- def __get_executing_run(self, tfp_runner_attribute, command_obj):
271
- content = handle_timeout(
272
- tfp_runner_attribute, command_obj, self.file_read_timeout
273
+ def __get_executing_run(self, attribute_file_fd, command_obj):
274
+ content = handle_timeout(attribute_file_fd, command_obj, self.file_read_timeout)
275
+ content = json.loads(content)
276
+ pathspec = "%s/%s" % (content.get("flow_name"), content.get("run_id"))
277
+
278
+ # Set the correct metadata from the runner_attribute file corresponding to this run.
279
+ metadata_for_flow = content.get("metadata")
280
+
281
+ run_object = Run(
282
+ pathspec, _namespace_check=False, _current_metadata=metadata_for_flow
283
+ )
284
+ return ExecutingRun(self, command_obj, run_object)
285
+
286
+ async def __async_get_executing_run(self, attribute_file_fd, command_obj):
287
+ content = await async_handle_timeout(
288
+ attribute_file_fd, command_obj, self.file_read_timeout
273
289
  )
274
290
  content = json.loads(content)
275
291
  pathspec = "%s/%s" % (content.get("flow_name"), content.get("run_id"))
@@ -298,12 +314,9 @@ class Runner(object):
298
314
  ExecutingRun
299
315
  ExecutingRun containing the results of the run.
300
316
  """
301
- with tempfile.TemporaryDirectory() as temp_dir:
302
- tfp_runner_attribute = tempfile.NamedTemporaryFile(
303
- dir=temp_dir, delete=False
304
- )
317
+ with temporary_fifo() as (attribute_file_path, attribute_file_fd):
305
318
  command = self.api(**self.top_level_kwargs).run(
306
- runner_attribute_file=tfp_runner_attribute.name, **kwargs
319
+ runner_attribute_file=attribute_file_path, **kwargs
307
320
  )
308
321
 
309
322
  pid = self.spm.run_command(
@@ -314,7 +327,7 @@ class Runner(object):
314
327
  )
315
328
  command_obj = self.spm.get(pid)
316
329
 
317
- return self.__get_executing_run(tfp_runner_attribute, command_obj)
330
+ return self.__get_executing_run(attribute_file_fd, command_obj)
318
331
 
319
332
  def resume(self, **kwargs):
320
333
  """
@@ -332,12 +345,9 @@ class Runner(object):
332
345
  ExecutingRun
333
346
  ExecutingRun containing the results of the resumed run.
334
347
  """
335
- with tempfile.TemporaryDirectory() as temp_dir:
336
- tfp_runner_attribute = tempfile.NamedTemporaryFile(
337
- dir=temp_dir, delete=False
338
- )
348
+ with temporary_fifo() as (attribute_file_path, attribute_file_fd):
339
349
  command = self.api(**self.top_level_kwargs).resume(
340
- runner_attribute_file=tfp_runner_attribute.name, **kwargs
350
+ runner_attribute_file=attribute_file_path, **kwargs
341
351
  )
342
352
 
343
353
  pid = self.spm.run_command(
@@ -348,7 +358,7 @@ class Runner(object):
348
358
  )
349
359
  command_obj = self.spm.get(pid)
350
360
 
351
- return self.__get_executing_run(tfp_runner_attribute, command_obj)
361
+ return self.__get_executing_run(attribute_file_fd, command_obj)
352
362
 
353
363
  async def async_run(self, **kwargs) -> ExecutingRun:
354
364
  """
@@ -368,12 +378,9 @@ class Runner(object):
368
378
  ExecutingRun
369
379
  ExecutingRun representing the run that was started.
370
380
  """
371
- with tempfile.TemporaryDirectory() as temp_dir:
372
- tfp_runner_attribute = tempfile.NamedTemporaryFile(
373
- dir=temp_dir, delete=False
374
- )
381
+ with temporary_fifo() as (attribute_file_path, attribute_file_fd):
375
382
  command = self.api(**self.top_level_kwargs).run(
376
- runner_attribute_file=tfp_runner_attribute.name, **kwargs
383
+ runner_attribute_file=attribute_file_path, **kwargs
377
384
  )
378
385
 
379
386
  pid = await self.spm.async_run_command(
@@ -383,7 +390,7 @@ class Runner(object):
383
390
  )
384
391
  command_obj = self.spm.get(pid)
385
392
 
386
- return self.__get_executing_run(tfp_runner_attribute, command_obj)
393
+ return await self.__async_get_executing_run(attribute_file_fd, command_obj)
387
394
 
388
395
  async def async_resume(self, **kwargs):
389
396
  """
@@ -403,12 +410,9 @@ class Runner(object):
403
410
  ExecutingRun
404
411
  ExecutingRun representing the resumed run that was started.
405
412
  """
406
- with tempfile.TemporaryDirectory() as temp_dir:
407
- tfp_runner_attribute = tempfile.NamedTemporaryFile(
408
- dir=temp_dir, delete=False
409
- )
413
+ with temporary_fifo() as (attribute_file_path, attribute_file_fd):
410
414
  command = self.api(**self.top_level_kwargs).resume(
411
- runner_attribute_file=tfp_runner_attribute.name, **kwargs
415
+ runner_attribute_file=attribute_file_path, **kwargs
412
416
  )
413
417
 
414
418
  pid = await self.spm.async_run_command(
@@ -418,7 +422,7 @@ class Runner(object):
418
422
  )
419
423
  command_obj = self.spm.get(pid)
420
424
 
421
- return self.__get_executing_run(tfp_runner_attribute, command_obj)
425
+ return await self.__async_get_executing_run(attribute_file_fd, command_obj)
422
426
 
423
427
  def __exit__(self, exc_type, exc_value, traceback):
424
428
  self.spm.cleanup()
@@ -9,26 +9,61 @@ import tempfile
9
9
  import threading
10
10
  from typing import Callable, Dict, Iterator, List, Optional, Tuple
11
11
 
12
+ from .utils import check_process_exited
12
13
 
13
- def kill_process_and_descendants(pid, termination_timeout):
14
+
15
+ def kill_processes_and_descendants(pids: List[str], termination_timeout: float):
14
16
  # TODO: there's a race condition that new descendants might
15
17
  # spawn b/w the invocations of 'pkill' and 'kill'.
16
18
  # Needs to be fixed in future.
17
19
  try:
18
- subprocess.check_call(["pkill", "-TERM", "-P", str(pid)])
19
- subprocess.check_call(["kill", "-TERM", str(pid)])
20
+ subprocess.check_call(["pkill", "-TERM", "-P", *pids])
21
+ subprocess.check_call(["kill", "-TERM", *pids])
20
22
  except subprocess.CalledProcessError:
21
23
  pass
22
24
 
23
25
  time.sleep(termination_timeout)
24
26
 
25
27
  try:
26
- subprocess.check_call(["pkill", "-KILL", "-P", str(pid)])
27
- subprocess.check_call(["kill", "-KILL", str(pid)])
28
+ subprocess.check_call(["pkill", "-KILL", "-P", *pids])
29
+ subprocess.check_call(["kill", "-KILL", *pids])
28
30
  except subprocess.CalledProcessError:
29
31
  pass
30
32
 
31
33
 
34
+ async def async_kill_processes_and_descendants(
35
+ pids: List[str], termination_timeout: float
36
+ ):
37
+ # TODO: there's a race condition that new descendants might
38
+ # spawn b/w the invocations of 'pkill' and 'kill'.
39
+ # Needs to be fixed in future.
40
+ try:
41
+ sub_term = await asyncio.create_subprocess_exec("pkill", "-TERM", "-P", *pids)
42
+ await sub_term.wait()
43
+ except Exception:
44
+ pass
45
+
46
+ try:
47
+ main_term = await asyncio.create_subprocess_exec("kill", "-TERM", *pids)
48
+ await main_term.wait()
49
+ except Exception:
50
+ pass
51
+
52
+ await asyncio.sleep(termination_timeout)
53
+
54
+ try:
55
+ sub_kill = await asyncio.create_subprocess_exec("pkill", "-KILL", "-P", *pids)
56
+ await sub_kill.wait()
57
+ except Exception:
58
+ pass
59
+
60
+ try:
61
+ main_kill = await asyncio.create_subprocess_exec("kill", "-KILL", *pids)
62
+ await main_kill.wait()
63
+ except Exception:
64
+ pass
65
+
66
+
32
67
  class LogReadTimeoutError(Exception):
33
68
  """Exception raised when reading logs times out."""
34
69
 
@@ -46,14 +81,28 @@ class SubprocessManager(object):
46
81
  loop = asyncio.get_running_loop()
47
82
  loop.add_signal_handler(
48
83
  signal.SIGINT,
49
- lambda: self._handle_sigint(signum=signal.SIGINT, frame=None),
84
+ lambda: asyncio.create_task(self._async_handle_sigint()),
50
85
  )
51
86
  except RuntimeError:
52
87
  signal.signal(signal.SIGINT, self._handle_sigint)
53
88
 
89
+ async def _async_handle_sigint(self):
90
+ pids = [
91
+ str(command.process.pid)
92
+ for command in self.commands.values()
93
+ if command.process and not check_process_exited(command)
94
+ ]
95
+ if pids:
96
+ await async_kill_processes_and_descendants(pids, termination_timeout=2)
97
+
54
98
  def _handle_sigint(self, signum, frame):
55
- for each_command in self.commands.values():
56
- each_command.kill(termination_timeout=2)
99
+ pids = [
100
+ str(command.process.pid)
101
+ for command in self.commands.values()
102
+ if command.process and not check_process_exited(command)
103
+ ]
104
+ if pids:
105
+ kill_processes_and_descendants(pids, termination_timeout=2)
57
106
 
58
107
  async def __aenter__(self) -> "SubprocessManager":
59
108
  return self
@@ -472,7 +521,7 @@ class CommandManager(object):
472
521
  """
473
522
 
474
523
  if self.process is not None:
475
- kill_process_and_descendants(self.process.pid, termination_timeout)
524
+ kill_processes_and_descendants([str(self.process.pid)], termination_timeout)
476
525
  else:
477
526
  print("No process to kill.")
478
527
 
metaflow/runner/utils.py CHANGED
@@ -2,9 +2,11 @@ import os
2
2
  import ast
3
3
  import time
4
4
  import asyncio
5
-
5
+ import tempfile
6
+ import select
7
+ from contextlib import contextmanager
6
8
  from subprocess import CalledProcessError
7
- from typing import Any, Dict, TYPE_CHECKING
9
+ from typing import Any, Dict, TYPE_CHECKING, ContextManager, Tuple
8
10
 
9
11
  if TYPE_CHECKING:
10
12
  import tempfile
@@ -39,45 +41,194 @@ def format_flowfile(cell):
39
41
  return "\n".join(lines)
40
42
 
41
43
 
42
- def check_process_status(
44
+ def check_process_exited(
43
45
  command_obj: "metaflow.runner.subprocess_manager.CommandManager",
44
- ):
46
+ ) -> bool:
45
47
  if isinstance(command_obj.process, asyncio.subprocess.Process):
46
48
  return command_obj.process.returncode is not None
47
49
  else:
48
50
  return command_obj.process.poll() is not None
49
51
 
50
52
 
51
- def read_from_file_when_ready(
52
- file_path: str,
53
+ @contextmanager
54
+ def temporary_fifo() -> ContextManager[Tuple[str, int]]:
55
+ """
56
+ Create and open the read side of a temporary FIFO in a non-blocking mode.
57
+
58
+ Returns
59
+ -------
60
+ str
61
+ Path to the temporary FIFO.
62
+ int
63
+ File descriptor of the temporary FIFO.
64
+ """
65
+ with tempfile.TemporaryDirectory() as temp_dir:
66
+ path = os.path.join(temp_dir, "fifo")
67
+ os.mkfifo(path)
68
+ # Blocks until the write side is opened unless in non-blocking mode
69
+ fd = os.open(path, os.O_RDONLY | os.O_NONBLOCK)
70
+ try:
71
+ yield path, fd
72
+ finally:
73
+ os.close(fd)
74
+
75
+
76
+ def read_from_fifo_when_ready(
77
+ fifo_fd: int,
78
+ command_obj: "metaflow.runner.subprocess_manager.CommandManager",
79
+ encoding: str = "utf-8",
80
+ timeout: int = 3600,
81
+ ) -> str:
82
+ """
83
+ Read the content from the FIFO file descriptor when it is ready.
84
+
85
+ Parameters
86
+ ----------
87
+ fifo_fd : int
88
+ File descriptor of the FIFO.
89
+ command_obj : CommandManager
90
+ Command manager object that handles the write side of the FIFO.
91
+ encoding : str, optional
92
+ Encoding to use while reading the file, by default "utf-8".
93
+ timeout : int, optional
94
+ Timeout for reading the file in milliseconds, by default 3600.
95
+
96
+ Returns
97
+ -------
98
+ str
99
+ Content read from the FIFO.
100
+
101
+ Raises
102
+ ------
103
+ TimeoutError
104
+ If no event occurs on the FIFO within the timeout.
105
+ CalledProcessError
106
+ If the process managed by `command_obj` has exited without writing any
107
+ content to the FIFO.
108
+ """
109
+ content = bytearray()
110
+
111
+ poll = select.poll()
112
+ poll.register(fifo_fd, select.POLLIN)
113
+
114
+ while True:
115
+ poll_begin = time.time()
116
+ poll.poll(timeout)
117
+ timeout -= 1000 * (time.time() - poll_begin)
118
+
119
+ if timeout <= 0:
120
+ raise TimeoutError("Timeout while waiting for the file content")
121
+
122
+ try:
123
+ data = os.read(fifo_fd, 128)
124
+ while data:
125
+ content += data
126
+ data = os.read(fifo_fd, 128)
127
+
128
+ # Read from a non-blocking closed FIFO returns an empty byte array
129
+ break
130
+
131
+ except BlockingIOError:
132
+ # FIFO is open but no data is available yet
133
+ continue
134
+
135
+ if not content and check_process_exited(command_obj):
136
+ raise CalledProcessError(command_obj.process.returncode, command_obj.command)
137
+
138
+ return content.decode(encoding)
139
+
140
+
141
+ async def async_read_from_fifo_when_ready(
142
+ fifo_fd: int,
143
+ command_obj: "metaflow.runner.subprocess_manager.CommandManager",
144
+ encoding: str = "utf-8",
145
+ timeout: int = 3600,
146
+ ) -> str:
147
+ """
148
+ Read the content from the FIFO file descriptor when it is ready.
149
+
150
+ Parameters
151
+ ----------
152
+ fifo_fd : int
153
+ File descriptor of the FIFO.
154
+ command_obj : CommandManager
155
+ Command manager object that handles the write side of the FIFO.
156
+ encoding : str, optional
157
+ Encoding to use while reading the file, by default "utf-8".
158
+ timeout : int, optional
159
+ Timeout for reading the file in milliseconds, by default 3600.
160
+
161
+ Returns
162
+ -------
163
+ str
164
+ Content read from the FIFO.
165
+
166
+ Raises
167
+ ------
168
+ TimeoutError
169
+ If no event occurs on the FIFO within the timeout.
170
+ CalledProcessError
171
+ If the process managed by `command_obj` has exited without writing any
172
+ content to the FIFO.
173
+ """
174
+ return await asyncio.to_thread(
175
+ read_from_fifo_when_ready, fifo_fd, command_obj, encoding, timeout
176
+ )
177
+
178
+
179
+ def make_process_error_message(
53
180
  command_obj: "metaflow.runner.subprocess_manager.CommandManager",
54
- timeout: float = 5,
55
181
  ):
56
- start_time = time.time()
57
- with open(file_path, "r", encoding="utf-8") as file_pointer:
58
- content = file_pointer.read()
59
- while not content:
60
- if check_process_status(command_obj):
61
- # Check to make sure the file hasn't been read yet to avoid a race
62
- # where the file is written between the end of this while loop and the
63
- # poll call above.
64
- content = file_pointer.read()
65
- if content:
66
- break
67
- raise CalledProcessError(
68
- command_obj.process.returncode, command_obj.command
69
- )
70
- if time.time() - start_time > timeout:
71
- raise TimeoutError(
72
- "Timeout while waiting for file content from '%s'" % file_path
73
- )
74
- time.sleep(0.1)
75
- content = file_pointer.read()
76
- return content
182
+ stdout_log = open(command_obj.log_files["stdout"], encoding="utf-8").read()
183
+ stderr_log = open(command_obj.log_files["stderr"], encoding="utf-8").read()
184
+ command = " ".join(command_obj.command)
185
+ error_message = "Error executing: '%s':\n" % command
186
+ if stdout_log.strip():
187
+ error_message += "\nStdout:\n%s\n" % stdout_log
188
+ if stderr_log.strip():
189
+ error_message += "\nStderr:\n%s\n" % stderr_log
190
+ return error_message
77
191
 
78
192
 
79
193
  def handle_timeout(
80
- tfp_runner_attribute: "tempfile._TemporaryFileWrapper[str]",
194
+ attribute_file_fd: int,
195
+ command_obj: "metaflow.runner.subprocess_manager.CommandManager",
196
+ file_read_timeout: int,
197
+ ):
198
+ """
199
+ Handle the timeout for a running subprocess command that reads a file
200
+ and raises an error with appropriate logs if a TimeoutError occurs.
201
+
202
+ Parameters
203
+ ----------
204
+ attribute_file_fd : int
205
+ File descriptor belonging to the FIFO containing the attribute data.
206
+ command_obj : CommandManager
207
+ Command manager object that encapsulates the running command details.
208
+ file_read_timeout : int
209
+ Timeout for reading the file.
210
+
211
+ Returns
212
+ -------
213
+ str
214
+ Content read from the temporary file.
215
+
216
+ Raises
217
+ ------
218
+ RuntimeError
219
+ If a TimeoutError occurs, it raises a RuntimeError with the command's
220
+ stdout and stderr logs.
221
+ """
222
+ try:
223
+ return read_from_fifo_when_ready(
224
+ attribute_file_fd, command_obj=command_obj, timeout=file_read_timeout
225
+ )
226
+ except (CalledProcessError, TimeoutError) as e:
227
+ raise RuntimeError(make_process_error_message(command_obj)) from e
228
+
229
+
230
+ async def async_handle_timeout(
231
+ attribute_file_fd: "int",
81
232
  command_obj: "metaflow.runner.subprocess_manager.CommandManager",
82
233
  file_read_timeout: int,
83
234
  ):
@@ -87,8 +238,8 @@ def handle_timeout(
87
238
 
88
239
  Parameters
89
240
  ----------
90
- tfp_runner_attribute : NamedTemporaryFile
91
- Temporary file that stores runner attribute data.
241
+ attribute_file_fd : int
242
+ File descriptor belonging to the FIFO containing the attribute data.
92
243
  command_obj : CommandManager
93
244
  Command manager object that encapsulates the running command details.
94
245
  file_read_timeout : int
@@ -106,20 +257,11 @@ def handle_timeout(
106
257
  stdout and stderr logs.
107
258
  """
108
259
  try:
109
- content = read_from_file_when_ready(
110
- tfp_runner_attribute.name, command_obj, timeout=file_read_timeout
260
+ return await async_read_from_fifo_when_ready(
261
+ attribute_file_fd, command_obj=command_obj, timeout=file_read_timeout
111
262
  )
112
- return content
113
263
  except (CalledProcessError, TimeoutError) as e:
114
- stdout_log = open(command_obj.log_files["stdout"], encoding="utf-8").read()
115
- stderr_log = open(command_obj.log_files["stderr"], encoding="utf-8").read()
116
- command = " ".join(command_obj.command)
117
- error_message = "Error executing: '%s':\n" % command
118
- if stdout_log.strip():
119
- error_message += "\nStdout:\n%s\n" % stdout_log
120
- if stderr_log.strip():
121
- error_message += "\nStderr:\n%s\n" % stderr_log
122
- raise RuntimeError(error_message) from e
264
+ raise RuntimeError(make_process_error_message(command_obj)) from e
123
265
 
124
266
 
125
267
  def get_lower_level_group(
metaflow/util.py CHANGED
@@ -436,12 +436,17 @@ def to_pod(value):
436
436
  Value to convert to POD format. The value can be a string, number, list,
437
437
  dictionary, or a nested structure of these types.
438
438
  """
439
+ # Prevent circular imports
440
+ from metaflow.parameters import DeployTimeField
441
+
439
442
  if isinstance(value, (str, int, float)):
440
443
  return value
441
444
  if isinstance(value, dict):
442
445
  return {to_pod(k): to_pod(v) for k, v in value.items()}
443
446
  if isinstance(value, (list, set, tuple)):
444
447
  return [to_pod(v) for v in value]
448
+ if isinstance(value, DeployTimeField):
449
+ return value.print_representation
445
450
  return str(value)
446
451
 
447
452
 
metaflow/version.py CHANGED
@@ -1 +1 @@
1
- metaflow_version = "2.12.33"
1
+ metaflow_version = "2.12.35"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: metaflow
3
- Version: 2.12.33
3
+ Version: 2.12.35
4
4
  Summary: Metaflow: More Data Science, Less Engineering
5
5
  Author: Metaflow Developers
6
6
  Author-email: help@metaflow.org
@@ -26,7 +26,7 @@ License-File: LICENSE
26
26
  Requires-Dist: requests
27
27
  Requires-Dist: boto3
28
28
  Provides-Extra: stubs
29
- Requires-Dist: metaflow-stubs==2.12.33; extra == "stubs"
29
+ Requires-Dist: metaflow-stubs==2.12.35; extra == "stubs"
30
30
 
31
31
  ![Metaflow_Logo_Horizontal_FullColor_Ribbon_Dark_RGB](https://user-images.githubusercontent.com/763451/89453116-96a57e00-d713-11ea-9fa6-82b29d4d6eff.png)
32
32