metaflow 2.11.15__py2.py3-none-any.whl → 2.12.0__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metaflow/__init__.py +8 -0
- metaflow/_vendor/importlib_metadata/__init__.py +1063 -0
- metaflow/_vendor/importlib_metadata/_adapters.py +68 -0
- metaflow/_vendor/importlib_metadata/_collections.py +30 -0
- metaflow/_vendor/importlib_metadata/_compat.py +71 -0
- metaflow/_vendor/importlib_metadata/_functools.py +104 -0
- metaflow/_vendor/importlib_metadata/_itertools.py +73 -0
- metaflow/_vendor/importlib_metadata/_meta.py +48 -0
- metaflow/_vendor/importlib_metadata/_text.py +99 -0
- metaflow/_vendor/importlib_metadata/py.typed +0 -0
- metaflow/_vendor/typeguard/__init__.py +48 -0
- metaflow/_vendor/typeguard/_checkers.py +906 -0
- metaflow/_vendor/typeguard/_config.py +108 -0
- metaflow/_vendor/typeguard/_decorators.py +237 -0
- metaflow/_vendor/typeguard/_exceptions.py +42 -0
- metaflow/_vendor/typeguard/_functions.py +307 -0
- metaflow/_vendor/typeguard/_importhook.py +213 -0
- metaflow/_vendor/typeguard/_memo.py +48 -0
- metaflow/_vendor/typeguard/_pytest_plugin.py +100 -0
- metaflow/_vendor/typeguard/_suppression.py +88 -0
- metaflow/_vendor/typeguard/_transformer.py +1193 -0
- metaflow/_vendor/typeguard/_union_transformer.py +54 -0
- metaflow/_vendor/typeguard/_utils.py +169 -0
- metaflow/_vendor/typeguard/py.typed +0 -0
- metaflow/_vendor/typing_extensions.py +3053 -0
- metaflow/cli.py +48 -36
- metaflow/clone_util.py +6 -0
- metaflow/cmd/develop/stubs.py +2 -0
- metaflow/extension_support/__init__.py +2 -0
- metaflow/extension_support/plugins.py +2 -0
- metaflow/metaflow_config.py +24 -0
- metaflow/metaflow_environment.py +2 -2
- metaflow/parameters.py +1 -0
- metaflow/plugins/__init__.py +19 -0
- metaflow/plugins/airflow/airflow.py +7 -0
- metaflow/plugins/argo/argo_workflows.py +17 -0
- metaflow/plugins/aws/batch/batch_decorator.py +3 -3
- metaflow/plugins/azure/__init__.py +3 -0
- metaflow/plugins/azure/azure_credential.py +53 -0
- metaflow/plugins/azure/azure_exceptions.py +1 -1
- metaflow/plugins/azure/azure_secret_manager_secrets_provider.py +240 -0
- metaflow/plugins/azure/azure_utils.py +2 -35
- metaflow/plugins/azure/blob_service_client_factory.py +4 -2
- metaflow/plugins/datastores/azure_storage.py +6 -6
- metaflow/plugins/datatools/s3/s3.py +1 -1
- metaflow/plugins/gcp/__init__.py +1 -0
- metaflow/plugins/gcp/gcp_secret_manager_secrets_provider.py +169 -0
- metaflow/plugins/gcp/gs_storage_client_factory.py +52 -1
- metaflow/plugins/kubernetes/kubernetes.py +85 -8
- metaflow/plugins/kubernetes/kubernetes_cli.py +24 -1
- metaflow/plugins/kubernetes/kubernetes_client.py +4 -1
- metaflow/plugins/kubernetes/kubernetes_decorator.py +49 -4
- metaflow/plugins/kubernetes/kubernetes_job.py +208 -206
- metaflow/plugins/kubernetes/kubernetes_jobsets.py +784 -0
- metaflow/plugins/timeout_decorator.py +2 -1
- metaflow/runner/__init__.py +0 -0
- metaflow/runner/click_api.py +406 -0
- metaflow/runner/metaflow_runner.py +452 -0
- metaflow/runner/nbrun.py +246 -0
- metaflow/runner/subprocess_manager.py +552 -0
- metaflow/task.py +1 -12
- metaflow/tuple_util.py +27 -0
- metaflow/util.py +0 -15
- metaflow/vendor.py +0 -1
- metaflow/version.py +1 -1
- {metaflow-2.11.15.dist-info → metaflow-2.12.0.dist-info}/METADATA +2 -2
- {metaflow-2.11.15.dist-info → metaflow-2.12.0.dist-info}/RECORD +72 -39
- metaflow/_vendor/v3_7/__init__.py +0 -1
- /metaflow/_vendor/{v3_7/zipp.py → zipp.py} +0 -0
- {metaflow-2.11.15.dist-info → metaflow-2.12.0.dist-info}/LICENSE +0 -0
- {metaflow-2.11.15.dist-info → metaflow-2.12.0.dist-info}/WHEEL +0 -0
- {metaflow-2.11.15.dist-info → metaflow-2.12.0.dist-info}/entry_points.txt +0 -0
- {metaflow-2.11.15.dist-info → metaflow-2.12.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,552 @@
|
|
1
|
+
import asyncio
|
2
|
+
import os
|
3
|
+
import shutil
|
4
|
+
import signal
|
5
|
+
import subprocess
|
6
|
+
import sys
|
7
|
+
import tempfile
|
8
|
+
import threading
|
9
|
+
import time
|
10
|
+
from typing import Callable, Dict, Iterator, List, Optional, Tuple
|
11
|
+
|
12
|
+
|
13
|
+
def kill_process_and_descendants(pid, termination_timeout):
    """
    Terminate the process `pid` together with its child processes.

    A SIGTERM round is sent first to allow graceful shutdown, then after
    `termination_timeout` seconds a SIGKILL round finishes off anything
    still alive.

    Parameters
    ----------
    pid : int
        Process ID at the root of the tree to terminate.
    termination_timeout : float
        Seconds to wait between the SIGTERM and the SIGKILL rounds.
    """

    def _signal_all(pkill_flag, sig):
        # `pkill -P <pid>` signals the *direct* children of `pid`; processes
        # spawned deeper in the tree are reparented once their parent dies.
        try:
            subprocess.check_call(["pkill", pkill_flag, "-P", str(pid)])
        except subprocess.CalledProcessError:
            # pkill exits non-zero when nothing matched; that is not an error.
            pass
        except FileNotFoundError:
            # pkill is unavailable on this platform; still signal `pid` below.
            pass
        # Bug fix: the previous implementation never signalled `pid` itself
        # (pkill -P only matches children), so the managed process survived
        # its own "kill". Signal it explicitly.
        try:
            os.kill(pid, sig)
        except OSError:
            # Process already gone (or not ours to signal) -- nothing to do.
            pass

    _signal_all("-TERM", signal.SIGTERM)

    time.sleep(termination_timeout)

    _signal_all("-KILL", signal.SIGKILL)
|
25
|
+
|
26
|
+
|
27
|
+
class LogReadTimeoutError(Exception):
    """Raised when a line could not be read from a log file within the allotted time."""
|
29
|
+
|
30
|
+
|
31
|
+
class SubprocessManager(object):
    """
    Tracks a collection of subprocesses.

    Every command launched through this manager is wrapped in its own
    CommandManager instance; this class maps process IDs to those instances
    and offers bulk cleanup of their log files.
    """

    def __init__(self):
        # Maps the PID of each launched command to its CommandManager.
        self.commands: Dict[int, CommandManager] = {}

    async def __aenter__(self) -> "SubprocessManager":
        # Nothing to set up -- the context manager only guarantees cleanup().
        return self

    async def __aexit__(self, exc_type, exc_value, traceback):
        self.cleanup()

    def run_command(
        self,
        command: List[str],
        env: Optional[Dict[str, str]] = None,
        cwd: Optional[str] = None,
        show_output: bool = False,
    ) -> int:
        """
        Run a command synchronously (blocking until it exits) and return its
        process ID.

        Parameters
        ----------
        command : List[str]
            The command to run, as an argument list.
        env : Optional[Dict[str, str]], default None
            Environment variables for the subprocess; defaults to the current
            process environment.
        cwd : Optional[str], default None
            Working directory for the subprocess; defaults to the current
            directory.
        show_output : bool, default False
            When False (the default), 'stdout' and 'stderr' are not echoed to
            the console. They can be read later from the files recorded in the
            CommandManager object:
                - command_obj.log_files["stdout"]
                - command_obj.log_files["stderr"]

        Returns
        -------
        int
            The process ID of the subprocess.
        """
        manager = CommandManager(command, env, cwd)
        pid = manager.run(show_output=show_output)
        self.commands[pid] = manager
        return pid

    async def async_run_command(
        self,
        command: List[str],
        env: Optional[Dict[str, str]] = None,
        cwd: Optional[str] = None,
    ) -> int:
        """
        Start a command asynchronously (returning as soon as it has been
        launched) and return its process ID.

        Parameters
        ----------
        command : List[str]
            The command to run, as an argument list.
        env : Optional[Dict[str, str]], default None
            Environment variables for the subprocess; defaults to the current
            process environment.
        cwd : Optional[str], default None
            Working directory for the subprocess; defaults to the current
            directory.

        Returns
        -------
        int
            The process ID of the subprocess.
        """
        manager = CommandManager(command, env, cwd)
        pid = await manager.async_run()
        self.commands[pid] = manager
        return pid

    def get(self, pid: int) -> Optional["CommandManager"]:
        """
        Look up the CommandManager for a previously launched command.

        Parameters
        ----------
        pid : int
            The process ID of the subprocess (returned by run_command or
            async_run_command).

        Returns
        -------
        Optional[CommandManager]
            The CommandManager for that process ID, or None if unknown.
        """
        return self.commands.get(pid)

    def cleanup(self) -> None:
        """Clean up log files for all commands launched through this manager."""
        for manager in self.commands.values():
            manager.cleanup()
|
135
|
+
|
136
|
+
|
137
|
+
class CommandManager(object):
    """A manager for an individual subprocess.

    Wraps a single subprocess: starting it (synchronously via `run` or
    asynchronously via `async_run`), waiting on it, streaming its captured
    log files, killing it, and cleaning up its temporary log directory.
    """

    def __init__(
        self,
        command: List[str],
        env: Optional[Dict[str, str]] = None,
        cwd: Optional[str] = None,
    ):
        """
        Create a new CommandManager object.
        This does not run the process itself but sets it up.

        Parameters
        ----------
        command : List[str]
            The command to run in List form.
        env : Optional[Dict[str, str]], default None
            Environment variables to set for the subprocess; if not specified,
            the current environment variables are used.
        cwd : Optional[str], default None
            The directory to run the subprocess in; if not specified, the current
            directory is used.
        """
        self.command = command

        self.env = env if env is not None else os.environ.copy()
        self.cwd = cwd if cwd is not None else os.getcwd()

        # Set by run()/async_run(): a subprocess.Popen object on the sync path,
        # an asyncio subprocess Process on the async path.
        self.process = None
        # Guards against launching the same CommandManager twice.
        self.run_called: bool = False
        # Stream name ("stdout"/"stderr") -> path of the file capturing it.
        self.log_files: Dict[str, str] = {}

        # NOTE(review): this installs a *process-wide* SIGINT handler bound to
        # this instance; if several CommandManagers are created, only the last
        # one's handler remains, and any pre-existing handler is replaced --
        # confirm this is intended.
        signal.signal(signal.SIGINT, self._handle_sigint)

    async def __aenter__(self) -> "CommandManager":
        return self

    async def __aexit__(self, exc_type, exc_value, traceback):
        # Only log-file cleanup happens here; the process itself is not killed.
        self.cleanup()

    async def wait(
        self, timeout: Optional[float] = None, stream: Optional[str] = None
    ) -> None:
        """
        Wait for the subprocess to finish, optionally with a timeout
        and optionally streaming its output.

        You can only call `wait` if `async_run` has already been called.

        Parameters
        ----------
        timeout : Optional[float], default None
            The maximum time to wait for the subprocess to finish.
            If the timeout is reached, the subprocess is killed.
        stream : Optional[str], default None
            If specified, the specified stream is printed to stdout. `stream` can
            be one of `stdout` or `stderr`.
        """

        if not self.run_called:
            raise RuntimeError("No command run yet to wait for...")

        if timeout is None:
            # No deadline: either just wait on the process, or drain the
            # requested log stream (emit_logs returns once the process ends).
            if stream is None:
                await self.process.wait()
            else:
                await self.emit_logs(stream)
        else:
            try:
                if stream is None:
                    await asyncio.wait_for(self.process.wait(), timeout)
                else:
                    await asyncio.wait_for(self.emit_logs(stream), timeout)
            except asyncio.TimeoutError:
                # Deadline expired: kill the process tree, then report.
                command_string = " ".join(self.command)
                await self.kill()
                print(
                    "Timeout: The process (PID %d; command: '%s') did not complete "
                    "within %s seconds." % (self.process.pid, command_string, timeout)
                )

    def run(self, show_output: bool = False):
        """
        Run the subprocess synchronously. This can only be called once.

        This also waits on the process implicitly.

        Parameters
        ----------
        show_output : bool, default False
            Suppress the 'stdout' and 'stderr' to the console by default.
            They can be accessed later by reading the files present in:
                - self.log_files["stdout"]
                - self.log_files["stderr"]

        Returns
        -------
        Optional[int]
            The process ID on success.
            NOTE(review): on a startup failure the `except` branch falls
            through and this returns None implicitly, which the caller
            (SubprocessManager.run_command) then uses as a dict key -- confirm
            intended.
        """

        if not self.run_called:
            # Logs are captured into a fresh temporary directory, removed by
            # cleanup().
            self.temp_dir = tempfile.mkdtemp()
            stdout_logfile = os.path.join(self.temp_dir, "stdout.log")
            stderr_logfile = os.path.join(self.temp_dir, "stderr.log")

            def stream_to_stdout_and_file(pipe, log_file):
                # Pump one pipe into its log file (and optionally the console)
                # until the pipe reaches EOF; runs in a daemonless thread below.
                with open(log_file, "w") as file:
                    for line in iter(pipe.readline, ""):
                        if show_output:
                            sys.stdout.write(line)
                        file.write(line)
                pipe.close()

            try:
                self.process = subprocess.Popen(
                    self.command,
                    cwd=self.cwd,
                    env=self.env,
                    stdout=subprocess.PIPE,
                    stderr=subprocess.PIPE,
                    # bufsize=1 with universal_newlines=True gives line-buffered
                    # text pipes, matching the readline-based pump above.
                    bufsize=1,
                    universal_newlines=True,
                )

                self.log_files["stdout"] = stdout_logfile
                self.log_files["stderr"] = stderr_logfile

                self.run_called = True

                # One pump thread per pipe so stdout and stderr cannot
                # deadlock each other when the pipe buffers fill.
                stdout_thread = threading.Thread(
                    target=stream_to_stdout_and_file,
                    args=(self.process.stdout, stdout_logfile),
                )
                stderr_thread = threading.Thread(
                    target=stream_to_stdout_and_file,
                    args=(self.process.stderr, stderr_logfile),
                )

                stdout_thread.start()
                stderr_thread.start()

                self.process.wait()

                stdout_thread.join()
                stderr_thread.join()

                return self.process.pid
            except Exception as e:
                print("Error starting subprocess: %s" % e)
                self.cleanup()
        else:
            command_string = " ".join(self.command)
            print(
                "Command '%s' has already been called. Please create another "
                "CommandManager object." % command_string
            )

    async def async_run(self):
        """
        Run the subprocess asynchronously. This can only be called once.

        Once this is called, you can then wait on the process (using `wait`), stream
        logs (using `stream_logs`) or kill it (using `kill`).

        Returns
        -------
        Optional[int]
            The process ID on success; None (implicitly) if startup failed.
        """

        if not self.run_called:
            self.temp_dir = tempfile.mkdtemp()
            stdout_logfile = os.path.join(self.temp_dir, "stdout.log")
            stderr_logfile = os.path.join(self.temp_dir, "stderr.log")

            try:
                # returns when process has been started,
                # not when it is finished...
                self.process = await asyncio.create_subprocess_exec(
                    *self.command,
                    cwd=self.cwd,
                    env=self.env,
                    # Logs go straight to files here (unlike run(), which pumps
                    # pipes through threads); stream_log() tails these files.
                    stdout=open(stdout_logfile, "w", encoding="utf-8"),
                    stderr=open(stderr_logfile, "w", encoding="utf-8"),
                )

                self.log_files["stdout"] = stdout_logfile
                self.log_files["stderr"] = stderr_logfile

                self.run_called = True
                return self.process.pid
            except Exception as e:
                print("Error starting subprocess: %s" % e)
                self.cleanup()
        else:
            command_string = " ".join(self.command)
            print(
                "Command '%s' has already been called. Please create another "
                "CommandManager object." % command_string
            )

    # NOTE(review): this is an async generator, so AsyncIterator[Tuple[int, str]]
    # would describe the return better than Iterator (would need a new typing
    # import at the top of the file).
    async def stream_log(
        self,
        stream: str,
        position: Optional[int] = None,
        timeout_per_line: Optional[float] = None,
        log_write_delay: float = 0.01,
    ) -> Iterator[Tuple[int, str]]:
        """
        Stream logs from the subprocess line by line.

        Parameters
        ----------
        stream : str
            The stream to stream logs from. Can be one of "stdout" or "stderr".
        position : Optional[int], default None
            The position in the log file to start streaming from. If None, it starts
            from the beginning of the log file. This allows resuming streaming from
            a previously known position
        timeout_per_line : Optional[float], default None
            The time to wait for a line to be read from the log file. If None, it
            waits indefinitely. If the timeout is reached, a LogReadTimeoutError
            is raised. Note that this timeout is *per line* and not cumulative so this
            function may take significantly more time than `timeout_per_line`
        log_write_delay : float, default 0.01
            Improves the probability of getting whole lines. This setting is for
            advanced use cases.

        Yields
        ------
        Tuple[int, str]
            A tuple containing the position in the log file and the line read. The
            position returned can be used to feed into another `stream_logs` call
            for example.
        """

        if not self.run_called:
            raise RuntimeError("No command run yet to get the logs for...")

        if stream not in self.log_files:
            raise ValueError(
                "No log file found for '%s', valid values are: %s"
                % (stream, ", ".join(self.log_files.keys()))
            )

        log_file = self.log_files[stream]

        with open(log_file, mode="r", encoding="utf-8") as f:
            if position is not None:
                f.seek(position)

            while True:
                # wait for a small time for complete lines to be written to the file
                # else, there's a possibility that a line may not be completely
                # written when attempting to read it.
                # This is not a problem, but improves readability.
                await asyncio.sleep(log_write_delay)

                try:
                    if timeout_per_line is None:
                        line = f.readline()
                    else:
                        # NOTE(review): f.readline() is a plain synchronous call
                        # returning a str, not an awaitable; asyncio.wait_for()
                        # on a non-awaitable raises TypeError, so this
                        # timeout_per_line path looks broken -- confirm and wrap
                        # the read in an executor/thread if a timeout is needed.
                        line = await asyncio.wait_for(f.readline(), timeout_per_line)
                except asyncio.TimeoutError as e:
                    raise LogReadTimeoutError(
                        "Timeout while reading a line from the log file for the "
                        "stream: %s" % stream
                    ) from e

                # when we encounter an empty line
                if not line:
                    # either the process has terminated, in which case we want to break
                    # and stop the reading process of the log file since no more logs
                    # will be written to it
                    if self.process.returncode is not None:
                        break
                    # or the process is still running and more logs could be written to
                    # the file, in which case we continue reading the log file
                    else:
                        continue

                position = f.tell()
                yield position, line.rstrip()

    async def emit_logs(
        self, stream: str = "stdout", custom_logger: Callable[..., None] = print
    ):
        """
        Helper function that can easily emit all the logs for a given stream.

        This function will only terminate when all the log has been printed.

        Parameters
        ----------
        stream : str, default "stdout"
            The stream to emit logs for. Can be one of "stdout" or "stderr".
        custom_logger : Callable[..., None], default print
            A custom logger function that takes in a string and "emits" it. By default,
            the log is printed to stdout.
        """

        async for _, line in self.stream_log(stream):
            custom_logger(line)

    def cleanup(self):
        """Clean up the temporary log files of this subprocess (if it ever ran)."""

        if self.run_called:
            shutil.rmtree(self.temp_dir, ignore_errors=True)

    async def kill(self, termination_timeout: float = 1):
        """
        Kill the subprocess and its descendants.

        Parameters
        ----------
        termination_timeout : float, default 1
            The time to wait after sending a SIGTERM to the process and its descendants
            before sending a SIGKILL.
        """

        if self.process is not None:
            kill_process_and_descendants(self.process.pid, termination_timeout)
        else:
            print("No process to kill.")

    def _handle_sigint(self, signum, frame):
        # SIGINT handler installed in __init__: schedule an async kill of the
        # managed process.
        # NOTE(review): asyncio.create_task() requires a *running* event loop;
        # if SIGINT arrives while no loop is running (e.g. during the
        # synchronous run() path) this raises RuntimeError -- confirm intended.
        asyncio.create_task(self.kill())
|
457
|
+
|
458
|
+
|
459
|
+
async def main():
    """Ad-hoc demo of SubprocessManager/CommandManager usage.

    Builds a `run` command for a local flow file via the Metaflow click API,
    launches it asynchronously, then demonstrates the different wait and
    log-streaming modes in sequence.
    """
    # NOTE(review): relative path to a developer's local test flow -- this demo
    # only works from a checkout where ../try.py exists.
    flow_file = "../try.py"
    from metaflow.cli import start
    from metaflow.runner.click_api import MetaflowAPI

    # Translate the flow's CLI into a concrete argument list for `run`.
    api = MetaflowAPI.from_cli(flow_file, start)
    command = api().run(alpha=5)
    cmd = [sys.executable, *command]

    async with SubprocessManager() as spm:
        # returns immediately
        pid = await spm.async_run_command(cmd)
        command_obj = spm.get(pid)

        print(pid)

        # this is None since the process has not completed yet
        print(command_obj.process.returncode)

        # wait / do some other processing while the process runs in background.
        # if the process finishes before this sleep period, the calls to `wait`
        # below are instantaneous since it has already ended..
        # time.sleep(10)

        # wait for process to finish
        await command_obj.wait()

        # wait for process to finish with a timeout, kill if timeout expires before completion
        await command_obj.wait(timeout=2)

        # wait for process to finish while streaming logs
        await command_obj.wait(stream="stdout")

        # wait for process to finish with a timeout while streaming logs
        await command_obj.wait(stream="stdout", timeout=3)

        # stream logs line by line and check for existence of a string, noting down the position
        interesting_position = 0
        async for position, line in command_obj.stream_log(stream="stdout"):
            print(line)
            if "alpha is" in line:
                interesting_position = position
                break

        print("ended streaming at: %s" % interesting_position)

        # wait / do some other processing while the process runs in background
        # if the process finishes before this sleep period, the streaming of logs
        # below are instantaneous since it has already ended..
        # time.sleep(10)

        # this blocks till the process completes unless we uncomment the `time.sleep` above..
        print(
            "resuming streaming from: %s while process is still running..."
            % interesting_position
        )
        async for position, line in command_obj.stream_log(
            stream="stdout", position=interesting_position
        ):
            print(line)

        # this will be instantaneous since the process has finished and we just read from the log file
        print("process has ended by now... streaming again from scratch..")
        async for position, line in command_obj.stream_log(stream="stdout"):
            print(line)

        # this will be instantaneous since the process has finished and we just read from the log file
        print(
            "process has ended by now... streaming again but from position of choice.."
        )
        async for position, line in command_obj.stream_log(
            stream="stdout", position=interesting_position
        ):
            print(line)

        # two parallel streams for stdout
        tasks = [
            command_obj.emit_logs(
                stream="stdout", custom_logger=lambda x: print("[STREAM A]: %s" % x)
            ),
            # this can be another 'command_obj' too, in which case
            # we stream logs from 2 different subprocesses in parallel :)
            command_obj.emit_logs(
                stream="stdout", custom_logger=lambda x: print("[STREAM B]: %s" % x)
            ),
        ]
        await asyncio.gather(*tasks)

        # get the location of log files..
        print(command_obj.log_files)


if __name__ == "__main__":
    asyncio.run(main())
|
metaflow/task.py
CHANGED
@@ -23,18 +23,7 @@ from .util import all_equal, get_username, resolve_identity, unicode_type
|
|
23
23
|
from .clone_util import clone_task_helper
|
24
24
|
from .metaflow_current import current
|
25
25
|
from metaflow.tracing import get_trace_id
|
26
|
-
from metaflow.util import namedtuple_with_defaults
|
27
|
-
|
28
|
-
foreach_frame_field_list = [
|
29
|
-
("step", str),
|
30
|
-
("var", str),
|
31
|
-
("num_splits", int),
|
32
|
-
("index", int),
|
33
|
-
("value", str),
|
34
|
-
]
|
35
|
-
ForeachFrame = namedtuple_with_defaults(
|
36
|
-
"ForeachFrame", foreach_frame_field_list, (None,) * (len(foreach_frame_field_list))
|
37
|
-
)
|
26
|
+
from metaflow.tuple_util import ForeachFrame
|
38
27
|
|
39
28
|
# Maximum number of characters of the foreach path that we store in the metadata.
|
40
29
|
MAX_FOREACH_PATH_LENGTH = 256
|
metaflow/tuple_util.py
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
# Keep this file minimum dependency as this will be imported by metaflow at bootup.
|
2
|
+
def namedtuple_with_defaults(typename, field_descr, defaults=()):
    """
    Build a NamedTuple class whose fields carry default values.

    Parameters
    ----------
    typename : str
        Name of the generated class.
    field_descr : List[Tuple[str, type]]
        (field name, field type) pairs, in order.
    defaults : tuple, default ()
        Default values, applied to the *last* len(defaults) fields.

    Returns
    -------
    type
        The generated NamedTuple subclass.
    """
    from typing import NamedTuple

    result = NamedTuple(typename, field_descr)
    result.__new__.__defaults__ = tuple(defaults)

    # Register the generated class on __main__ and claim that module as its
    # home, so instances can be (un)pickled correctly even though the class
    # was created dynamically.
    import __main__

    setattr(__main__, result.__name__, result)
    result.__module__ = "__main__"
    return result
|
14
|
+
|
15
|
+
|
16
|
+
# Foreach-stack frame definitions live here (rather than deeper in metaflow)
# so they are importable by the client without constructing a real flow.
foreach_frame_field_list = [
    ("step", str),
    ("var", str),
    ("num_splits", int),
    ("index", int),
    ("value", str),
]
# Every field defaults to None so partially-populated frames can be built.
ForeachFrame = namedtuple_with_defaults(
    "ForeachFrame", foreach_frame_field_list, (None,) * len(foreach_frame_field_list)
)
|
metaflow/util.py
CHANGED
@@ -51,21 +51,6 @@ except NameError:
|
|
51
51
|
from shlex import quote as _quote
|
52
52
|
|
53
53
|
|
54
|
-
from typing import NamedTuple
|
55
|
-
|
56
|
-
|
57
|
-
def namedtuple_with_defaults(typename, field_descr, defaults=()):
|
58
|
-
T = NamedTuple(typename, field_descr)
|
59
|
-
T.__new__.__defaults__ = tuple(defaults)
|
60
|
-
|
61
|
-
# Adding the following to ensure the named tuple can be (un)pickled correctly.
|
62
|
-
import __main__
|
63
|
-
|
64
|
-
setattr(__main__, T.__name__, T)
|
65
|
-
T.__module__ = "__main__"
|
66
|
-
return T
|
67
|
-
|
68
|
-
|
69
54
|
class TempDir(object):
|
70
55
|
# Provide a temporary directory since Python 2.7 does not have it inbuilt
|
71
56
|
def __enter__(self):
|
metaflow/vendor.py
CHANGED
metaflow/version.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
metaflow_version = "2.11.15"
|
1
|
+
metaflow_version = "2.12.0"
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: metaflow
|
3
|
-
Version: 2.11.15
|
3
|
+
Version: 2.12.0
|
4
4
|
Summary: Metaflow: More Data Science, Less Engineering
|
5
5
|
Author: Metaflow Developers
|
6
6
|
Author-email: help@metaflow.org
|
@@ -26,7 +26,7 @@ License-File: LICENSE
|
|
26
26
|
Requires-Dist: requests
|
27
27
|
Requires-Dist: boto3
|
28
28
|
Provides-Extra: stubs
|
29
|
-
Requires-Dist: metaflow-stubs ==2.11.15 ; extra == 'stubs'
|
29
|
+
Requires-Dist: metaflow-stubs ==2.12.0 ; extra == 'stubs'
|
30
30
|
|
31
31
|

|
32
32
|
|