metaflow 2.11.15__py2.py3-none-any.whl → 2.12.0__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metaflow/__init__.py +8 -0
- metaflow/_vendor/importlib_metadata/__init__.py +1063 -0
- metaflow/_vendor/importlib_metadata/_adapters.py +68 -0
- metaflow/_vendor/importlib_metadata/_collections.py +30 -0
- metaflow/_vendor/importlib_metadata/_compat.py +71 -0
- metaflow/_vendor/importlib_metadata/_functools.py +104 -0
- metaflow/_vendor/importlib_metadata/_itertools.py +73 -0
- metaflow/_vendor/importlib_metadata/_meta.py +48 -0
- metaflow/_vendor/importlib_metadata/_text.py +99 -0
- metaflow/_vendor/importlib_metadata/py.typed +0 -0
- metaflow/_vendor/typeguard/__init__.py +48 -0
- metaflow/_vendor/typeguard/_checkers.py +906 -0
- metaflow/_vendor/typeguard/_config.py +108 -0
- metaflow/_vendor/typeguard/_decorators.py +237 -0
- metaflow/_vendor/typeguard/_exceptions.py +42 -0
- metaflow/_vendor/typeguard/_functions.py +307 -0
- metaflow/_vendor/typeguard/_importhook.py +213 -0
- metaflow/_vendor/typeguard/_memo.py +48 -0
- metaflow/_vendor/typeguard/_pytest_plugin.py +100 -0
- metaflow/_vendor/typeguard/_suppression.py +88 -0
- metaflow/_vendor/typeguard/_transformer.py +1193 -0
- metaflow/_vendor/typeguard/_union_transformer.py +54 -0
- metaflow/_vendor/typeguard/_utils.py +169 -0
- metaflow/_vendor/typeguard/py.typed +0 -0
- metaflow/_vendor/typing_extensions.py +3053 -0
- metaflow/cli.py +48 -36
- metaflow/clone_util.py +6 -0
- metaflow/cmd/develop/stubs.py +2 -0
- metaflow/extension_support/__init__.py +2 -0
- metaflow/extension_support/plugins.py +2 -0
- metaflow/metaflow_config.py +24 -0
- metaflow/metaflow_environment.py +2 -2
- metaflow/parameters.py +1 -0
- metaflow/plugins/__init__.py +19 -0
- metaflow/plugins/airflow/airflow.py +7 -0
- metaflow/plugins/argo/argo_workflows.py +17 -0
- metaflow/plugins/aws/batch/batch_decorator.py +3 -3
- metaflow/plugins/azure/__init__.py +3 -0
- metaflow/plugins/azure/azure_credential.py +53 -0
- metaflow/plugins/azure/azure_exceptions.py +1 -1
- metaflow/plugins/azure/azure_secret_manager_secrets_provider.py +240 -0
- metaflow/plugins/azure/azure_utils.py +2 -35
- metaflow/plugins/azure/blob_service_client_factory.py +4 -2
- metaflow/plugins/datastores/azure_storage.py +6 -6
- metaflow/plugins/datatools/s3/s3.py +1 -1
- metaflow/plugins/gcp/__init__.py +1 -0
- metaflow/plugins/gcp/gcp_secret_manager_secrets_provider.py +169 -0
- metaflow/plugins/gcp/gs_storage_client_factory.py +52 -1
- metaflow/plugins/kubernetes/kubernetes.py +85 -8
- metaflow/plugins/kubernetes/kubernetes_cli.py +24 -1
- metaflow/plugins/kubernetes/kubernetes_client.py +4 -1
- metaflow/plugins/kubernetes/kubernetes_decorator.py +49 -4
- metaflow/plugins/kubernetes/kubernetes_job.py +208 -206
- metaflow/plugins/kubernetes/kubernetes_jobsets.py +784 -0
- metaflow/plugins/timeout_decorator.py +2 -1
- metaflow/runner/__init__.py +0 -0
- metaflow/runner/click_api.py +406 -0
- metaflow/runner/metaflow_runner.py +452 -0
- metaflow/runner/nbrun.py +246 -0
- metaflow/runner/subprocess_manager.py +552 -0
- metaflow/task.py +1 -12
- metaflow/tuple_util.py +27 -0
- metaflow/util.py +0 -15
- metaflow/vendor.py +0 -1
- metaflow/version.py +1 -1
- {metaflow-2.11.15.dist-info → metaflow-2.12.0.dist-info}/METADATA +2 -2
- {metaflow-2.11.15.dist-info → metaflow-2.12.0.dist-info}/RECORD +72 -39
- metaflow/_vendor/v3_7/__init__.py +0 -1
- /metaflow/_vendor/{v3_7/zipp.py → zipp.py} +0 -0
- {metaflow-2.11.15.dist-info → metaflow-2.12.0.dist-info}/LICENSE +0 -0
- {metaflow-2.11.15.dist-info → metaflow-2.12.0.dist-info}/WHEEL +0 -0
- {metaflow-2.11.15.dist-info → metaflow-2.12.0.dist-info}/entry_points.txt +0 -0
- {metaflow-2.11.15.dist-info → metaflow-2.12.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,452 @@
|
|
1
|
+
import os
|
2
|
+
import sys
|
3
|
+
import tempfile
|
4
|
+
import time
|
5
|
+
from typing import Dict, Iterator, Optional, Tuple
|
6
|
+
|
7
|
+
from metaflow import Run, metadata
|
8
|
+
|
9
|
+
from .subprocess_manager import CommandManager, SubprocessManager
|
10
|
+
|
11
|
+
|
12
|
+
def clear_and_set_os_environ(env: Dict):
    """
    Replace the whole process environment with `env`.

    Every variable currently present in `os.environ` is removed and the
    mapping is then repopulated from `env`.

    Parameters
    ----------
    env : Dict
        Environment variables (name -> value) that `os.environ` should
        contain once this function returns.
    """
    # Snapshot first: if the caller hands us `os.environ` itself, clearing it
    # would also empty `env` and the update below would restore nothing.
    snapshot = dict(env)
    os.environ.clear()
    os.environ.update(snapshot)
|
15
|
+
|
16
|
+
|
17
|
+
def read_from_file_when_ready(file_path: str, timeout: float = 5):
    """
    Poll a file until it has content and return what was read.

    The file is opened once (UTF-8 text) and read repeatedly from the same
    handle, sleeping 0.1 seconds between attempts, until a read returns a
    non-empty string.

    Parameters
    ----------
    file_path : str
        Path of the file to read. It must already exist.
    timeout : float, default 5
        Maximum number of seconds to keep polling, measured from the moment
        this function is entered.

    Returns
    -------
    str
        The first non-empty chunk returned by reading the file.

    Raises
    ------
    TimeoutError
        If the file is still empty once `timeout` seconds have elapsed.
    """
    began = time.time()
    with open(file_path, "r", encoding="utf-8") as handle:
        while True:
            data = handle.read()
            if data:
                return data
            # Nothing written yet: give up if we are past the deadline,
            # otherwise back off briefly before reading again.
            if time.time() - began > timeout:
                raise TimeoutError(
                    "Timeout while waiting for file content from '%s'" % file_path
                )
            time.sleep(0.1)
|
29
|
+
|
30
|
+
|
31
|
+
class ExecutingRun(object):
    """
    Handle on a run that was launched through `Runner` or `NBRunner`.

    It bundles the `metaflow.Run` object of the executing (or finished) run
    with the `CommandManager` wrapping the subprocess that executes it.

    Instances are returned by methods of `Runner` and `NBRunner`; they are
    not meant to be created directly.

    The object can be used as a context manager:
    ```python
    with Runner(...).run() as running:
        ...
    ```
    Leaving the `with` block calls the parent `Runner`'s `__exit__`, so use
    either this object or the `Runner` as the context manager, not both in a
    nested manner.
    """

    def __init__(
        self, runner: "Runner", command_obj: CommandManager, run_obj: Run
    ) -> None:
        """
        Create a new ExecutingRun -- users should not call this directly but
        use `Runner.run()` (or one of its siblings) instead.

        Parameters
        ----------
        runner : Runner
            Parent runner for this run.
        command_obj : CommandManager
            CommandManager containing the subprocess executing this run.
        run_obj : Run
            Run object corresponding to this run.
        """
        self.runner = runner
        self.command_obj = command_obj
        self.run = run_obj

    def __enter__(self) -> "ExecutingRun":
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Cleanup is owned by the parent Runner; simply forward to it.
        self.runner.__exit__(exc_type, exc_value, traceback)

    async def wait(
        self, timeout: Optional[float] = None, stream: Optional[str] = None
    ) -> "ExecutingRun":
        """
        Wait for this run to finish, optionally with a timeout
        and optionally streaming its output.

        This is a coroutine and has to be `await`ed.

        Parameters
        ----------
        timeout : Optional[float], default None
            The maximum time to wait for the run to finish.
            If the timeout is reached, the run is terminated.
        stream : Optional[str], default None
            If specified, the given stream is printed to stdout. `stream` can
            be one of `stdout` or `stderr`.

        Returns
        -------
        ExecutingRun
            This object, allowing you to chain calls.
        """
        await self.command_obj.wait(timeout, stream)
        return self

    @property
    def returncode(self) -> Optional[int]:
        """
        Return code of the underlying subprocess. A non-zero code indicates
        a failure, `None` a run that is still executing.

        Returns
        -------
        Optional[int]
            The return code of the underlying subprocess.
        """
        return self.command_obj.process.returncode

    @property
    def status(self) -> str:
        """
        Status of the underlying subprocess that is responsible for
        executing the run.

        The value is one of the following strings:
        - `running`: the subprocess has no return code yet.
        - `failed`: the subprocess exited with a non-zero return code.
        - `successful`: the subprocess exited with return code 0.

        Returns
        -------
        str
            The current status of the run.
        """
        if self.command_obj.process.returncode is None:
            return "running"
        if self.command_obj.process.returncode == 0:
            return "successful"
        return "failed"

    @property
    def stdout(self) -> str:
        """
        Current stdout of the run, read from the subprocess' stdout log
        file. For a finished run this is the entire output; otherwise it is
        the output written up to this point.

        Returns
        -------
        str
            The current snapshot of stdout.
        """
        log_path = self.command_obj.log_files.get("stdout")
        with open(log_path, "r", encoding="utf-8") as log_file:
            return log_file.read()

    @property
    def stderr(self) -> str:
        """
        Current stderr of the run, read from the subprocess' stderr log
        file. For a finished run this is the entire output; otherwise it is
        the output written up to this point.

        Returns
        -------
        str
            The current snapshot of stderr.
        """
        log_path = self.command_obj.log_files.get("stderr")
        with open(log_path, "r", encoding="utf-8") as log_file:
            return log_file.read()

    async def stream_log(
        self, stream: str, position: Optional[int] = None
    ) -> Iterator[Tuple[int, str]]:
        """
        Asynchronous generator streaming the subprocess' logs line by line.

        Consume it with `async for`:
        ```python
        async for position, line in running.stream_log("stdout"):
            ...
        ```

        Parameters
        ----------
        stream : str
            The stream to stream logs from. Can be one of `stdout` or `stderr`.
        position : Optional[int], default None
            The position in the log file to start streaming from. If None, it
            starts from the beginning of the log file. This allows resuming
            streaming from a previously known position.

        Yields
        ------
        Tuple[int, str]
            A tuple containing the position in the log file and the line
            read. The position can be fed into another `stream_log` call,
            for example.
        """
        # Pure pass-through of whatever the CommandManager yields.
        async for offset, text in self.command_obj.stream_log(stream, position):
            yield offset, text
|
197
|
+
|
198
|
+
|
199
|
+
class Runner(object):
    """
    Metaflow's Runner API that presents a programmatic interface
    to run flows and perform other operations either synchronously or asynchronously.
    The class expects a path to the flow file along with optional arguments
    that match top-level options on the command-line.

    This class works as a context manager (both `with` and `async with`),
    calling the subprocess manager's cleanup at exit to remove temporary
    files.

    Example:
    ```python
    with Runner('slowflow.py', pylint=False) as runner:
        result = runner.run(alpha=5, tags=["abc", "def"], max_workers=5)
        print(result.run.finished)
    ```

    Parameters
    ----------
    flow_file : str
        Path to the flow file to run
    show_output : bool, default True
        Show the 'stdout' and 'stderr' on the console. Only applicable to the
        synchronous 'run' and 'resume' functions.
    profile : Optional[str], default None
        Metaflow profile to use to run this run. If not specified, the default
        profile is used (or the one already set using `METAFLOW_PROFILE`)
    env : Optional[Dict], default None
        Additional environment variables to set for the Run. These are layered
        on top of (and override) the environment of this process.
    cwd : Optional[str], default None
        The directory to run the subprocess in; if not specified, the current
        directory is used.
    **kwargs : Any
        Additional arguments that you would pass to `python myflow.py` before
        the `run` command.
    """

    def __init__(
        self,
        flow_file: str,
        show_output: bool = True,
        profile: Optional[str] = None,
        env: Optional[Dict] = None,
        cwd: Optional[str] = None,
        **kwargs
    ):
        # these imports are required here and not at the top
        # since they interfere with the user defined Parameters
        # in the flow file, this is related to the ability of
        # importing 'Runner' directly i.e.
        #    from metaflow import Runner
        # This ability is made possible by the statement:
        # 'from .metaflow_runner import Runner' in '__init__.py'
        from metaflow.cli import start
        from metaflow.runner.click_api import MetaflowAPI

        self.flow_file = flow_file
        self.show_output = show_output

        # Snapshot of this process' environment at construction time; it is
        # restored in __get_executing_run after each launch.
        self.old_env = os.environ.copy()
        # Environment handed to the subprocess: snapshot + user overrides.
        self.env_vars = self.old_env.copy()
        self.env_vars.update(env or {})
        if profile:
            self.env_vars["METAFLOW_PROFILE"] = profile

        self.cwd = cwd
        self.spm = SubprocessManager()
        self.top_level_kwargs = kwargs
        self.api = MetaflowAPI.from_cli(self.flow_file, start)

    def __enter__(self) -> "Runner":
        return self

    async def __aenter__(self) -> "Runner":
        return self

    @staticmethod
    def __new_runner_attribute_file(temp_dir):
        """
        Create the (empty) runner attribute file inside `temp_dir`.

        Only the file's path is needed: it is passed to the subprocess via
        `runner_attribute_file` and re-opened by name when it is read back.
        The handle opened here is therefore closed right away instead of
        being leaked; with `delete=False` closing does not remove the file.

        Returns
        -------
        The closed `NamedTemporaryFile` wrapper; its `name` attribute holds
        the path.
        """
        tfp_runner_attribute = tempfile.NamedTemporaryFile(
            dir=temp_dir, delete=False
        )
        tfp_runner_attribute.close()
        return tfp_runner_attribute

    def __get_executing_run(self, tfp_runner_attribute, command_obj) -> ExecutingRun:
        """
        Build the `ExecutingRun` for a subprocess that was just launched.

        Parameters
        ----------
        tfp_runner_attribute
            Temporary file object whose `name` is the runner attribute file
            given to the subprocess.
        command_obj : CommandManager
            CommandManager of the launched subprocess.

        Returns
        -------
        ExecutingRun
            Wrapper around the `Run` named in the attribute file.

        Raises
        ------
        RuntimeError
            If the attribute file stays empty for 10 seconds; the message
            contains the command line plus any stdout/stderr logged so far.
        """
        # When two 'Runner' executions are done sequentially i.e. one after the other
        # the 2nd run kinda uses the 1st run's previously set metadata and
        # environment variables.

        # It is thus necessary to set them to correct values before we return
        # the Run object.
        try:
            # Set the environment variables to what they were before the run executed.
            clear_and_set_os_environ(self.old_env)

            # Set the correct metadata from the runner_attribute file corresponding to this run.
            content = read_from_file_when_ready(tfp_runner_attribute.name, timeout=10)
            # The content is split on its LAST ':' into metadata and pathspec.
            metadata_for_flow, pathspec = content.rsplit(":", maxsplit=1)
            metadata(metadata_for_flow)
            run_object = Run(pathspec, _namespace_check=False)
            return ExecutingRun(self, command_obj, run_object)
        except TimeoutError as e:
            # Read the logs through context managers (no leaked handles) and
            # with the same encoding ExecutingRun.stdout/stderr use.
            with open(command_obj.log_files["stdout"], encoding="utf-8") as fp:
                stdout_log = fp.read()
            with open(command_obj.log_files["stderr"], encoding="utf-8") as fp:
                stderr_log = fp.read()
            command = " ".join(command_obj.command)
            error_message = "Error executing: '%s':\n" % command
            if stdout_log.strip():
                error_message += "\nStdout:\n%s\n" % stdout_log
            if stderr_log.strip():
                error_message += "\nStderr:\n%s\n" % stderr_log
            raise RuntimeError(error_message) from e

    def run(self, **kwargs) -> ExecutingRun:
        """
        Blocking execution of the run. This method will wait until
        the run has completed execution.

        Parameters
        ----------
        **kwargs : Any
            Additional arguments that you would pass to `python myflow.py` after
            the `run` command, in particular, any parameters accepted by the flow.

        Returns
        -------
        ExecutingRun
            ExecutingRun containing the results of the run.
        """
        with tempfile.TemporaryDirectory() as temp_dir:
            tfp_runner_attribute = self.__new_runner_attribute_file(temp_dir)
            command = self.api(**self.top_level_kwargs).run(
                runner_attribute_file=tfp_runner_attribute.name, **kwargs
            )

            pid = self.spm.run_command(
                [sys.executable, *command],
                env=self.env_vars,
                cwd=self.cwd,
                show_output=self.show_output,
            )
            command_obj = self.spm.get(pid)

            # Must happen while temp_dir still exists: the attribute file
            # lives inside it.
            return self.__get_executing_run(tfp_runner_attribute, command_obj)

    def resume(self, **kwargs) -> ExecutingRun:
        """
        Blocking resume execution of the run.
        This method will wait until the resumed run has completed execution.

        Parameters
        ----------
        **kwargs : Any
            Additional arguments that you would pass to `python ./myflow.py` after
            the `resume` command.

        Returns
        -------
        ExecutingRun
            ExecutingRun containing the results of the resumed run.
        """
        with tempfile.TemporaryDirectory() as temp_dir:
            tfp_runner_attribute = self.__new_runner_attribute_file(temp_dir)
            command = self.api(**self.top_level_kwargs).resume(
                runner_attribute_file=tfp_runner_attribute.name, **kwargs
            )

            pid = self.spm.run_command(
                [sys.executable, *command],
                env=self.env_vars,
                cwd=self.cwd,
                show_output=self.show_output,
            )
            command_obj = self.spm.get(pid)

            return self.__get_executing_run(tfp_runner_attribute, command_obj)

    async def async_run(self, **kwargs) -> ExecutingRun:
        """
        Non-blocking execution of the run. This method will return as soon as the
        run has launched.

        Note that this method is asynchronous and needs to be `await`ed.

        Parameters
        ----------
        **kwargs : Any
            Additional arguments that you would pass to `python myflow.py` after
            the `run` command, in particular, any parameters accepted by the flow.

        Returns
        -------
        ExecutingRun
            ExecutingRun representing the run that was started.
        """
        with tempfile.TemporaryDirectory() as temp_dir:
            tfp_runner_attribute = self.__new_runner_attribute_file(temp_dir)
            command = self.api(**self.top_level_kwargs).run(
                runner_attribute_file=tfp_runner_attribute.name, **kwargs
            )

            pid = await self.spm.async_run_command(
                [sys.executable, *command],
                env=self.env_vars,
                cwd=self.cwd,
            )
            command_obj = self.spm.get(pid)

            return self.__get_executing_run(tfp_runner_attribute, command_obj)

    async def async_resume(self, **kwargs) -> ExecutingRun:
        """
        Non-blocking resume execution of the run.
        This method will return as soon as the resume has launched.

        Note that this method is asynchronous and needs to be `await`ed.

        Parameters
        ----------
        **kwargs : Any
            Additional arguments that you would pass to `python myflow.py` after
            the `resume` command.

        Returns
        -------
        ExecutingRun
            ExecutingRun representing the resumed run that was started.
        """
        with tempfile.TemporaryDirectory() as temp_dir:
            tfp_runner_attribute = self.__new_runner_attribute_file(temp_dir)
            command = self.api(**self.top_level_kwargs).resume(
                runner_attribute_file=tfp_runner_attribute.name, **kwargs
            )

            pid = await self.spm.async_run_command(
                [sys.executable, *command],
                env=self.env_vars,
                cwd=self.cwd,
            )
            command_obj = self.spm.get(pid)

            return self.__get_executing_run(tfp_runner_attribute, command_obj)

    def __exit__(self, exc_type, exc_value, traceback):
        self.spm.cleanup()

    async def __aexit__(self, exc_type, exc_value, traceback):
        self.spm.cleanup()

    def cleanup(self):
        """
        Delete any temporary files created during execution.
        """
        self.spm.cleanup()
|
metaflow/runner/nbrun.py
ADDED
@@ -0,0 +1,246 @@
|
|
1
|
+
import ast
|
2
|
+
import os
|
3
|
+
import tempfile
|
4
|
+
from typing import Dict, Optional
|
5
|
+
|
6
|
+
from metaflow import Runner
|
7
|
+
|
8
|
+
# Default value for NBRunner's `base_dir`: the directory `tempfile` uses for
# temporary files.
DEFAULT_DIR = tempfile.gettempdir()
|
9
|
+
|
10
|
+
|
11
|
+
class NBRunnerInitializationError(Exception):
    """Raised when an `NBRunner` cannot be initialized."""
|
15
|
+
|
16
|
+
|
17
|
+
def get_current_cell(ipython):
    """
    Return the most recent raw input recorded in the IPython history.

    Parameters
    ----------
    ipython
        The IPython shell object (as returned by `get_ipython()`), or a
        falsy value when no shell is available.

    Returns
    -------
    The last entry of `ipython.history_manager.input_hist_raw`, or `None`
    when `ipython` is falsy.
    """
    if not ipython:
        return None
    raw_inputs = ipython.history_manager.input_hist_raw
    return raw_inputs[-1]
|
21
|
+
|
22
|
+
|
23
|
+
def format_flowfile(cell):
    """
    Formats the given cell content to create a valid Python script that can
    be executed as a Metaflow flow.

    The cell is parsed and the first top-level class that lists `FlowSpec`
    among its bases is located. Everything in the cell up to the last line
    of that class is kept; anything after it is dropped and replaced by an
    `if __name__ == '__main__':` guard that instantiates the flow class.

    Parameters
    ----------
    cell : str
        Source code of the notebook cell.

    Returns
    -------
    str
        Source code of the stand-alone flow script.

    Raises
    ------
    ModuleNotFoundError
        If no top-level class in the cell inherits from `FlowSpec`.
    SyntaxError
        If the cell is not valid Python (raised by `ast.parse`).
    """

    def _is_flowspec_base(base):
        # `class F(FlowSpec)` parses to ast.Name and
        # `class F(metaflow.FlowSpec)` to ast.Attribute. Any other base
        # expression (subscript, call, ...) has neither `id` nor a matching
        # `attr` and must not crash the scan.
        if isinstance(base, ast.Name):
            return base.id == "FlowSpec"
        if isinstance(base, ast.Attribute):
            return base.attr == "FlowSpec"
        return False

    flowspec = [
        node
        for node in ast.parse(cell).body
        if isinstance(node, ast.ClassDef)
        and any(_is_flowspec_base(base) for base in node.bases)
    ]

    if not flowspec:
        raise ModuleNotFoundError(
            "The cell doesn't contain any class that inherits from 'FlowSpec'"
        )

    flow_class = flowspec[0]
    # end_lineno is 1-based and inclusive, so it doubles as the slice end.
    lines = cell.splitlines()[: flow_class.end_lineno]
    lines += ["if __name__ == '__main__':", f" {flow_class.name}()"]
    return "\n".join(lines)
|
41
|
+
|
42
|
+
|
43
|
+
class NBRunner(object):
    """
    A wrapper over `Runner` for executing flows defined in a Jupyter
    notebook cell.

    Instantiate this class on the last line of a notebook cell where
    a `flow` is defined. In contrast to `Runner`, this class is not
    meant to be used as a context manager. Instead, use a blocking helper
    function like `nbrun` (which calls `cleanup()` internally) or call
    `cleanup()` explicitly when using non-blocking APIs.

    ```python
    run = NBRunner(FlowName).nbrun()
    ```

    Parameters
    ----------
    flow : FlowSpec
        Flow defined in the same cell
    show_output : bool, default True
        Show the 'stdout' and 'stderr' on the console. Only applicable to the
        synchronous 'run' and 'resume' functions.
    profile : Optional[str], default None
        Metaflow profile to use to run this run. If not specified, the default
        profile is used (or the one already set using `METAFLOW_PROFILE`)
    env : Optional[Dict], default None
        Additional environment variables to set for the Run. These are layered
        on top of (and override) the environment of this process.
    base_dir : str, default `tempfile.gettempdir()`
        The directory in which the temporary flow file is written and in
        which the subprocess is run.
    **kwargs : Any
        Additional arguments that you would pass to `python myflow.py` before
        the `run` command.

    Raises
    ------
    NBRunnerInitializationError
        If IPython cannot be imported.
    ValueError
        If the current cell cannot be determined.
    """

    def __init__(
        self,
        flow,
        show_output: bool = True,
        profile: Optional[str] = None,
        env: Optional[Dict] = None,
        base_dir: str = DEFAULT_DIR,
        **kwargs,
    ):
        try:
            from IPython import get_ipython

            ipython = get_ipython()
        except ModuleNotFoundError:
            raise NBRunnerInitializationError(
                "'NBRunner' requires an interactive Python environment (such as Jupyter)"
            )

        self.cell = get_current_cell(ipython)
        self.flow = flow
        self.show_output = show_output

        self.env_vars = os.environ.copy()
        self.env_vars.update(env or {})
        # Blank out JPY_PARENT_PID in the environment handed to the flow's
        # subprocess.
        self.env_vars.update({"JPY_PARENT_PID": ""})
        if profile:
            self.env_vars["METAFLOW_PROFILE"] = profile

        self.base_dir = base_dir

        if not self.cell:
            raise ValueError("Couldn't find a cell.")

        # Build the script BEFORE creating the file: if the cell holds no
        # flow, format_flowfile raises and no stray temp file (or open
        # handle) is left behind.
        flow_script = format_flowfile(self.cell)

        # delete=False keeps the file on disk after the handle is closed so
        # the subprocess can run it; cleanup() removes it.
        with tempfile.NamedTemporaryFile(
            prefix=self.flow.__name__,
            suffix=".py",
            mode="w",
            dir=self.base_dir,
            delete=False,
        ) as tmp_flow_file:
            tmp_flow_file.write(flow_script)
        # Closed wrapper; only its `name` (the path) is used from here on.
        self.tmp_flow_file = tmp_flow_file

        self.runner = Runner(
            flow_file=self.tmp_flow_file.name,
            show_output=self.show_output,
            profile=profile,
            env=self.env_vars,
            cwd=self.base_dir,
            **kwargs,
        )

    def nbrun(self, **kwargs):
        """
        Blocking execution of the run. This method will wait until
        the run has completed execution.

        Note that in contrast to `run`, this method returns a
        `metaflow.Run` object directly and calls `cleanup()` internally
        to support a common notebook pattern of executing a flow and
        retrieving its results immediately.

        Parameters
        ----------
        **kwargs : Any
            Additional arguments that you would pass to `python myflow.py` after
            the `run` command, in particular, any parameters accepted by the flow.

        Returns
        -------
        Run
            A `metaflow.Run` object representing the finished run.
        """
        result = self.runner.run(**kwargs)
        self.cleanup()
        return result.run

    def nbresume(self, **kwargs):
        """
        Blocking resuming of a run. This method will wait until
        the resumed run has completed execution.

        Note that in contrast to `resume`, this method returns a
        `metaflow.Run` object directly and calls `cleanup()` internally
        to support a common notebook pattern of executing a flow and
        retrieving its results immediately.

        Parameters
        ----------
        **kwargs : Any
            Additional arguments that you would pass to `python myflow.py` after
            the `resume` command.

        Returns
        -------
        Run
            A `metaflow.Run` object representing the resumed run.
        """
        result = self.runner.resume(**kwargs)
        self.cleanup()
        return result.run

    def run(self, **kwargs):
        """
        Runs the flow. This method is equivalent to `Runner.run`; unlike
        `nbrun` it does not call `cleanup()`.

        Parameters
        ----------
        **kwargs : Any
            Additional arguments that you would pass to `python myflow.py` after
            the `run` command, in particular, any parameters accepted by the flow.

        Returns
        -------
        ExecutingRun
            ExecutingRun containing the results of the run.
        """
        return self.runner.run(**kwargs)

    def resume(self, **kwargs):
        """
        Resumes the flow. This method is equivalent to `Runner.resume`;
        unlike `nbresume` it does not call `cleanup()`.

        Parameters
        ----------
        **kwargs : Any
            Additional arguments that you would pass to `python myflow.py` after
            the `resume` command.

        Returns
        -------
        ExecutingRun
            ExecutingRun containing the results of the resumed run.
        """
        return self.runner.resume(**kwargs)

    async def async_run(self, **kwargs):
        """
        Non-blocking execution of the run. This method will return as soon as the
        run has launched. This method is equivalent to `Runner.async_run`.

        Note that this method is asynchronous and needs to be `await`ed.

        Parameters
        ----------
        **kwargs : Any
            Additional arguments that you would pass to `python myflow.py` after
            the `run` command, in particular, any parameters accepted by the flow.

        Returns
        -------
        ExecutingRun
            ExecutingRun representing the run that was started.
        """
        return await self.runner.async_run(**kwargs)

    async def async_resume(self, **kwargs):
        """
        Non-blocking resume execution of the run. This method will return as
        soon as the resume has launched. This method is equivalent to
        `Runner.async_resume`.

        Note that this method is asynchronous and needs to be `await`ed.

        Parameters
        ----------
        **kwargs : Any
            Additional arguments that you would pass to `python myflow.py` after
            the `resume` command.

        Returns
        -------
        ExecutingRun
            ExecutingRun representing the resumed run that was started.
        """
        return await self.runner.async_resume(**kwargs)

    def cleanup(self):
        """
        Delete any temporary files created during execution.

        Call this method after using `async_run` or `async_resume`. You don't
        have to call this after `nbrun` or `nbresume`.

        A temporary flow file that is already gone (for instance because
        `cleanup()` ran before) is ignored, and the runner's own cleanup is
        still invoked.
        """
        try:
            os.remove(self.tmp_flow_file.name)
        except FileNotFoundError:
            # Already removed, e.g. by the cleanup() that nbrun()/nbresume()
            # perform; nothing left to delete.
            pass
        self.runner.cleanup()
|