metaflow-2.18.13-py2.py3-none-any.whl → metaflow-2.19.1-py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. metaflow/__init__.py +1 -0
  2. metaflow/cli.py +78 -13
  3. metaflow/cli_components/run_cmds.py +182 -39
  4. metaflow/cli_components/step_cmd.py +160 -4
  5. metaflow/client/__init__.py +1 -0
  6. metaflow/client/core.py +162 -99
  7. metaflow/client/filecache.py +59 -32
  8. metaflow/cmd/code/__init__.py +2 -1
  9. metaflow/datastore/__init__.py +1 -0
  10. metaflow/datastore/content_addressed_store.py +40 -9
  11. metaflow/datastore/datastore_set.py +10 -1
  12. metaflow/datastore/flow_datastore.py +123 -4
  13. metaflow/datastore/spin_datastore.py +91 -0
  14. metaflow/datastore/task_datastore.py +86 -2
  15. metaflow/decorators.py +75 -6
  16. metaflow/extension_support/__init__.py +372 -305
  17. metaflow/flowspec.py +3 -2
  18. metaflow/metaflow_config.py +41 -0
  19. metaflow/metaflow_profile.py +18 -0
  20. metaflow/packaging_sys/utils.py +2 -39
  21. metaflow/packaging_sys/v1.py +63 -16
  22. metaflow/plugins/__init__.py +2 -0
  23. metaflow/plugins/argo/argo_client.py +1 -0
  24. metaflow/plugins/argo/argo_workflows.py +3 -1
  25. metaflow/plugins/cards/card_datastore.py +9 -3
  26. metaflow/plugins/cards/card_decorator.py +1 -0
  27. metaflow/plugins/cards/card_modules/basic.py +9 -3
  28. metaflow/plugins/datastores/local_storage.py +12 -6
  29. metaflow/plugins/datastores/spin_storage.py +12 -0
  30. metaflow/plugins/datatools/s3/s3.py +29 -10
  31. metaflow/plugins/datatools/s3/s3op.py +90 -62
  32. metaflow/plugins/metadata_providers/local.py +76 -82
  33. metaflow/plugins/metadata_providers/spin.py +16 -0
  34. metaflow/runner/metaflow_runner.py +210 -19
  35. metaflow/runtime.py +348 -21
  36. metaflow/task.py +61 -12
  37. metaflow/user_configs/config_parameters.py +2 -4
  38. metaflow/user_decorators/mutable_flow.py +1 -1
  39. metaflow/user_decorators/user_step_decorator.py +10 -1
  40. metaflow/util.py +191 -1
  41. metaflow/version.py +1 -1
  42. {metaflow-2.18.13.data → metaflow-2.19.1.data}/data/share/metaflow/devtools/Makefile +10 -0
  43. {metaflow-2.18.13.dist-info → metaflow-2.19.1.dist-info}/METADATA +2 -4
  44. {metaflow-2.18.13.dist-info → metaflow-2.19.1.dist-info}/RECORD +50 -47
  45. {metaflow-2.18.13.data → metaflow-2.19.1.data}/data/share/metaflow/devtools/Tiltfile +0 -0
  46. {metaflow-2.18.13.data → metaflow-2.19.1.data}/data/share/metaflow/devtools/pick_services.sh +0 -0
  47. {metaflow-2.18.13.dist-info → metaflow-2.19.1.dist-info}/WHEEL +0 -0
  48. {metaflow-2.18.13.dist-info → metaflow-2.19.1.dist-info}/entry_points.txt +0 -0
  49. {metaflow-2.18.13.dist-info → metaflow-2.19.1.dist-info}/licenses/LICENSE +0 -0
  50. {metaflow-2.18.13.dist-info → metaflow-2.19.1.dist-info}/top_level.txt +0 -0
@@ -6,7 +6,7 @@ import json
6
6
 
7
7
  from typing import Dict, Iterator, Optional, Tuple
8
8
 
9
- from metaflow import Run
9
+ from metaflow import Run, Task
10
10
 
11
11
  from metaflow.metaflow_config import CLICK_API_PROCESS_CONFIG
12
12
 
@@ -21,30 +21,36 @@ from .utils import (
21
21
  from .subprocess_manager import CommandManager, SubprocessManager
22
22
 
23
23
 
24
- class ExecutingRun(object):
24
+ class ExecutingProcess(object):
25
25
  """
26
- This class contains a reference to a `metaflow.Run` object representing
27
- the currently executing or finished run, as well as metadata related
28
- to the process.
26
+ This is a base class for `ExecutingRun` and `ExecutingTask` classes.
27
+ The `ExecutingRun` and `ExecutingTask` classes are returned by methods
28
+ in `Runner` and `NBRunner`, and they are subclasses of this class.
29
29
 
30
- `ExecutingRun` is returned by methods in `Runner` and `NBRunner`. It is not
31
- meant to be instantiated directly.
30
+ The `ExecutingRun` class for instance contains a reference to a `metaflow.Run`
31
+ object representing the currently executing or finished run, as well as the metadata
32
+ related to the process.
33
+
34
+ Similarly, the `ExecutingTask` class contains a reference to a `metaflow.Task`
35
+ object representing the currently executing or finished task, as well as the metadata
36
+ related to the process.
37
+
38
+ This class or its subclasses are not meant to be instantiated directly. The class
39
+ works as a context manager, allowing you to use a pattern like:
32
40
 
33
- This class works as a context manager, allowing you to use a pattern like
34
41
  ```python
35
42
  with Runner(...).run() as running:
36
43
  ...
37
44
  ```
38
- Note that you should use either this object as the context manager or
39
- `Runner`, not both in a nested manner.
45
+
46
+ Note that you should use either this object as the context manager or `Runner`, not both
47
+ in a nested manner.
40
48
  """
41
49
 
42
- def __init__(
43
- self, runner: "Runner", command_obj: CommandManager, run_obj: Run
44
- ) -> None:
50
+ def __init__(self, runner: "Runner", command_obj: CommandManager) -> None:
45
51
  """
46
52
  Create a new ExecutingRun -- this should not be done by the user directly but
47
- instead user Runner.run()
53
+ instead use Runner.run()
48
54
 
49
55
  Parameters
50
56
  ----------
@@ -57,9 +63,8 @@ class ExecutingRun(object):
57
63
  """
58
64
  self.runner = runner
59
65
  self.command_obj = command_obj
60
- self.run = run_obj
61
66
 
62
- def __enter__(self) -> "ExecutingRun":
67
+ def __enter__(self) -> "ExecutingProcess":
63
68
  return self
64
69
 
65
70
  def __exit__(self, exc_type, exc_value, traceback):
@@ -67,7 +72,7 @@ class ExecutingRun(object):
67
72
 
68
73
  async def wait(
69
74
  self, timeout: Optional[float] = None, stream: Optional[str] = None
70
- ) -> "ExecutingRun":
75
+ ) -> "ExecutingProcess":
71
76
  """
72
77
  Wait for this run to finish, optionally with a timeout
73
78
  and optionally streaming its output.
@@ -86,7 +91,7 @@ class ExecutingRun(object):
86
91
 
87
92
  Returns
88
93
  -------
89
- ExecutingRun
94
+ ExecutingProcess
90
95
  This object, allowing you to chain calls.
91
96
  """
92
97
  await self.command_obj.wait(timeout, stream)
@@ -193,6 +198,76 @@ class ExecutingRun(object):
193
198
  yield position, line
194
199
 
195
200
 
201
+ class ExecutingTask(ExecutingProcess):
202
+ """
203
+ This class contains a reference to a `metaflow.Task` object representing
204
+ the currently executing or finished task, as well as metadata related
205
+ to the process.
206
+ `ExecutingTask` is returned by methods in `Runner` and `NBRunner`. It is not
207
+ meant to be instantiated directly.
208
+ This class works as a context manager, allowing you to use a pattern like
209
+ ```python
210
+ with Runner(...).spin() as running:
211
+ ...
212
+ ```
213
+ Note that you should use either this object as the context manager or
214
+ `Runner`, not both in a nested manner.
215
+ """
216
+
217
+ def __init__(
218
+ self, runner: "Runner", command_obj: CommandManager, task_obj: Task
219
+ ) -> None:
220
+ """
221
+ Create a new ExecutingTask -- this should not be done by the user directly but
222
+ instead use Runner.spin()
223
+ Parameters
224
+ ----------
225
+ runner : Runner
226
+ Parent runner for this task.
227
+ command_obj : CommandManager
228
+ CommandManager containing the subprocess executing this task.
229
+ task_obj : Task
230
+ Task object corresponding to this task.
231
+ """
232
+ super().__init__(runner, command_obj)
233
+ self.task = task_obj
234
+
235
+
236
+ class ExecutingRun(ExecutingProcess):
237
+ """
238
+ This class contains a reference to a `metaflow.Run` object representing
239
+ the currently executing or finished run, as well as metadata related
240
+ to the process.
241
+ `ExecutingRun` is returned by methods in `Runner` and `NBRunner`. It is not
242
+ meant to be instantiated directly.
243
+ This class works as a context manager, allowing you to use a pattern like
244
+ ```python
245
+ with Runner(...).run() as running:
246
+ ...
247
+ ```
248
+ Note that you should use either this object as the context manager or
249
+ `Runner`, not both in a nested manner.
250
+ """
251
+
252
+ def __init__(
253
+ self, runner: "Runner", command_obj: CommandManager, run_obj: Run
254
+ ) -> None:
255
+ """
256
+ Create a new ExecutingRun -- this should not be done by the user directly but
257
+ instead use Runner.run()
258
+ Parameters
259
+ ----------
260
+ runner : Runner
261
+ Parent runner for this run.
262
+ command_obj : CommandManager
263
+ CommandManager containing the subprocess executing this run.
264
+ run_obj : Run
265
+ Run object corresponding to this run.
266
+ """
267
+ super().__init__(runner, command_obj)
268
+ self.run = run_obj
269
+
270
+
196
271
  class RunnerMeta(type):
197
272
  def __new__(mcs, name, bases, dct):
198
273
  cls = super().__new__(mcs, name, bases, dct)
@@ -275,7 +350,7 @@ class Runner(metaclass=RunnerMeta):
275
350
  env: Optional[Dict[str, str]] = None,
276
351
  cwd: Optional[str] = None,
277
352
  file_read_timeout: int = 3600,
278
- **kwargs
353
+ **kwargs,
279
354
  ):
280
355
  # these imports are required here and not at the top
281
356
  # since they interfere with the user defined Parameters
@@ -397,6 +472,78 @@ class Runner(metaclass=RunnerMeta):
397
472
 
398
473
  return self.__get_executing_run(attribute_file_fd, command_obj)
399
474
 
475
+ def __get_executing_task(self, attribute_file_fd, command_obj):
476
+ content = handle_timeout(attribute_file_fd, command_obj, self.file_read_timeout)
477
+
478
+ command_obj.sync_wait()
479
+
480
+ content = json.loads(content)
481
+ pathspec = f"{content.get('flow_name')}/{content.get('run_id')}/{content.get('step_name')}/{content.get('task_id')}"
482
+
483
+ # Set the correct metadata from the runner_attribute file corresponding to this run.
484
+ metadata_for_flow = content.get("metadata")
485
+
486
+ task_object = Task(
487
+ pathspec, _namespace_check=False, _current_metadata=metadata_for_flow
488
+ )
489
+ return ExecutingTask(self, command_obj, task_object)
490
+
491
+ async def __async_get_executing_task(self, attribute_file_fd, command_obj):
492
+ content = await async_handle_timeout(
493
+ attribute_file_fd, command_obj, self.file_read_timeout
494
+ )
495
+ content = json.loads(content)
496
+ pathspec = f"{content.get('flow_name')}/{content.get('run_id')}/{content.get('step_name')}/{content.get('task_id')}"
497
+
498
+ # Set the correct metadata from the runner_attribute file corresponding to this run.
499
+ metadata_for_flow = content.get("metadata")
500
+
501
+ task_object = Task(
502
+ pathspec, _namespace_check=False, _current_metadata=metadata_for_flow
503
+ )
504
+ return ExecutingTask(self, command_obj, task_object)
505
+
506
+ def spin(self, pathspec, **kwargs) -> ExecutingTask:
507
+ """
508
+ Blocking spin execution of the run.
509
+ This method will wait until the spun run has completed execution.
510
+ Parameters
511
+ ----------
512
+ pathspec : str
513
+ The pathspec of the step/task to spin.
514
+ **kwargs : Any
515
+ Additional arguments that you would pass to `python ./myflow.py` after
516
+ the `spin` command.
517
+ Returns
518
+ -------
519
+ ExecutingTask
520
+ ExecutingTask containing the results of the spun task.
521
+ """
522
+ with temporary_fifo() as (attribute_file_path, attribute_file_fd):
523
+ if CLICK_API_PROCESS_CONFIG:
524
+ with with_dir(self.cwd):
525
+ command = self.api(**self.top_level_kwargs).spin(
526
+ pathspec=pathspec,
527
+ runner_attribute_file=attribute_file_path,
528
+ **kwargs,
529
+ )
530
+ else:
531
+ command = self.api(**self.top_level_kwargs).spin(
532
+ pathspec=pathspec,
533
+ runner_attribute_file=attribute_file_path,
534
+ **kwargs,
535
+ )
536
+
537
+ pid = self.spm.run_command(
538
+ [sys.executable, *command],
539
+ env=self.env_vars,
540
+ cwd=self.cwd,
541
+ show_output=self.show_output,
542
+ )
543
+ command_obj = self.spm.get(pid)
544
+
545
+ return self.__get_executing_task(attribute_file_fd, command_obj)
546
+
400
547
  def resume(self, **kwargs) -> ExecutingRun:
401
548
  """
402
549
  Blocking resume execution of the run.
@@ -510,6 +657,50 @@ class Runner(metaclass=RunnerMeta):
510
657
 
511
658
  return await self.__async_get_executing_run(attribute_file_fd, command_obj)
512
659
 
660
+ async def async_spin(self, pathspec, **kwargs) -> ExecutingTask:
661
+ """
662
+ Non-blocking spin execution of the run.
663
+ This method will return as soon as the spun task has launched.
664
+
665
+ Note that this method is asynchronous and needs to be `await`ed.
666
+
667
+ Parameters
668
+ ----------
669
+ pathspec : str
670
+ The pathspec of the step/task to spin.
671
+ **kwargs : Any
672
+ Additional arguments that you would pass to `python ./myflow.py` after
673
+ the `spin` command.
674
+
675
+ Returns
676
+ -------
677
+ ExecutingTask
678
+ ExecutingTask representing the spun task that was started.
679
+ """
680
+ with temporary_fifo() as (attribute_file_path, attribute_file_fd):
681
+ if CLICK_API_PROCESS_CONFIG:
682
+ with with_dir(self.cwd):
683
+ command = self.api(**self.top_level_kwargs).spin(
684
+ pathspec=pathspec,
685
+ runner_attribute_file=attribute_file_path,
686
+ **kwargs,
687
+ )
688
+ else:
689
+ command = self.api(**self.top_level_kwargs).spin(
690
+ pathspec=pathspec,
691
+ runner_attribute_file=attribute_file_path,
692
+ **kwargs,
693
+ )
694
+
695
+ pid = await self.spm.async_run_command(
696
+ [sys.executable, *command],
697
+ env=self.env_vars,
698
+ cwd=self.cwd,
699
+ )
700
+ command_obj = self.spm.get(pid)
701
+
702
+ return await self.__async_get_executing_task(attribute_file_fd, command_obj)
703
+
513
704
  def __exit__(self, exc_type, exc_value, traceback):
514
705
  self.spm.cleanup()
515
706