ob-metaflow 2.15.13.1__py2.py3-none-any.whl → 2.19.7.1rc0__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (169) hide show
  1. metaflow/__init__.py +10 -3
  2. metaflow/_vendor/imghdr/__init__.py +186 -0
  3. metaflow/_vendor/yaml/__init__.py +427 -0
  4. metaflow/_vendor/yaml/composer.py +139 -0
  5. metaflow/_vendor/yaml/constructor.py +748 -0
  6. metaflow/_vendor/yaml/cyaml.py +101 -0
  7. metaflow/_vendor/yaml/dumper.py +62 -0
  8. metaflow/_vendor/yaml/emitter.py +1137 -0
  9. metaflow/_vendor/yaml/error.py +75 -0
  10. metaflow/_vendor/yaml/events.py +86 -0
  11. metaflow/_vendor/yaml/loader.py +63 -0
  12. metaflow/_vendor/yaml/nodes.py +49 -0
  13. metaflow/_vendor/yaml/parser.py +589 -0
  14. metaflow/_vendor/yaml/reader.py +185 -0
  15. metaflow/_vendor/yaml/representer.py +389 -0
  16. metaflow/_vendor/yaml/resolver.py +227 -0
  17. metaflow/_vendor/yaml/scanner.py +1435 -0
  18. metaflow/_vendor/yaml/serializer.py +111 -0
  19. metaflow/_vendor/yaml/tokens.py +104 -0
  20. metaflow/cards.py +4 -0
  21. metaflow/cli.py +125 -21
  22. metaflow/cli_components/init_cmd.py +1 -0
  23. metaflow/cli_components/run_cmds.py +204 -40
  24. metaflow/cli_components/step_cmd.py +160 -4
  25. metaflow/client/__init__.py +1 -0
  26. metaflow/client/core.py +198 -130
  27. metaflow/client/filecache.py +59 -32
  28. metaflow/cmd/code/__init__.py +2 -1
  29. metaflow/cmd/develop/stub_generator.py +49 -18
  30. metaflow/cmd/develop/stubs.py +9 -27
  31. metaflow/cmd/make_wrapper.py +30 -0
  32. metaflow/datastore/__init__.py +1 -0
  33. metaflow/datastore/content_addressed_store.py +40 -9
  34. metaflow/datastore/datastore_set.py +10 -1
  35. metaflow/datastore/flow_datastore.py +124 -4
  36. metaflow/datastore/spin_datastore.py +91 -0
  37. metaflow/datastore/task_datastore.py +92 -6
  38. metaflow/debug.py +5 -0
  39. metaflow/decorators.py +331 -82
  40. metaflow/extension_support/__init__.py +414 -356
  41. metaflow/extension_support/_empty_file.py +2 -2
  42. metaflow/flowspec.py +322 -82
  43. metaflow/graph.py +178 -15
  44. metaflow/includefile.py +25 -3
  45. metaflow/lint.py +94 -3
  46. metaflow/meta_files.py +13 -0
  47. metaflow/metadata_provider/metadata.py +13 -2
  48. metaflow/metaflow_config.py +66 -4
  49. metaflow/metaflow_environment.py +91 -25
  50. metaflow/metaflow_profile.py +18 -0
  51. metaflow/metaflow_version.py +16 -1
  52. metaflow/package/__init__.py +673 -0
  53. metaflow/packaging_sys/__init__.py +880 -0
  54. metaflow/packaging_sys/backend.py +128 -0
  55. metaflow/packaging_sys/distribution_support.py +153 -0
  56. metaflow/packaging_sys/tar_backend.py +99 -0
  57. metaflow/packaging_sys/utils.py +54 -0
  58. metaflow/packaging_sys/v1.py +527 -0
  59. metaflow/parameters.py +6 -2
  60. metaflow/plugins/__init__.py +6 -0
  61. metaflow/plugins/airflow/airflow.py +11 -1
  62. metaflow/plugins/airflow/airflow_cli.py +16 -5
  63. metaflow/plugins/argo/argo_client.py +42 -20
  64. metaflow/plugins/argo/argo_events.py +6 -6
  65. metaflow/plugins/argo/argo_workflows.py +1023 -344
  66. metaflow/plugins/argo/argo_workflows_cli.py +396 -94
  67. metaflow/plugins/argo/argo_workflows_decorator.py +9 -0
  68. metaflow/plugins/argo/argo_workflows_deployer_objects.py +75 -49
  69. metaflow/plugins/argo/capture_error.py +5 -2
  70. metaflow/plugins/argo/conditional_input_paths.py +35 -0
  71. metaflow/plugins/argo/exit_hooks.py +209 -0
  72. metaflow/plugins/argo/param_val.py +19 -0
  73. metaflow/plugins/aws/aws_client.py +6 -0
  74. metaflow/plugins/aws/aws_utils.py +33 -1
  75. metaflow/plugins/aws/batch/batch.py +72 -5
  76. metaflow/plugins/aws/batch/batch_cli.py +24 -3
  77. metaflow/plugins/aws/batch/batch_decorator.py +57 -6
  78. metaflow/plugins/aws/step_functions/step_functions.py +28 -3
  79. metaflow/plugins/aws/step_functions/step_functions_cli.py +49 -4
  80. metaflow/plugins/aws/step_functions/step_functions_deployer.py +3 -0
  81. metaflow/plugins/aws/step_functions/step_functions_deployer_objects.py +30 -0
  82. metaflow/plugins/cards/card_cli.py +20 -1
  83. metaflow/plugins/cards/card_creator.py +24 -1
  84. metaflow/plugins/cards/card_datastore.py +21 -49
  85. metaflow/plugins/cards/card_decorator.py +58 -6
  86. metaflow/plugins/cards/card_modules/basic.py +38 -9
  87. metaflow/plugins/cards/card_modules/bundle.css +1 -1
  88. metaflow/plugins/cards/card_modules/chevron/renderer.py +1 -1
  89. metaflow/plugins/cards/card_modules/components.py +592 -3
  90. metaflow/plugins/cards/card_modules/convert_to_native_type.py +34 -5
  91. metaflow/plugins/cards/card_modules/json_viewer.py +232 -0
  92. metaflow/plugins/cards/card_modules/main.css +1 -0
  93. metaflow/plugins/cards/card_modules/main.js +56 -41
  94. metaflow/plugins/cards/card_modules/test_cards.py +22 -6
  95. metaflow/plugins/cards/component_serializer.py +1 -8
  96. metaflow/plugins/cards/metadata.py +22 -0
  97. metaflow/plugins/catch_decorator.py +9 -0
  98. metaflow/plugins/datastores/local_storage.py +12 -6
  99. metaflow/plugins/datastores/spin_storage.py +12 -0
  100. metaflow/plugins/datatools/s3/s3.py +49 -17
  101. metaflow/plugins/datatools/s3/s3op.py +113 -66
  102. metaflow/plugins/env_escape/client_modules.py +102 -72
  103. metaflow/plugins/events_decorator.py +127 -121
  104. metaflow/plugins/exit_hook/__init__.py +0 -0
  105. metaflow/plugins/exit_hook/exit_hook_decorator.py +46 -0
  106. metaflow/plugins/exit_hook/exit_hook_script.py +52 -0
  107. metaflow/plugins/kubernetes/kubernetes.py +12 -1
  108. metaflow/plugins/kubernetes/kubernetes_cli.py +11 -0
  109. metaflow/plugins/kubernetes/kubernetes_decorator.py +25 -6
  110. metaflow/plugins/kubernetes/kubernetes_job.py +12 -4
  111. metaflow/plugins/kubernetes/kubernetes_jobsets.py +31 -30
  112. metaflow/plugins/metadata_providers/local.py +76 -82
  113. metaflow/plugins/metadata_providers/service.py +13 -9
  114. metaflow/plugins/metadata_providers/spin.py +16 -0
  115. metaflow/plugins/package_cli.py +36 -24
  116. metaflow/plugins/parallel_decorator.py +11 -2
  117. metaflow/plugins/parsers.py +16 -0
  118. metaflow/plugins/pypi/bootstrap.py +7 -1
  119. metaflow/plugins/pypi/conda_decorator.py +41 -82
  120. metaflow/plugins/pypi/conda_environment.py +14 -6
  121. metaflow/plugins/pypi/micromamba.py +9 -1
  122. metaflow/plugins/pypi/pip.py +41 -5
  123. metaflow/plugins/pypi/pypi_decorator.py +4 -4
  124. metaflow/plugins/pypi/utils.py +22 -0
  125. metaflow/plugins/secrets/__init__.py +3 -0
  126. metaflow/plugins/secrets/secrets_decorator.py +14 -178
  127. metaflow/plugins/secrets/secrets_func.py +49 -0
  128. metaflow/plugins/secrets/secrets_spec.py +101 -0
  129. metaflow/plugins/secrets/utils.py +74 -0
  130. metaflow/plugins/test_unbounded_foreach_decorator.py +2 -2
  131. metaflow/plugins/timeout_decorator.py +0 -1
  132. metaflow/plugins/uv/bootstrap.py +29 -1
  133. metaflow/plugins/uv/uv_environment.py +5 -3
  134. metaflow/pylint_wrapper.py +5 -1
  135. metaflow/runner/click_api.py +79 -26
  136. metaflow/runner/deployer.py +208 -6
  137. metaflow/runner/deployer_impl.py +32 -12
  138. metaflow/runner/metaflow_runner.py +266 -33
  139. metaflow/runner/subprocess_manager.py +21 -1
  140. metaflow/runner/utils.py +27 -16
  141. metaflow/runtime.py +660 -66
  142. metaflow/task.py +255 -26
  143. metaflow/user_configs/config_options.py +33 -21
  144. metaflow/user_configs/config_parameters.py +220 -58
  145. metaflow/user_decorators/__init__.py +0 -0
  146. metaflow/user_decorators/common.py +144 -0
  147. metaflow/user_decorators/mutable_flow.py +512 -0
  148. metaflow/user_decorators/mutable_step.py +424 -0
  149. metaflow/user_decorators/user_flow_decorator.py +264 -0
  150. metaflow/user_decorators/user_step_decorator.py +749 -0
  151. metaflow/util.py +197 -7
  152. metaflow/vendor.py +23 -7
  153. metaflow/version.py +1 -1
  154. {ob_metaflow-2.15.13.1.data → ob_metaflow-2.19.7.1rc0.data}/data/share/metaflow/devtools/Makefile +13 -2
  155. {ob_metaflow-2.15.13.1.data → ob_metaflow-2.19.7.1rc0.data}/data/share/metaflow/devtools/Tiltfile +107 -7
  156. {ob_metaflow-2.15.13.1.data → ob_metaflow-2.19.7.1rc0.data}/data/share/metaflow/devtools/pick_services.sh +1 -0
  157. {ob_metaflow-2.15.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/METADATA +2 -3
  158. {ob_metaflow-2.15.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/RECORD +162 -121
  159. {ob_metaflow-2.15.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/WHEEL +1 -1
  160. metaflow/_vendor/v3_5/__init__.py +0 -1
  161. metaflow/_vendor/v3_5/importlib_metadata/__init__.py +0 -644
  162. metaflow/_vendor/v3_5/importlib_metadata/_compat.py +0 -152
  163. metaflow/_vendor/v3_5/zipp.py +0 -329
  164. metaflow/info_file.py +0 -25
  165. metaflow/package.py +0 -203
  166. metaflow/user_configs/config_decorators.py +0 -568
  167. {ob_metaflow-2.15.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/entry_points.txt +0 -0
  168. {ob_metaflow-2.15.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/licenses/LICENSE +0 -0
  169. {ob_metaflow-2.15.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/top_level.txt +0 -0
@@ -1,11 +1,14 @@
1
1
  import importlib
2
+ import inspect
2
3
  import os
3
4
  import sys
4
5
  import json
5
6
 
6
7
  from typing import Dict, Iterator, Optional, Tuple
7
8
 
8
- from metaflow import Run
9
+ from metaflow import Run, Task
10
+
11
+ from metaflow.metaflow_config import CLICK_API_PROCESS_CONFIG
9
12
 
10
13
  from metaflow.plugins import get_runner_cli
11
14
 
@@ -13,34 +16,41 @@ from .utils import (
13
16
  temporary_fifo,
14
17
  handle_timeout,
15
18
  async_handle_timeout,
19
+ with_dir,
16
20
  )
17
21
  from .subprocess_manager import CommandManager, SubprocessManager
18
22
 
19
23
 
20
- class ExecutingRun(object):
24
+ class ExecutingProcess(object):
21
25
  """
22
- This class contains a reference to a `metaflow.Run` object representing
23
- the currently executing or finished run, as well as metadata related
24
- to the process.
26
+ This is a base class for `ExecutingRun` and `ExecutingTask` classes.
27
+ The `ExecutingRun` and `ExecutingTask` classes are returned by methods
28
+ in `Runner` and `NBRunner`, and they are subclasses of this class.
25
29
 
26
- `ExecutingRun` is returned by methods in `Runner` and `NBRunner`. It is not
27
- meant to be instantiated directly.
30
+ The `ExecutingRun` class for instance contains a reference to a `metaflow.Run`
31
+ object representing the currently executing or finished run, as well as the metadata
32
+ related to the process.
33
+
34
+ Similarly, the `ExecutingTask` class contains a reference to a `metaflow.Task`
35
+ object representing the currently executing or finished task, as well as the metadata
36
+ related to the process.
37
+
38
+ This class or its subclasses are not meant to be instantiated directly. The class
39
+ works as a context manager, allowing you to use a pattern like:
28
40
 
29
- This class works as a context manager, allowing you to use a pattern like
30
41
  ```python
31
42
  with Runner(...).run() as running:
32
43
  ...
33
44
  ```
34
- Note that you should use either this object as the context manager or
35
- `Runner`, not both in a nested manner.
45
+
46
+ Note that you should use either this object as the context manager or `Runner`, not both
47
+ in a nested manner.
36
48
  """
37
49
 
38
- def __init__(
39
- self, runner: "Runner", command_obj: CommandManager, run_obj: Run
40
- ) -> None:
50
+ def __init__(self, runner: "Runner", command_obj: CommandManager) -> None:
41
51
  """
42
52
  Create a new ExecutingRun -- this should not be done by the user directly but
43
- instead user Runner.run()
53
+ instead use Runner.run()
44
54
 
45
55
  Parameters
46
56
  ----------
@@ -53,9 +63,8 @@ class ExecutingRun(object):
53
63
  """
54
64
  self.runner = runner
55
65
  self.command_obj = command_obj
56
- self.run = run_obj
57
66
 
58
- def __enter__(self) -> "ExecutingRun":
67
+ def __enter__(self) -> "ExecutingProcess":
59
68
  return self
60
69
 
61
70
  def __exit__(self, exc_type, exc_value, traceback):
@@ -63,7 +72,7 @@ class ExecutingRun(object):
63
72
 
64
73
  async def wait(
65
74
  self, timeout: Optional[float] = None, stream: Optional[str] = None
66
- ) -> "ExecutingRun":
75
+ ) -> "ExecutingProcess":
67
76
  """
68
77
  Wait for this run to finish, optionally with a timeout
69
78
  and optionally streaming its output.
@@ -82,7 +91,7 @@ class ExecutingRun(object):
82
91
 
83
92
  Returns
84
93
  -------
85
- ExecutingRun
94
+ ExecutingProcess
86
95
  This object, allowing you to chain calls.
87
96
  """
88
97
  await self.command_obj.wait(timeout, stream)
@@ -189,6 +198,76 @@ class ExecutingRun(object):
189
198
  yield position, line
190
199
 
191
200
 
201
class ExecutingTask(ExecutingProcess):
    """
    Handle on a spun task.

    Holds a reference to the `metaflow.Task` object for the task that is
    currently executing (or has finished), together with the metadata of the
    process that executes it.

    Instances are returned by methods in `Runner` and `NBRunner`; this class is
    not meant to be instantiated directly.

    The object works as a context manager:

    ```python
    with Runner(...).spin() as running:
        ...
    ```

    Use either this object or `Runner` as the context manager -- do not nest
    the two.
    """

    def __init__(
        self, runner: "Runner", command_obj: CommandManager, task_obj: Task
    ) -> None:
        """
        Build a new ExecutingTask. Users should not call this directly; use
        Runner.spin() instead.

        Parameters
        ----------
        runner : Runner
            Parent runner for this task.
        command_obj : CommandManager
            CommandManager containing the subprocess executing this task.
        task_obj : Task
            Task object corresponding to this task.
        """
        # The base class stores the runner and the command manager.
        super().__init__(runner=runner, command_obj=command_obj)
        self.task = task_obj
234
+
235
+
236
class ExecutingRun(ExecutingProcess):
    """
    Handle on a launched run.

    Holds a reference to the `metaflow.Run` object for the run that is
    currently executing (or has finished), together with the metadata of the
    process that executes it.

    Instances are returned by methods in `Runner` and `NBRunner`; this class is
    not meant to be instantiated directly.

    The object works as a context manager:

    ```python
    with Runner(...).run() as running:
        ...
    ```

    Use either this object or `Runner` as the context manager -- do not nest
    the two.
    """

    def __init__(
        self, runner: "Runner", command_obj: CommandManager, run_obj: Run
    ) -> None:
        """
        Build a new ExecutingRun. Users should not call this directly; use
        Runner.run() instead.

        Parameters
        ----------
        runner : Runner
            Parent runner for this run.
        command_obj : CommandManager
            CommandManager containing the subprocess executing this run.
        run_obj : Run
            Run object corresponding to this run.
        """
        # The base class stores the runner and the command manager.
        super().__init__(runner=runner, command_obj=command_obj)
        self.run = run_obj
269
+
270
+
192
271
  class RunnerMeta(type):
193
272
  def __new__(mcs, name, bases, dct):
194
273
  cls = super().__new__(mcs, name, bases, dct)
@@ -197,8 +276,22 @@ class RunnerMeta(type):
197
276
  def f(self, *args, **kwargs):
198
277
  return runner_subcommand(self, *args, **kwargs)
199
278
 
200
- f.__doc__ = runner_subcommand.__doc__ or ""
279
+ f.__doc__ = runner_subcommand.__init__.__doc__ or ""
201
280
  f.__name__ = subcommand_name
281
+ sig = inspect.signature(runner_subcommand)
282
+ # We take all the same parameters except replace the first with
283
+ # simple "self"
284
+ new_parameters = {}
285
+ for name, param in sig.parameters.items():
286
+ if new_parameters:
287
+ new_parameters[name] = param
288
+ else:
289
+ new_parameters["self"] = inspect.Parameter(
290
+ "self", inspect.Parameter.POSITIONAL_OR_KEYWORD
291
+ )
292
+ f.__signature__ = inspect.Signature(
293
+ list(new_parameters.values()), return_annotation=runner_subcommand
294
+ )
202
295
 
203
296
  return f
204
297
 
@@ -257,7 +350,7 @@ class Runner(metaclass=RunnerMeta):
257
350
  env: Optional[Dict[str, str]] = None,
258
351
  cwd: Optional[str] = None,
259
352
  file_read_timeout: int = 3600,
260
- **kwargs
353
+ **kwargs,
261
354
  ):
262
355
  # these imports are required here and not at the top
263
356
  # since they interfere with the user defined Parameters
@@ -299,7 +392,7 @@ class Runner(metaclass=RunnerMeta):
299
392
  if profile:
300
393
  self.env_vars["METAFLOW_PROFILE"] = profile
301
394
 
302
- self.cwd = cwd
395
+ self.cwd = cwd or os.getcwd()
303
396
  self.file_read_timeout = file_read_timeout
304
397
  self.spm = SubprocessManager()
305
398
  self.top_level_kwargs = kwargs
@@ -359,9 +452,15 @@ class Runner(metaclass=RunnerMeta):
359
452
  ExecutingRun containing the results of the run.
360
453
  """
361
454
  with temporary_fifo() as (attribute_file_path, attribute_file_fd):
362
- command = self.api(**self.top_level_kwargs).run(
363
- runner_attribute_file=attribute_file_path, **kwargs
364
- )
455
+ if CLICK_API_PROCESS_CONFIG:
456
+ with with_dir(self.cwd):
457
+ command = self.api(**self.top_level_kwargs).run(
458
+ runner_attribute_file=attribute_file_path, **kwargs
459
+ )
460
+ else:
461
+ command = self.api(**self.top_level_kwargs).run(
462
+ runner_attribute_file=attribute_file_path, **kwargs
463
+ )
365
464
 
366
465
  pid = self.spm.run_command(
367
466
  [sys.executable, *command],
@@ -373,6 +472,78 @@ class Runner(metaclass=RunnerMeta):
373
472
 
374
473
  return self.__get_executing_run(attribute_file_fd, command_obj)
375
474
 
475
def __get_executing_task(self, attribute_file_fd, command_obj):
    """
    Build the `ExecutingTask` for a spun task (blocking variant).

    Reads the runner attribute payload through `handle_timeout` (given
    `self.file_read_timeout`), waits for the subprocess with
    `command_obj.sync_wait()`, then decodes the JSON payload and uses it to
    construct the `Task` wrapped by the returned `ExecutingTask`.
    """
    raw_attributes = handle_timeout(
        attribute_file_fd, command_obj, self.file_read_timeout
    )

    # Block until the subprocess is done before building the Task object.
    command_obj.sync_wait()

    attributes = json.loads(raw_attributes)
    flow_name = attributes.get("flow_name")
    run_id = attributes.get("run_id")
    step_name = attributes.get("step_name")
    task_id = attributes.get("task_id")
    task_pathspec = f"{flow_name}/{run_id}/{step_name}/{task_id}"

    # Use the metadata recorded in the runner attribute file for this task.
    task_metadata = attributes.get("metadata")

    spun_task = Task(
        task_pathspec, _namespace_check=False, _current_metadata=task_metadata
    )
    return ExecutingTask(self, command_obj, spun_task)
490
+
491
async def __async_get_executing_task(self, attribute_file_fd, command_obj):
    """
    Build the `ExecutingTask` for a spun task (asynchronous variant).

    Awaits the runner attribute payload through `async_handle_timeout` (given
    `self.file_read_timeout`), decodes the JSON payload and uses it to
    construct the `Task` wrapped by the returned `ExecutingTask`. Unlike the
    blocking variant, this does not wait for the subprocess.
    """
    raw_attributes = await async_handle_timeout(
        attribute_file_fd, command_obj, self.file_read_timeout
    )

    attributes = json.loads(raw_attributes)
    flow_name = attributes.get("flow_name")
    run_id = attributes.get("run_id")
    step_name = attributes.get("step_name")
    task_id = attributes.get("task_id")
    task_pathspec = f"{flow_name}/{run_id}/{step_name}/{task_id}"

    # Use the metadata recorded in the runner attribute file for this task.
    task_metadata = attributes.get("metadata")

    spun_task = Task(
        task_pathspec, _namespace_check=False, _current_metadata=task_metadata
    )
    return ExecutingTask(self, command_obj, spun_task)
505
+
506
def spin(self, pathspec, **kwargs) -> ExecutingTask:
    """
    Spin a step/task and block until it has completed execution.

    Parameters
    ----------
    pathspec : str
        The pathspec of the step/task to spin.
    **kwargs : Any
        Additional arguments that you would pass to `python ./myflow.py` after
        the `spin` command.

    Returns
    -------
    ExecutingTask
        ExecutingTask containing the results of the spun task.
    """
    with temporary_fifo() as (attribute_file_path, attribute_file_fd):

        def _build_command():
            # Single place where the `spin` command line is assembled.
            return self.api(**self.top_level_kwargs).spin(
                pathspec=pathspec,
                runner_attribute_file=attribute_file_path,
                **kwargs,
            )

        if not CLICK_API_PROCESS_CONFIG:
            command = _build_command()
        else:
            # Build the command from within the runner's working directory.
            with with_dir(self.cwd):
                command = _build_command()

        pid = self.spm.run_command(
            [sys.executable, *command],
            env=self.env_vars,
            cwd=self.cwd,
            show_output=self.show_output,
        )
        command_obj = self.spm.get(pid)

        return self.__get_executing_task(attribute_file_fd, command_obj)
546
+
376
547
  def resume(self, **kwargs) -> ExecutingRun:
377
548
  """
378
549
  Blocking resume execution of the run.
@@ -390,9 +561,15 @@ class Runner(metaclass=RunnerMeta):
390
561
  ExecutingRun containing the results of the resumed run.
391
562
  """
392
563
  with temporary_fifo() as (attribute_file_path, attribute_file_fd):
393
- command = self.api(**self.top_level_kwargs).resume(
394
- runner_attribute_file=attribute_file_path, **kwargs
395
- )
564
+ if CLICK_API_PROCESS_CONFIG:
565
+ with with_dir(self.cwd):
566
+ command = self.api(**self.top_level_kwargs).resume(
567
+ runner_attribute_file=attribute_file_path, **kwargs
568
+ )
569
+ else:
570
+ command = self.api(**self.top_level_kwargs).resume(
571
+ runner_attribute_file=attribute_file_path, **kwargs
572
+ )
396
573
 
397
574
  pid = self.spm.run_command(
398
575
  [sys.executable, *command],
@@ -423,9 +600,15 @@ class Runner(metaclass=RunnerMeta):
423
600
  ExecutingRun representing the run that was started.
424
601
  """
425
602
  with temporary_fifo() as (attribute_file_path, attribute_file_fd):
426
- command = self.api(**self.top_level_kwargs).run(
427
- runner_attribute_file=attribute_file_path, **kwargs
428
- )
603
+ if CLICK_API_PROCESS_CONFIG:
604
+ with with_dir(self.cwd):
605
+ command = self.api(**self.top_level_kwargs).run(
606
+ runner_attribute_file=attribute_file_path, **kwargs
607
+ )
608
+ else:
609
+ command = self.api(**self.top_level_kwargs).run(
610
+ runner_attribute_file=attribute_file_path, **kwargs
611
+ )
429
612
 
430
613
  pid = await self.spm.async_run_command(
431
614
  [sys.executable, *command],
@@ -455,9 +638,15 @@ class Runner(metaclass=RunnerMeta):
455
638
  ExecutingRun representing the resumed run that was started.
456
639
  """
457
640
  with temporary_fifo() as (attribute_file_path, attribute_file_fd):
458
- command = self.api(**self.top_level_kwargs).resume(
459
- runner_attribute_file=attribute_file_path, **kwargs
460
- )
641
+ if CLICK_API_PROCESS_CONFIG:
642
+ with with_dir(self.cwd):
643
+ command = self.api(**self.top_level_kwargs).resume(
644
+ runner_attribute_file=attribute_file_path, **kwargs
645
+ )
646
+ else:
647
+ command = self.api(**self.top_level_kwargs).resume(
648
+ runner_attribute_file=attribute_file_path, **kwargs
649
+ )
461
650
 
462
651
  pid = await self.spm.async_run_command(
463
652
  [sys.executable, *command],
@@ -468,6 +657,50 @@ class Runner(metaclass=RunnerMeta):
468
657
 
469
658
  return await self.__async_get_executing_run(attribute_file_fd, command_obj)
470
659
 
660
async def async_spin(self, pathspec, **kwargs) -> ExecutingTask:
    """
    Spin a step/task without blocking.

    Returns as soon as the spun task has launched. This is a coroutine and
    must be `await`ed.

    Parameters
    ----------
    pathspec : str
        The pathspec of the step/task to spin.
    **kwargs : Any
        Additional arguments that you would pass to `python ./myflow.py` after
        the `spin` command.

    Returns
    -------
    ExecutingTask
        ExecutingTask representing the spun task that was started.
    """
    with temporary_fifo() as (attribute_file_path, attribute_file_fd):

        def _build_command():
            # Single place where the `spin` command line is assembled.
            return self.api(**self.top_level_kwargs).spin(
                pathspec=pathspec,
                runner_attribute_file=attribute_file_path,
                **kwargs,
            )

        if not CLICK_API_PROCESS_CONFIG:
            command = _build_command()
        else:
            # Build the command from within the runner's working directory.
            with with_dir(self.cwd):
                command = _build_command()

        pid = await self.spm.async_run_command(
            [sys.executable, *command],
            env=self.env_vars,
            cwd=self.cwd,
        )
        command_obj = self.spm.get(pid)

        return await self.__async_get_executing_task(attribute_file_fd, command_obj)
703
+
471
704
  def __exit__(self, exc_type, exc_value, traceback):
472
705
  self.spm.cleanup()
473
706
 
@@ -9,6 +9,8 @@ import tempfile
9
9
  import threading
10
10
  from typing import Callable, Dict, Iterator, List, Optional, Tuple
11
11
 
12
+ from metaflow.packaging_sys import MetaflowCodeContent
13
+ from metaflow.util import get_metaflow_root
12
14
  from .utils import check_process_exited
13
15
 
14
16
 
@@ -150,6 +152,19 @@ class SubprocessManager(object):
150
152
  int
151
153
  The process ID of the subprocess.
152
154
  """
155
+ env = env or {}
156
+ installed_root = os.environ.get("METAFLOW_EXTRACTED_ROOT", get_metaflow_root())
157
+
158
+ for k, v in MetaflowCodeContent.get_env_vars_for_packaged_metaflow(
159
+ installed_root
160
+ ).items():
161
+ if k.endswith(":"):
162
+ # Override
163
+ env[k[:-1]] = v
164
+ elif k in env:
165
+ env[k] = "%s:%s" % (v, env[k])
166
+ else:
167
+ env[k] = v
153
168
 
154
169
  command_obj = CommandManager(command, env, cwd)
155
170
  pid = command_obj.run(show_output=show_output)
@@ -181,6 +196,11 @@ class SubprocessManager(object):
181
196
  int
182
197
  The process ID of the subprocess.
183
198
  """
199
+ env = env or {}
200
+ if "PYTHONPATH" in env:
201
+ env["PYTHONPATH"] = "%s:%s" % (get_metaflow_root(), env["PYTHONPATH"])
202
+ else:
203
+ env["PYTHONPATH"] = get_metaflow_root()
184
204
 
185
205
  command_obj = CommandManager(command, env, cwd)
186
206
  pid = await command_obj.async_run()
@@ -237,7 +257,7 @@ class CommandManager(object):
237
257
  self.command = command
238
258
 
239
259
  self.env = env if env is not None else os.environ.copy()
240
- self.cwd = cwd if cwd is not None else os.getcwd()
260
+ self.cwd = cwd or os.getcwd()
241
261
 
242
262
  self.process = None
243
263
  self.stdout_thread = None
metaflow/runner/utils.py CHANGED
@@ -4,6 +4,7 @@ import time
4
4
  import asyncio
5
5
  import tempfile
6
6
  import select
7
+ import fcntl
7
8
  from contextlib import contextmanager
8
9
  from subprocess import CalledProcessError
9
10
  from typing import Any, Dict, TYPE_CHECKING, ContextManager, Tuple
@@ -109,7 +110,6 @@ def read_from_fifo_when_ready(
109
110
  content = bytearray()
110
111
  poll = select.poll()
111
112
  poll.register(fifo_fd, select.POLLIN)
112
- max_timeout = 3 # Wait for 10 * 3 = 30 ms after last write
113
113
  while True:
114
114
  if check_process_exited(command_obj) and command_obj.process.returncode != 0:
115
115
  raise CalledProcessError(
@@ -130,6 +130,21 @@ def read_from_fifo_when_ready(
130
130
  data = os.read(fifo_fd, 8192)
131
131
  if data:
132
132
  content += data
133
+ # We got data! Now switch to blocking mode for guaranteed complete reads.
134
+ # In blocking mode, read() won't return 0 until writer closes AND all
135
+ # kernel buffers are drained - this is POSIX guaranteed.
136
+ flags = fcntl.fcntl(fifo_fd, fcntl.F_GETFL)
137
+ fcntl.fcntl(fifo_fd, fcntl.F_SETFL, flags & ~os.O_NONBLOCK)
138
+
139
+ # Now do blocking reads until true EOF
140
+ while True:
141
+ chunk = os.read(fifo_fd, 8192)
142
+ if not chunk:
143
+ # True EOF - all data drained
144
+ break
145
+ content += chunk
146
+ # All data read, exit main loop
147
+ break
133
148
  else:
134
149
  if len(events):
135
150
  # We read an EOF -- consider the file done
@@ -137,22 +152,10 @@ def read_from_fifo_when_ready(
137
152
  else:
138
153
  # We had no events (just a timeout) and the read didn't return
139
154
  # an exception so the file is still open; we continue waiting for data
140
- # Unfortunately, on MacOS, it seems that even *after* the file is
141
- # closed on the other end, we still don't get a BlockingIOError so
142
- # we hack our way and timeout if there is no write in 30ms which is
143
- # a relative eternity for file writes.
144
- if content:
145
- if max_timeout <= 0:
146
- break
147
- max_timeout -= 1
148
- continue
155
+ pass
149
156
  except BlockingIOError:
150
- has_blocking_error = True
151
- if content:
152
- # The file was closed
153
- break
154
- # else, if we have no content, we continue waiting for the file to be open
155
- # and written to.
157
+ # File not ready yet, continue waiting
158
+ pass
156
159
 
157
160
  if not content and check_process_exited(command_obj):
158
161
  raise CalledProcessError(command_obj.process.returncode, command_obj.command)
@@ -322,3 +325,11 @@ def get_lower_level_group(
322
325
  raise ValueError(f"Sub-command '{sub_command}' not found in API '{api.name}'")
323
326
 
324
327
  return sub_command_obj(**sub_command_kwargs)
328
+
329
+
330
@contextmanager
def with_dir(new_dir):
    """
    Temporarily change the process working directory.

    Parameters
    ----------
    new_dir : str
        Directory to switch to for the duration of the `with` block.

    Yields
    ------
    str
        `new_dir`, unchanged.

    Notes
    -----
    The previous working directory is restored even if the body of the
    `with` block raises; without the `finally`, an exception would leave the
    whole process stuck in `new_dir`.
    """
    previous_dir = os.getcwd()
    os.chdir(new_dir)
    try:
        yield new_dir
    finally:
        os.chdir(previous_dir)