ob-metaflow 2.12.36.3__py2.py3-none-any.whl → 2.13.0.1__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ob-metaflow might be problematic; see the package's registry page for more details.

Files changed (65)
  1. metaflow/__init__.py +3 -0
  2. metaflow/cli.py +180 -718
  3. metaflow/cli_args.py +17 -0
  4. metaflow/cli_components/__init__.py +0 -0
  5. metaflow/cli_components/dump_cmd.py +96 -0
  6. metaflow/cli_components/init_cmd.py +51 -0
  7. metaflow/cli_components/run_cmds.py +360 -0
  8. metaflow/cli_components/step_cmd.py +189 -0
  9. metaflow/cli_components/utils.py +140 -0
  10. metaflow/cmd/develop/stub_generator.py +9 -2
  11. metaflow/datastore/flow_datastore.py +2 -2
  12. metaflow/decorators.py +63 -2
  13. metaflow/exception.py +8 -2
  14. metaflow/extension_support/plugins.py +41 -27
  15. metaflow/flowspec.py +175 -23
  16. metaflow/graph.py +28 -27
  17. metaflow/includefile.py +50 -22
  18. metaflow/lint.py +35 -20
  19. metaflow/metaflow_config.py +6 -1
  20. metaflow/package.py +17 -3
  21. metaflow/parameters.py +87 -23
  22. metaflow/plugins/__init__.py +4 -0
  23. metaflow/plugins/airflow/airflow_cli.py +1 -0
  24. metaflow/plugins/argo/argo_workflows.py +41 -1
  25. metaflow/plugins/argo/argo_workflows_cli.py +1 -0
  26. metaflow/plugins/argo/argo_workflows_deployer_objects.py +47 -1
  27. metaflow/plugins/aws/batch/batch_decorator.py +2 -2
  28. metaflow/plugins/aws/secrets_manager/aws_secrets_manager_secrets_provider.py +13 -10
  29. metaflow/plugins/aws/step_functions/step_functions.py +32 -0
  30. metaflow/plugins/aws/step_functions/step_functions_cli.py +1 -0
  31. metaflow/plugins/aws/step_functions/step_functions_deployer_objects.py +3 -0
  32. metaflow/plugins/cards/card_creator.py +1 -0
  33. metaflow/plugins/cards/card_decorator.py +46 -8
  34. metaflow/plugins/datatools/s3/s3op.py +3 -3
  35. metaflow/plugins/kubernetes/kubernetes_cli.py +1 -1
  36. metaflow/plugins/kubernetes/kubernetes_decorator.py +2 -2
  37. metaflow/plugins/pypi/bootstrap.py +196 -61
  38. metaflow/plugins/pypi/conda_decorator.py +20 -10
  39. metaflow/plugins/pypi/conda_environment.py +76 -21
  40. metaflow/plugins/pypi/micromamba.py +42 -15
  41. metaflow/plugins/pypi/pip.py +8 -3
  42. metaflow/plugins/pypi/pypi_decorator.py +11 -9
  43. metaflow/plugins/timeout_decorator.py +2 -2
  44. metaflow/runner/click_api.py +240 -50
  45. metaflow/runner/deployer.py +1 -1
  46. metaflow/runner/deployer_impl.py +8 -3
  47. metaflow/runner/metaflow_runner.py +10 -2
  48. metaflow/runner/nbdeploy.py +2 -0
  49. metaflow/runner/nbrun.py +1 -1
  50. metaflow/runner/subprocess_manager.py +3 -1
  51. metaflow/runner/utils.py +41 -19
  52. metaflow/runtime.py +111 -73
  53. metaflow/sidecar/sidecar_worker.py +1 -1
  54. metaflow/user_configs/__init__.py +0 -0
  55. metaflow/user_configs/config_decorators.py +563 -0
  56. metaflow/user_configs/config_options.py +548 -0
  57. metaflow/user_configs/config_parameters.py +405 -0
  58. metaflow/util.py +17 -0
  59. metaflow/version.py +1 -1
  60. {ob_metaflow-2.12.36.3.dist-info → ob_metaflow-2.13.0.1.dist-info}/METADATA +3 -2
  61. {ob_metaflow-2.12.36.3.dist-info → ob_metaflow-2.13.0.1.dist-info}/RECORD +65 -55
  62. {ob_metaflow-2.12.36.3.dist-info → ob_metaflow-2.13.0.1.dist-info}/LICENSE +0 -0
  63. {ob_metaflow-2.12.36.3.dist-info → ob_metaflow-2.13.0.1.dist-info}/WHEEL +0 -0
  64. {ob_metaflow-2.12.36.3.dist-info → ob_metaflow-2.13.0.1.dist-info}/entry_points.txt +0 -0
  65. {ob_metaflow-2.12.36.3.dist-info → ob_metaflow-2.13.0.1.dist-info}/top_level.txt +0 -0
@@ -9,6 +9,7 @@ if sys.version_info < (3, 7):
9
9
  )
10
10
 
11
11
  import datetime
12
+ import functools
12
13
  import importlib
13
14
  import inspect
14
15
  import itertools
@@ -17,6 +18,7 @@ import json
17
18
  from collections import OrderedDict
18
19
  from typing import Any, Callable, Dict, List, Optional
19
20
  from typing import OrderedDict as TOrderedDict
21
+ from typing import Tuple as TTuple
20
22
  from typing import Union
21
23
 
22
24
  from metaflow import FlowSpec, Parameter
@@ -37,7 +39,16 @@ from metaflow._vendor.typeguard import TypeCheckError, check_type
37
39
  from metaflow.decorators import add_decorator_options
38
40
  from metaflow.exception import MetaflowException
39
41
  from metaflow.includefile import FilePathClass
42
+ from metaflow.metaflow_config import CLICK_API_PROCESS_CONFIG
40
43
  from metaflow.parameters import JSONTypeClass, flow_context
44
+ from metaflow.user_configs.config_options import (
45
+ ConfigValue,
46
+ ConvertDictOrStr,
47
+ ConvertPath,
48
+ LocalFileInput,
49
+ MultipleTuple,
50
+ config_options_with_config_input,
51
+ )
41
52
 
42
53
  # Define a recursive type alias for JSON
43
54
  JSON = Union[Dict[str, "JSON"], List["JSON"], str, int, float, bool, None]
@@ -55,6 +66,8 @@ click_to_python_types = {
55
66
  File: str,
56
67
  JSONTypeClass: JSON,
57
68
  FilePathClass: str,
69
+ LocalFileInput: str,
70
+ MultipleTuple: TTuple[str, Union[JSON, ConfigValue]],
58
71
  }
59
72
 
60
73
 
@@ -65,7 +78,7 @@ def _method_sanity_check(
65
78
  defaults: TOrderedDict[str, Any],
66
79
  **kwargs
67
80
  ) -> Dict[str, Any]:
68
- method_params = {"args": {}, "options": {}}
81
+ method_params = {"args": {}, "options": {}, "defaults": defaults}
69
82
 
70
83
  possible_params = OrderedDict()
71
84
  possible_params.update(possible_arg_params)
@@ -87,10 +100,26 @@ def _method_sanity_check(
87
100
  % (supplied_k, annotations[supplied_k], defaults[supplied_k])
88
101
  )
89
102
 
90
- # because Click expects stringified JSON..
91
- supplied_v = (
92
- json.dumps(supplied_v) if annotations[supplied_k] == JSON else supplied_v
93
- )
103
+ # Clean up values to make them into what click expects
104
+ if annotations[supplied_k] == JSON:
105
+ # JSON should be a string (json dumps)
106
+ supplied_v = json.dumps(supplied_v)
107
+ elif supplied_k == "config_value":
108
+ # Special handling of config value because we need to go look in the tuple
109
+ new_list = []
110
+ for cfg_name, cfg_value in supplied_v:
111
+ if isinstance(cfg_value, ConfigValue):
112
+ # ConfigValue should be JSONified and converted to a string
113
+ new_list.append((cfg_name, json.dumps(cfg_value.to_dict())))
114
+ elif isinstance(cfg_value, dict):
115
+ # ConfigValue passed as a dictionary
116
+ new_list.append((cfg_name, json.dumps(cfg_value)))
117
+ else:
118
+ raise TypeError(
119
+ "Invalid type for a config-value, expected a ConfigValue or "
120
+ "dict but got '%s'" % type(cfg_value)
121
+ )
122
+ supplied_v = new_list
94
123
 
95
124
  if supplied_k in possible_arg_params:
96
125
  cli_name = possible_arg_params[supplied_k].opts[0].strip("-")
@@ -124,6 +153,37 @@ def _method_sanity_check(
124
153
  return method_params
125
154
 
126
155
 
156
+ def _lazy_load_command(
157
+ cli_collection: click.Group,
158
+ flow_parameters: Union[str, List[Parameter]],
159
+ _self,
160
+ name: str,
161
+ ):
162
+
163
+ # Context is not used in get_command so we can pass None. Since we pin click,
164
+ # this won't change from under us.
165
+
166
+ if isinstance(flow_parameters, str):
167
+ # Resolve flow_parameters -- for start, this is a function which we
168
+ # need to call to figure out the actual parameters (may be changed by configs)
169
+ flow_parameters = getattr(_self, flow_parameters)()
170
+ cmd_obj = cli_collection.get_command(None, name)
171
+ if cmd_obj:
172
+ if isinstance(cmd_obj, click.Group):
173
+ # TODO: possibly check for fake groups with cmd_obj.name in ["cli", "main"]
174
+ result = functools.partial(extract_group(cmd_obj, flow_parameters), _self)
175
+ elif isinstance(cmd_obj, click.Command):
176
+ result = functools.partial(extract_command(cmd_obj, flow_parameters), _self)
177
+ else:
178
+ raise RuntimeError(
179
+ "Cannot handle %s of type %s" % (cmd_obj.name, type(cmd_obj))
180
+ )
181
+ setattr(_self, name, result)
182
+ return result
183
+ else:
184
+ raise AttributeError()
185
+
186
+
127
187
  def get_annotation(param: Union[click.Argument, click.Option]):
128
188
  py_type = click_to_python_types[type(param.type)]
129
189
  if not param.required:
@@ -154,34 +214,57 @@ loaded_modules = {}
154
214
  def extract_flow_class_from_file(flow_file: str) -> FlowSpec:
155
215
  if not os.path.exists(flow_file):
156
216
  raise FileNotFoundError("Flow file not present at '%s'" % flow_file)
157
- # Check if the module has already been loaded
158
- if flow_file in loaded_modules:
159
- module = loaded_modules[flow_file]
160
- else:
161
- # Load the module if it's not already loaded
162
- spec = importlib.util.spec_from_file_location("module", flow_file)
163
- module = importlib.util.module_from_spec(spec)
164
- spec.loader.exec_module(module)
165
- # Cache the loaded module
166
- loaded_modules[flow_file] = module
167
- classes = inspect.getmembers(module, inspect.isclass)
168
-
169
- flow_cls = None
170
- for _, kls in classes:
171
- if kls != FlowSpec and issubclass(kls, FlowSpec):
172
- if flow_cls is not None:
173
- raise MetaflowException(
174
- "Multiple FlowSpec classes found in %s" % flow_file
175
- )
176
- flow_cls = kls
177
-
178
- return flow_cls
217
+
218
+ flow_dir = os.path.dirname(os.path.abspath(flow_file))
219
+ path_was_added = False
220
+
221
+ # Only add to path if it's not already there
222
+ if flow_dir not in sys.path:
223
+ sys.path.insert(0, flow_dir)
224
+ path_was_added = True
225
+
226
+ try:
227
+ # Check if the module has already been loaded
228
+ if flow_file in loaded_modules:
229
+ module = loaded_modules[flow_file]
230
+ else:
231
+ # Load the module if it's not already loaded
232
+ spec = importlib.util.spec_from_file_location("module", flow_file)
233
+ module = importlib.util.module_from_spec(spec)
234
+ spec.loader.exec_module(module)
235
+ # Cache the loaded module
236
+ loaded_modules[flow_file] = module
237
+ classes = inspect.getmembers(module, inspect.isclass)
238
+
239
+ flow_cls = None
240
+ for _, kls in classes:
241
+ if kls != FlowSpec and issubclass(kls, FlowSpec):
242
+ if flow_cls is not None:
243
+ raise MetaflowException(
244
+ "Multiple FlowSpec classes found in %s" % flow_file
245
+ )
246
+ flow_cls = kls
247
+
248
+ if flow_cls is None:
249
+ raise MetaflowException("No FlowSpec class found in %s" % flow_file)
250
+ return flow_cls
251
+ finally:
252
+ # Only remove from path if we added it
253
+ if path_was_added:
254
+ try:
255
+ sys.path.remove(flow_dir)
256
+ except ValueError:
257
+ # User's code might have removed it already
258
+ pass
179
259
 
180
260
 
181
261
  class MetaflowAPI(object):
182
- def __init__(self, parent=None, **kwargs):
262
+ def __init__(self, parent=None, flow_cls=None, config_input=None, **kwargs):
183
263
  self._parent = parent
184
264
  self._chain = [{self._API_NAME: kwargs}]
265
+ self._flow_cls = flow_cls
266
+ self._config_input = config_input
267
+ self._cached_computed_parameters = None
185
268
 
186
269
  @property
187
270
  def parent(self):
@@ -200,23 +283,30 @@ class MetaflowAPI(object):
200
283
  @classmethod
201
284
  def from_cli(cls, flow_file: str, cli_collection: Callable) -> Callable:
202
285
  flow_cls = extract_flow_class_from_file(flow_file)
203
- flow_parameters = [p for _, p in flow_cls._get_parameters()]
286
+
204
287
  with flow_context(flow_cls) as _:
205
- add_decorator_options(cli_collection)
206
-
207
- class_dict = {"__module__": "metaflow", "_API_NAME": flow_file}
208
- command_groups = cli_collection.sources
209
- for each_group in command_groups:
210
- for _, cmd_obj in each_group.commands.items():
211
- if isinstance(cmd_obj, click.Group):
212
- # TODO: possibly check for fake groups with cmd_obj.name in ["cli", "main"]
213
- class_dict[cmd_obj.name] = extract_group(cmd_obj, flow_parameters)
214
- elif isinstance(cmd_obj, click.Command):
215
- class_dict[cmd_obj.name] = extract_command(cmd_obj, flow_parameters)
216
- else:
217
- raise RuntimeError(
218
- "Cannot handle %s of type %s" % (cmd_obj.name, type(cmd_obj))
219
- )
288
+ cli_collection, config_input = config_options_with_config_input(
289
+ cli_collection
290
+ )
291
+ cli_collection = add_decorator_options(cli_collection)
292
+
293
+ def getattr_wrapper(_self, name):
294
+ # Functools.partial do not automatically bind self (no __get__)
295
+ with flow_context(flow_cls) as _:
296
+ # We also wrap this in the proper flow context because since commands
297
+ # are loaded lazily, we need the proper flow context to compute things
298
+ # like parameters. If we do not do this, the outer flow's context will
299
+ # be used.
300
+ return _self._internal_getattr(_self, name)
301
+
302
+ class_dict = {
303
+ "__module__": "metaflow",
304
+ "_API_NAME": flow_file,
305
+ "_internal_getattr": functools.partial(
306
+ _lazy_load_command, cli_collection, "_compute_flow_parameters"
307
+ ),
308
+ "__getattr__": getattr_wrapper,
309
+ }
220
310
 
221
311
  to_return = type(flow_file, (MetaflowAPI,), class_dict)
222
312
  to_return.__name__ = flow_file
@@ -237,11 +327,16 @@ class MetaflowAPI(object):
237
327
  defaults,
238
328
  **kwargs,
239
329
  )
240
- return to_return(parent=None, **method_params)
330
+ return to_return(
331
+ parent=None,
332
+ flow_cls=flow_cls,
333
+ config_input=config_input,
334
+ **method_params,
335
+ )
241
336
 
242
337
  m = _method
243
- m.__name__ = cmd_obj.name
244
- m.__doc__ = getattr(cmd_obj, "help", None)
338
+ m.__name__ = cli_collection.name
339
+ m.__doc__ = getattr(cli_collection, "help", None)
245
340
  m.__signature__ = inspect.signature(_method).replace(
246
341
  parameters=params_sigs.values()
247
342
  )
@@ -278,8 +373,12 @@ class MetaflowAPI(object):
278
373
  for k, v in options.items():
279
374
  if isinstance(v, list):
280
375
  for i in v:
281
- components.append("--%s" % k)
282
- components.append(str(i))
376
+ if isinstance(i, tuple):
377
+ components.append("--%s" % k)
378
+ components.extend(map(str, i))
379
+ else:
380
+ components.append("--%s" % k)
381
+ components.append(str(i))
283
382
  else:
284
383
  components.append("--%s" % k)
285
384
  if v != "flag":
@@ -287,6 +386,97 @@ class MetaflowAPI(object):
287
386
 
288
387
  return components
289
388
 
389
+ def _compute_flow_parameters(self):
390
+ if (
391
+ self._flow_cls is None
392
+ or self._config_input is None
393
+ or self._parent is not None
394
+ ):
395
+ raise RuntimeError(
396
+ "Computing flow-level parameters for a non start API. "
397
+ "Please report to the Metaflow team."
398
+ )
399
+
400
+ if self._cached_computed_parameters is not None:
401
+ return self._cached_computed_parameters
402
+ self._cached_computed_parameters = []
403
+
404
+ config_options = None
405
+ if CLICK_API_PROCESS_CONFIG:
406
+ with flow_context(self._flow_cls) as _:
407
+ # We are going to resolve the configs first and then get the parameters.
408
+ # Note that configs may update/add parameters so the order is important
409
+ # Since part of the processing of configs happens by click, we need to
410
+ # "fake" it.
411
+
412
+ # Extract any config options as well as datastore and quiet options
413
+ method_params = self._chain[0][self._API_NAME]
414
+ opts = method_params["options"]
415
+ defaults = method_params["defaults"]
416
+
417
+ ds = opts.get("datastore", defaults["datastore"])
418
+ quiet = opts.get("quiet", defaults["quiet"])
419
+ is_default = False
420
+ config_file = opts.get("config-file")
421
+ if config_file is None:
422
+ is_default = True
423
+ config_file = defaults.get("config_file")
424
+
425
+ if config_file:
426
+ config_file = map(
427
+ lambda x: (x[0], ConvertPath.convert_value(x[1], is_default)),
428
+ config_file,
429
+ )
430
+
431
+ is_default = False
432
+ config_value = opts.get("config-value")
433
+ if config_value is None:
434
+ is_default = True
435
+ config_value = defaults.get("config_value")
436
+
437
+ if config_value:
438
+ config_value = map(
439
+ lambda x: (
440
+ x[0],
441
+ ConvertDictOrStr.convert_value(x[1], is_default),
442
+ ),
443
+ config_value,
444
+ )
445
+
446
+ if (config_file is None) ^ (config_value is None):
447
+ # If we have one, we should have the other
448
+ raise MetaflowException(
449
+ "Options were not properly set -- this is an internal error."
450
+ )
451
+
452
+ if config_file:
453
+ # Process both configurations; the second one will return all the merged
454
+ # configuration options properly processed.
455
+ self._config_input.process_configs(
456
+ self._flow_cls.__name__, "config_file", config_file, quiet, ds
457
+ )
458
+ config_options = self._config_input.process_configs(
459
+ self._flow_cls.__name__, "config_value", config_value, quiet, ds
460
+ )
461
+
462
+ # At this point, we are like in start() in cli.py -- we obtained the
463
+ # properly processed config_options which we can now use to process
464
+ # the config decorators (including CustomStep/FlowDecorators)
465
+ # Note that if CLICK_API_PROCESS_CONFIG is False, we still do this because
466
+ # it will init all parameters (config_options will be None)
467
+ # We ignore any errors if we don't check the configs in the click API.
468
+ new_cls = self._flow_cls._process_config_decorators(
469
+ config_options, ignore_errors=not CLICK_API_PROCESS_CONFIG
470
+ )
471
+ if new_cls:
472
+ self._flow_cls = new_cls
473
+
474
+ for _, param in self._flow_cls._get_parameters():
475
+ if param.IS_CONFIG_PARAMETER:
476
+ continue
477
+ self._cached_computed_parameters.append(param)
478
+ return self._cached_computed_parameters
479
+
290
480
 
291
481
  def extract_all_params(cmd_obj: Union[click.Command, click.Group]):
292
482
  arg_params_sigs = OrderedDict()
@@ -351,7 +541,7 @@ def extract_group(cmd_obj: click.Group, flow_parameters: List[Parameter]) -> Cal
351
541
  method_params = _method_sanity_check(
352
542
  possible_arg_params, possible_opt_params, annotations, defaults, **kwargs
353
543
  )
354
- return resulting_class(parent=_self, **method_params)
544
+ return resulting_class(parent=_self, flow_cls=None, **method_params)
355
545
 
356
546
  m = _method
357
547
  m.__name__ = cmd_obj.name
@@ -64,7 +64,7 @@ class Deployer(metaclass=DeployerMeta):
64
64
  The directory to run the subprocess in; if not specified, the current
65
65
  directory is used.
66
66
  file_read_timeout : int, default 3600
67
- The timeout until which we try to read the deployer attribute file.
67
+ The timeout until which we try to read the deployer attribute file (in seconds).
68
68
  **kwargs : Any
69
69
  Additional arguments that you would pass to `python myflow.py` before
70
70
  the deployment command.
@@ -37,7 +37,7 @@ class DeployerImpl(object):
37
37
  The directory to run the subprocess in; if not specified, the current
38
38
  directory is used.
39
39
  file_read_timeout : int, default 3600
40
- The timeout until which we try to read the deployer attribute file.
40
+ The timeout until which we try to read the deployer attribute file (in seconds).
41
41
  **kwargs : Any
42
42
  Additional arguments that you would pass to `python myflow.py` before
43
43
  the deployment command.
@@ -61,8 +61,13 @@ class DeployerImpl(object):
61
61
  "of DeployerImpl."
62
62
  )
63
63
 
64
+ from metaflow.parameters import flow_context
65
+
64
66
  if "metaflow.cli" in sys.modules:
65
- importlib.reload(sys.modules["metaflow.cli"])
67
+ # Reload the CLI with an "empty" flow -- this will remove any configuration
68
+ # options. They are re-added in from_cli (called below).
69
+ with flow_context(None) as _:
70
+ importlib.reload(sys.modules["metaflow.cli"])
66
71
  from metaflow.cli import start
67
72
  from metaflow.runner.click_api import MetaflowAPI
68
73
 
@@ -144,7 +149,7 @@ class DeployerImpl(object):
144
149
  # Additional info is used to pass additional deployer specific information.
145
150
  # It is used in non-OSS deployers (extensions).
146
151
  self.additional_info = content.get("additional_info", {})
147
-
152
+ command_obj.sync_wait()
148
153
  if command_obj.process.returncode == 0:
149
154
  return create_class(deployer=self)
150
155
 
@@ -221,7 +221,7 @@ class Runner(object):
221
221
  The directory to run the subprocess in; if not specified, the current
222
222
  directory is used.
223
223
  file_read_timeout : int, default 3600
224
- The timeout until which we try to read the runner attribute file.
224
+ The timeout until which we try to read the runner attribute file (in seconds).
225
225
  **kwargs : Any
226
226
  Additional arguments that you would pass to `python myflow.py` before
227
227
  the `run` command.
@@ -245,8 +245,13 @@ class Runner(object):
245
245
  # This ability is made possible by the statement:
246
246
  # 'from .metaflow_runner import Runner' in '__init__.py'
247
247
 
248
+ from metaflow.parameters import flow_context
249
+
248
250
  if "metaflow.cli" in sys.modules:
249
- importlib.reload(sys.modules["metaflow.cli"])
251
+ # Reload the CLI with an "empty" flow -- this will remove any configuration
252
+ # options. They are re-added in from_cli (called below).
253
+ with flow_context(None) as _:
254
+ importlib.reload(sys.modules["metaflow.cli"])
250
255
  from metaflow.cli import start
251
256
  from metaflow.runner.click_api import MetaflowAPI
252
257
 
@@ -272,6 +277,9 @@ class Runner(object):
272
277
 
273
278
  def __get_executing_run(self, attribute_file_fd, command_obj):
274
279
  content = handle_timeout(attribute_file_fd, command_obj, self.file_read_timeout)
280
+
281
+ command_obj.sync_wait()
282
+
275
283
  content = json.loads(content)
276
284
  pathspec = "%s/%s" % (content.get("flow_name"), content.get("run_id"))
277
285
 
@@ -46,6 +46,8 @@ class NBDeployer(object):
46
46
  base_dir : str, optional, default None
47
47
  The directory to run the subprocess in; if not specified, the current
48
48
  working directory is used.
49
+ file_read_timeout : int, default 3600
50
+ The timeout until which we try to read the deployer attribute file (in seconds).
49
51
  **kwargs : Any
50
52
  Additional arguments that you would pass to `python myflow.py` i.e. options
51
53
  listed in `python myflow.py --help`
metaflow/runner/nbrun.py CHANGED
@@ -44,7 +44,7 @@ class NBRunner(object):
44
44
  The directory to run the subprocess in; if not specified, the current
45
45
  working directory is used.
46
46
  file_read_timeout : int, default 3600
47
- The timeout until which we try to read the runner attribute file.
47
+ The timeout until which we try to read the runner attribute file (in seconds).
48
48
  **kwargs : Any
49
49
  Additional arguments that you would pass to `python myflow.py` before
50
50
  the `run` command.
@@ -120,6 +120,9 @@ class SubprocessManager(object):
120
120
  """
121
121
  Run a command synchronously and return its process ID.
122
122
 
123
+ Note: in no case does this wait for the process to *finish*. Use sync_wait()
124
+ to wait for the command to finish.
125
+
123
126
  Parameters
124
127
  ----------
125
128
  command : List[str]
@@ -145,7 +148,6 @@ class SubprocessManager(object):
145
148
  command_obj = CommandManager(command, env, cwd)
146
149
  pid = command_obj.run(show_output=show_output)
147
150
  self.commands[pid] = command_obj
148
- command_obj.sync_wait()
149
151
  return pid
150
152
 
151
153
  async def async_run_command(
metaflow/runner/utils.py CHANGED
@@ -91,7 +91,7 @@ def read_from_fifo_when_ready(
91
91
  encoding : str, optional
92
92
  Encoding to use while reading the file, by default "utf-8".
93
93
  timeout : int, optional
94
- Timeout for reading the file in milliseconds, by default 3600.
94
+ Timeout for reading the file in seconds, by default 3600.
95
95
 
96
96
  Returns
97
97
  -------
@@ -107,30 +107,52 @@ def read_from_fifo_when_ready(
107
107
  content to the FIFO.
108
108
  """
109
109
  content = bytearray()
110
-
111
110
  poll = select.poll()
112
111
  poll.register(fifo_fd, select.POLLIN)
113
-
112
+ max_timeout = 3 # Wait for 10 * 3 = 30 ms after last write
114
113
  while True:
115
- poll_begin = time.time()
116
- poll.poll(timeout)
117
- timeout -= 1000 * (time.time() - poll_begin)
114
+ if check_process_exited(command_obj) and command_obj.process.returncode != 0:
115
+ raise CalledProcessError(
116
+ command_obj.process.returncode, command_obj.command
117
+ )
118
118
 
119
- if timeout <= 0:
119
+ if timeout < 0:
120
120
  raise TimeoutError("Timeout while waiting for the file content")
121
121
 
122
+ poll_begin = time.time()
123
+ # We poll for a very short time to be also able to check if the file was closed
124
+ # If the file is closed, we assume that we only have one writer so if we have
125
+ # data, we break out. This is to work around issues in macos
126
+ events = poll.poll(min(10, timeout * 1000))
127
+ timeout -= time.time() - poll_begin
128
+
122
129
  try:
123
- data = os.read(fifo_fd, 128)
124
- while data:
130
+ data = os.read(fifo_fd, 8192)
131
+ if data:
125
132
  content += data
126
- data = os.read(fifo_fd, 128)
127
-
128
- # Read from a non-blocking closed FIFO returns an empty byte array
129
- break
130
-
133
+ else:
134
+ if len(events):
135
+ # We read an EOF -- consider the file done
136
+ break
137
+ else:
138
+ # We had no events (just a timeout) and the read didn't return
139
+ # an exception so the file is still open; we continue waiting for data
140
+ # Unfortunately, on MacOS, it seems that even *after* the file is
141
+ # closed on the other end, we still don't get a BlockingIOError so
142
+ # we hack our way and timeout if there is no write in 30ms which is
143
+ # a relative eternity for file writes.
144
+ if content:
145
+ if max_timeout <= 0:
146
+ break
147
+ max_timeout -= 1
148
+ continue
131
149
  except BlockingIOError:
132
- # FIFO is open but no data is available yet
133
- continue
150
+ has_blocking_error = True
151
+ if content:
152
+ # The file was closed
153
+ break
154
+ # else, if we have no content, we continue waiting for the file to be open
155
+ # and written to.
134
156
 
135
157
  if not content and check_process_exited(command_obj):
136
158
  raise CalledProcessError(command_obj.process.returncode, command_obj.command)
@@ -156,7 +178,7 @@ async def async_read_from_fifo_when_ready(
156
178
  encoding : str, optional
157
179
  Encoding to use while reading the file, by default "utf-8".
158
180
  timeout : int, optional
159
- Timeout for reading the file in milliseconds, by default 3600.
181
+ Timeout for reading the file in seconds, by default 3600.
160
182
 
161
183
  Returns
162
184
  -------
@@ -206,7 +228,7 @@ def handle_timeout(
206
228
  command_obj : CommandManager
207
229
  Command manager object that encapsulates the running command details.
208
230
  file_read_timeout : int
209
- Timeout for reading the file.
231
+ Timeout for reading the file, in seconds
210
232
 
211
233
  Returns
212
234
  -------
@@ -243,7 +265,7 @@ async def async_handle_timeout(
243
265
  command_obj : CommandManager
244
266
  Command manager object that encapsulates the running command details.
245
267
  file_read_timeout : int
246
- Timeout for reading the file.
268
+ Timeout for reading the file, in seconds
247
269
 
248
270
  Returns
249
271
  -------