ob-metaflow 2.12.30.2__py2.py3-none-any.whl → 2.13.6.1__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of ob-metaflow might be problematic.

Files changed (96)
  1. metaflow/__init__.py +3 -0
  2. metaflow/cards.py +1 -0
  3. metaflow/cli.py +185 -717
  4. metaflow/cli_args.py +17 -0
  5. metaflow/cli_components/__init__.py +0 -0
  6. metaflow/cli_components/dump_cmd.py +96 -0
  7. metaflow/cli_components/init_cmd.py +51 -0
  8. metaflow/cli_components/run_cmds.py +362 -0
  9. metaflow/cli_components/step_cmd.py +176 -0
  10. metaflow/cli_components/utils.py +140 -0
  11. metaflow/cmd/develop/stub_generator.py +9 -2
  12. metaflow/datastore/flow_datastore.py +2 -2
  13. metaflow/decorators.py +63 -2
  14. metaflow/exception.py +8 -2
  15. metaflow/extension_support/plugins.py +42 -27
  16. metaflow/flowspec.py +176 -23
  17. metaflow/graph.py +28 -27
  18. metaflow/includefile.py +50 -22
  19. metaflow/lint.py +35 -20
  20. metaflow/metadata_provider/heartbeat.py +23 -8
  21. metaflow/metaflow_config.py +10 -1
  22. metaflow/multicore_utils.py +31 -14
  23. metaflow/package.py +17 -3
  24. metaflow/parameters.py +97 -25
  25. metaflow/plugins/__init__.py +22 -0
  26. metaflow/plugins/airflow/airflow.py +18 -17
  27. metaflow/plugins/airflow/airflow_cli.py +1 -0
  28. metaflow/plugins/argo/argo_client.py +0 -2
  29. metaflow/plugins/argo/argo_workflows.py +195 -132
  30. metaflow/plugins/argo/argo_workflows_cli.py +1 -1
  31. metaflow/plugins/argo/argo_workflows_decorator.py +2 -4
  32. metaflow/plugins/argo/argo_workflows_deployer_objects.py +51 -9
  33. metaflow/plugins/argo/jobset_input_paths.py +0 -1
  34. metaflow/plugins/aws/aws_utils.py +6 -1
  35. metaflow/plugins/aws/batch/batch_client.py +1 -3
  36. metaflow/plugins/aws/batch/batch_decorator.py +13 -13
  37. metaflow/plugins/aws/secrets_manager/aws_secrets_manager_secrets_provider.py +13 -10
  38. metaflow/plugins/aws/step_functions/dynamo_db_client.py +0 -3
  39. metaflow/plugins/aws/step_functions/production_token.py +1 -1
  40. metaflow/plugins/aws/step_functions/step_functions.py +33 -1
  41. metaflow/plugins/aws/step_functions/step_functions_cli.py +1 -1
  42. metaflow/plugins/aws/step_functions/step_functions_decorator.py +0 -1
  43. metaflow/plugins/aws/step_functions/step_functions_deployer_objects.py +7 -9
  44. metaflow/plugins/cards/card_cli.py +7 -2
  45. metaflow/plugins/cards/card_creator.py +1 -0
  46. metaflow/plugins/cards/card_decorator.py +79 -8
  47. metaflow/plugins/cards/card_modules/basic.py +56 -5
  48. metaflow/plugins/cards/card_modules/card.py +16 -1
  49. metaflow/plugins/cards/card_modules/components.py +64 -16
  50. metaflow/plugins/cards/card_modules/main.js +27 -25
  51. metaflow/plugins/cards/card_modules/test_cards.py +4 -4
  52. metaflow/plugins/cards/component_serializer.py +1 -1
  53. metaflow/plugins/datatools/s3/s3.py +12 -4
  54. metaflow/plugins/datatools/s3/s3op.py +3 -3
  55. metaflow/plugins/events_decorator.py +338 -186
  56. metaflow/plugins/kubernetes/kube_utils.py +84 -1
  57. metaflow/plugins/kubernetes/kubernetes.py +40 -92
  58. metaflow/plugins/kubernetes/kubernetes_cli.py +32 -7
  59. metaflow/plugins/kubernetes/kubernetes_decorator.py +76 -4
  60. metaflow/plugins/kubernetes/kubernetes_job.py +23 -20
  61. metaflow/plugins/kubernetes/kubernetes_jobsets.py +41 -20
  62. metaflow/plugins/kubernetes/spot_metadata_cli.py +69 -0
  63. metaflow/plugins/kubernetes/spot_monitor_sidecar.py +109 -0
  64. metaflow/plugins/parallel_decorator.py +4 -1
  65. metaflow/plugins/project_decorator.py +33 -5
  66. metaflow/plugins/pypi/bootstrap.py +249 -81
  67. metaflow/plugins/pypi/conda_decorator.py +20 -10
  68. metaflow/plugins/pypi/conda_environment.py +83 -27
  69. metaflow/plugins/pypi/micromamba.py +82 -37
  70. metaflow/plugins/pypi/pip.py +9 -6
  71. metaflow/plugins/pypi/pypi_decorator.py +11 -9
  72. metaflow/plugins/pypi/utils.py +4 -2
  73. metaflow/plugins/timeout_decorator.py +2 -2
  74. metaflow/runner/click_api.py +240 -50
  75. metaflow/runner/deployer.py +1 -1
  76. metaflow/runner/deployer_impl.py +12 -11
  77. metaflow/runner/metaflow_runner.py +68 -34
  78. metaflow/runner/nbdeploy.py +2 -0
  79. metaflow/runner/nbrun.py +1 -1
  80. metaflow/runner/subprocess_manager.py +61 -10
  81. metaflow/runner/utils.py +208 -44
  82. metaflow/runtime.py +216 -112
  83. metaflow/sidecar/sidecar_worker.py +1 -1
  84. metaflow/tracing/tracing_modules.py +4 -1
  85. metaflow/user_configs/__init__.py +0 -0
  86. metaflow/user_configs/config_decorators.py +563 -0
  87. metaflow/user_configs/config_options.py +548 -0
  88. metaflow/user_configs/config_parameters.py +436 -0
  89. metaflow/util.py +22 -0
  90. metaflow/version.py +1 -1
  91. {ob_metaflow-2.12.30.2.dist-info → ob_metaflow-2.13.6.1.dist-info}/METADATA +12 -3
  92. {ob_metaflow-2.12.30.2.dist-info → ob_metaflow-2.13.6.1.dist-info}/RECORD +96 -84
  93. {ob_metaflow-2.12.30.2.dist-info → ob_metaflow-2.13.6.1.dist-info}/WHEEL +1 -1
  94. {ob_metaflow-2.12.30.2.dist-info → ob_metaflow-2.13.6.1.dist-info}/LICENSE +0 -0
  95. {ob_metaflow-2.12.30.2.dist-info → ob_metaflow-2.13.6.1.dist-info}/entry_points.txt +0 -0
  96. {ob_metaflow-2.12.30.2.dist-info → ob_metaflow-2.13.6.1.dist-info}/top_level.txt +0 -0
metaflow/runner/click_api.py

@@ -9,6 +9,7 @@ if sys.version_info < (3, 7):
     )
 
 import datetime
+import functools
 import importlib
 import inspect
 import itertools
@@ -17,6 +18,7 @@ import json
 from collections import OrderedDict
 from typing import Any, Callable, Dict, List, Optional
 from typing import OrderedDict as TOrderedDict
+from typing import Tuple as TTuple
 from typing import Union
 
 from metaflow import FlowSpec, Parameter
@@ -37,7 +39,16 @@ from metaflow._vendor.typeguard import TypeCheckError, check_type
 from metaflow.decorators import add_decorator_options
 from metaflow.exception import MetaflowException
 from metaflow.includefile import FilePathClass
+from metaflow.metaflow_config import CLICK_API_PROCESS_CONFIG
 from metaflow.parameters import JSONTypeClass, flow_context
+from metaflow.user_configs.config_options import (
+    ConfigValue,
+    ConvertDictOrStr,
+    ConvertPath,
+    LocalFileInput,
+    MultipleTuple,
+    config_options_with_config_input,
+)
 
 # Define a recursive type alias for JSON
 JSON = Union[Dict[str, "JSON"], List["JSON"], str, int, float, bool, None]
@@ -55,6 +66,8 @@ click_to_python_types = {
     File: str,
     JSONTypeClass: JSON,
     FilePathClass: str,
+    LocalFileInput: str,
+    MultipleTuple: TTuple[str, Union[JSON, ConfigValue]],
 }
 
 
@@ -65,7 +78,7 @@ def _method_sanity_check(
     defaults: TOrderedDict[str, Any],
     **kwargs
 ) -> Dict[str, Any]:
-    method_params = {"args": {}, "options": {}}
+    method_params = {"args": {}, "options": {}, "defaults": defaults}
 
     possible_params = OrderedDict()
     possible_params.update(possible_arg_params)
@@ -87,10 +100,26 @@ def _method_sanity_check(
                % (supplied_k, annotations[supplied_k], defaults[supplied_k])
            )
 
-        # because Click expects stringified JSON..
-        supplied_v = (
-            json.dumps(supplied_v) if annotations[supplied_k] == JSON else supplied_v
-        )
+        # Clean up values to make them into what click expects
+        if annotations[supplied_k] == JSON:
+            # JSON should be a string (json dumps)
+            supplied_v = json.dumps(supplied_v)
+        elif supplied_k == "config_value":
+            # Special handling of config value because we need to go look in the tuple
+            new_list = []
+            for cfg_name, cfg_value in supplied_v:
+                if isinstance(cfg_value, ConfigValue):
+                    # ConfigValue should be JSONified and converted to a string
+                    new_list.append((cfg_name, json.dumps(cfg_value.to_dict())))
+                elif isinstance(cfg_value, dict):
+                    # ConfigValue passed as a dictionary
+                    new_list.append((cfg_name, json.dumps(cfg_value)))
+                else:
+                    raise TypeError(
+                        "Invalid type for a config-value, expected a ConfigValue or "
+                        "dict but got '%s'" % type(cfg_value)
+                    )
+            supplied_v = new_list
 
        if supplied_k in possible_arg_params:
            cli_name = possible_arg_params[supplied_k].opts[0].strip("-")
@@ -124,6 +153,37 @@ def _method_sanity_check(
    return method_params
 
 
+def _lazy_load_command(
+    cli_collection: click.Group,
+    flow_parameters: Union[str, List[Parameter]],
+    _self,
+    name: str,
+):
+
+    # Context is not used in get_command so we can pass None. Since we pin click,
+    # this won't change from under us.
+
+    if isinstance(flow_parameters, str):
+        # Resolve flow_parameters -- for start, this is a function which we
+        # need to call to figure out the actual parameters (may be changed by configs)
+        flow_parameters = getattr(_self, flow_parameters)()
+    cmd_obj = cli_collection.get_command(None, name)
+    if cmd_obj:
+        if isinstance(cmd_obj, click.Group):
+            # TODO: possibly check for fake groups with cmd_obj.name in ["cli", "main"]
+            result = functools.partial(extract_group(cmd_obj, flow_parameters), _self)
+        elif isinstance(cmd_obj, click.Command):
+            result = functools.partial(extract_command(cmd_obj, flow_parameters), _self)
+        else:
+            raise RuntimeError(
+                "Cannot handle %s of type %s" % (cmd_obj.name, type(cmd_obj))
+            )
+        setattr(_self, name, result)
+        return result
+    else:
+        raise AttributeError()
+
+
 def get_annotation(param: Union[click.Argument, click.Option]):
    py_type = click_to_python_types[type(param.type)]
    if not param.required:
@@ -154,34 +214,57 @@ loaded_modules = {}
 def extract_flow_class_from_file(flow_file: str) -> FlowSpec:
    if not os.path.exists(flow_file):
        raise FileNotFoundError("Flow file not present at '%s'" % flow_file)
-    # Check if the module has already been loaded
-    if flow_file in loaded_modules:
-        module = loaded_modules[flow_file]
-    else:
-        # Load the module if it's not already loaded
-        spec = importlib.util.spec_from_file_location("module", flow_file)
-        module = importlib.util.module_from_spec(spec)
-        spec.loader.exec_module(module)
-        # Cache the loaded module
-        loaded_modules[flow_file] = module
-    classes = inspect.getmembers(module, inspect.isclass)
-
-    flow_cls = None
-    for _, kls in classes:
-        if kls != FlowSpec and issubclass(kls, FlowSpec):
-            if flow_cls is not None:
-                raise MetaflowException(
-                    "Multiple FlowSpec classes found in %s" % flow_file
-                )
-            flow_cls = kls
-
-    return flow_cls
+
+    flow_dir = os.path.dirname(os.path.abspath(flow_file))
+    path_was_added = False
+
+    # Only add to path if it's not already there
+    if flow_dir not in sys.path:
+        sys.path.insert(0, flow_dir)
+        path_was_added = True
+
+    try:
+        # Check if the module has already been loaded
+        if flow_file in loaded_modules:
+            module = loaded_modules[flow_file]
+        else:
+            # Load the module if it's not already loaded
+            spec = importlib.util.spec_from_file_location("module", flow_file)
+            module = importlib.util.module_from_spec(spec)
+            spec.loader.exec_module(module)
+            # Cache the loaded module
+            loaded_modules[flow_file] = module
+        classes = inspect.getmembers(module, inspect.isclass)
+
+        flow_cls = None
+        for _, kls in classes:
+            if kls != FlowSpec and issubclass(kls, FlowSpec):
+                if flow_cls is not None:
+                    raise MetaflowException(
+                        "Multiple FlowSpec classes found in %s" % flow_file
+                    )
+                flow_cls = kls
+
+        if flow_cls is None:
+            raise MetaflowException("No FlowSpec class found in %s" % flow_file)
+        return flow_cls
+    finally:
+        # Only remove from path if we added it
+        if path_was_added:
+            try:
+                sys.path.remove(flow_dir)
+            except ValueError:
+                # User's code might have removed it already
+                pass
 
 
 class MetaflowAPI(object):
-    def __init__(self, parent=None, **kwargs):
+    def __init__(self, parent=None, flow_cls=None, config_input=None, **kwargs):
        self._parent = parent
        self._chain = [{self._API_NAME: kwargs}]
+        self._flow_cls = flow_cls
+        self._config_input = config_input
+        self._cached_computed_parameters = None
 
    @property
    def parent(self):
@@ -200,23 +283,30 @@ class MetaflowAPI(object):
    @classmethod
    def from_cli(cls, flow_file: str, cli_collection: Callable) -> Callable:
        flow_cls = extract_flow_class_from_file(flow_file)
-        flow_parameters = [p for _, p in flow_cls._get_parameters()]
+
        with flow_context(flow_cls) as _:
-            add_decorator_options(cli_collection)
-
-        class_dict = {"__module__": "metaflow", "_API_NAME": flow_file}
-        command_groups = cli_collection.sources
-        for each_group in command_groups:
-            for _, cmd_obj in each_group.commands.items():
-                if isinstance(cmd_obj, click.Group):
-                    # TODO: possibly check for fake groups with cmd_obj.name in ["cli", "main"]
-                    class_dict[cmd_obj.name] = extract_group(cmd_obj, flow_parameters)
-                elif isinstance(cmd_obj, click.Command):
-                    class_dict[cmd_obj.name] = extract_command(cmd_obj, flow_parameters)
-                else:
-                    raise RuntimeError(
-                        "Cannot handle %s of type %s" % (cmd_obj.name, type(cmd_obj))
-                    )
+            cli_collection, config_input = config_options_with_config_input(
+                cli_collection
+            )
+            cli_collection = add_decorator_options(cli_collection)
+
+        def getattr_wrapper(_self, name):
+            # Functools.partial do not automatically bind self (no __get__)
+            with flow_context(flow_cls) as _:
+                # We also wrap this in the proper flow context because since commands
+                # are loaded lazily, we need the proper flow context to compute things
+                # like parameters. If we do not do this, the outer flow's context will
+                # be used.
+                return _self._internal_getattr(_self, name)
+
+        class_dict = {
+            "__module__": "metaflow",
+            "_API_NAME": flow_file,
+            "_internal_getattr": functools.partial(
+                _lazy_load_command, cli_collection, "_compute_flow_parameters"
+            ),
+            "__getattr__": getattr_wrapper,
+        }
 
        to_return = type(flow_file, (MetaflowAPI,), class_dict)
        to_return.__name__ = flow_file
@@ -237,11 +327,16 @@ class MetaflowAPI(object):
                defaults,
                **kwargs,
            )
-            return to_return(parent=None, **method_params)
+            return to_return(
+                parent=None,
+                flow_cls=flow_cls,
+                config_input=config_input,
+                **method_params,
+            )
 
        m = _method
-        m.__name__ = cmd_obj.name
-        m.__doc__ = getattr(cmd_obj, "help", None)
+        m.__name__ = cli_collection.name
+        m.__doc__ = getattr(cli_collection, "help", None)
        m.__signature__ = inspect.signature(_method).replace(
            parameters=params_sigs.values()
        )
@@ -278,8 +373,12 @@ class MetaflowAPI(object):
        for k, v in options.items():
            if isinstance(v, list):
                for i in v:
-                    components.append("--%s" % k)
-                    components.append(str(i))
+                    if isinstance(i, tuple):
+                        components.append("--%s" % k)
+                        components.extend(map(str, i))
+                    else:
+                        components.append("--%s" % k)
+                        components.append(str(i))
            else:
                components.append("--%s" % k)
                if v != "flag":
@@ -287,6 +386,97 @@ class MetaflowAPI(object):
 
        return components
 
+    def _compute_flow_parameters(self):
+        if (
+            self._flow_cls is None
+            or self._config_input is None
+            or self._parent is not None
+        ):
+            raise RuntimeError(
+                "Computing flow-level parameters for a non start API. "
+                "Please report to the Metaflow team."
+            )
+
+        if self._cached_computed_parameters is not None:
+            return self._cached_computed_parameters
+        self._cached_computed_parameters = []
+
+        config_options = None
+        if CLICK_API_PROCESS_CONFIG:
+            with flow_context(self._flow_cls) as _:
+                # We are going to resolve the configs first and then get the parameters.
+                # Note that configs may update/add parameters so the order is important
+                # Since part of the processing of configs happens by click, we need to
+                # "fake" it.
+
+                # Extract any config options as well as datastore and quiet options
+                method_params = self._chain[0][self._API_NAME]
+                opts = method_params["options"]
+                defaults = method_params["defaults"]
+
+                ds = opts.get("datastore", defaults["datastore"])
+                quiet = opts.get("quiet", defaults["quiet"])
+                is_default = False
+                config_file = opts.get("config-file")
+                if config_file is None:
+                    is_default = True
+                    config_file = defaults.get("config_file")
+
+                if config_file:
+                    config_file = map(
+                        lambda x: (x[0], ConvertPath.convert_value(x[1], is_default)),
+                        config_file,
+                    )
+
+                is_default = False
+                config_value = opts.get("config-value")
+                if config_value is None:
+                    is_default = True
+                    config_value = defaults.get("config_value")
+
+                if config_value:
+                    config_value = map(
+                        lambda x: (
+                            x[0],
+                            ConvertDictOrStr.convert_value(x[1], is_default),
+                        ),
+                        config_value,
+                    )
+
+                if (config_file is None) ^ (config_value is None):
+                    # If we have one, we should have the other
+                    raise MetaflowException(
+                        "Options were not properly set -- this is an internal error."
+                    )
+
+                if config_file:
+                    # Process both configurations; the second one will return all the merged
+                    # configuration options properly processed.
+                    self._config_input.process_configs(
+                        self._flow_cls.__name__, "config_file", config_file, quiet, ds
+                    )
+                    config_options = self._config_input.process_configs(
+                        self._flow_cls.__name__, "config_value", config_value, quiet, ds
+                    )
+
+        # At this point, we are like in start() in cli.py -- we obtained the
+        # properly processed config_options which we can now use to process
+        # the config decorators (including CustomStep/FlowDecorators)
+        # Note that if CLICK_API_PROCESS_CONFIG is False, we still do this because
+        # it will init all parameters (config_options will be None)
+        # We ignore any errors if we don't check the configs in the click API.
+        new_cls = self._flow_cls._process_config_decorators(
+            config_options, ignore_errors=not CLICK_API_PROCESS_CONFIG
+        )
+        if new_cls:
+            self._flow_cls = new_cls
+
+        for _, param in self._flow_cls._get_parameters():
+            if param.IS_CONFIG_PARAMETER:
+                continue
+            self._cached_computed_parameters.append(param)
+        return self._cached_computed_parameters
+
 
 def extract_all_params(cmd_obj: Union[click.Command, click.Group]):
    arg_params_sigs = OrderedDict()
@@ -351,7 +541,7 @@ def extract_group(cmd_obj: click.Group, flow_parameters: List[Parameter]) -> Cal
        method_params = _method_sanity_check(
            possible_arg_params, possible_opt_params, annotations, defaults, **kwargs
        )
-        return resulting_class(parent=_self, **method_params)
+        return resulting_class(parent=_self, flow_cls=None, **method_params)
 
    m = _method
    m.__name__ = cmd_obj.name
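
The net effect of the click_api.py changes: MetaflowAPI now resolves command methods lazily through __getattr__, threads flow_cls/config_input through the call chain, and computes flow parameters only after user configs are processed. A minimal usage sketch, following the same pattern Runner and DeployerImpl use later in this diff (the flow file name, the quiet flag, and the alpha parameter are illustrative, not taken from this diff):

    import sys
    from metaflow.cli import start
    from metaflow.runner.click_api import MetaflowAPI

    # Wrap the flow's click command tree in a Python-callable API; sub-commands
    # such as run/resume are built on first attribute access.
    api = MetaflowAPI.from_cli("myflow.py", start)

    # Chained calls produce the CLI tokens that the Runner/Deployer machinery
    # hands to a subprocess as [sys.executable, *command]; nothing runs here.
    command = api(quiet=True).run(alpha=5)

Nothing is executed until those tokens are passed to SubprocessManager, exactly as in Runner.run() and DeployerImpl._create() further down.
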

metaflow/runner/deployer.py

@@ -64,7 +64,7 @@ class Deployer(metaclass=DeployerMeta):
         The directory to run the subprocess in; if not specified, the current
         directory is used.
     file_read_timeout : int, default 3600
-        The timeout until which we try to read the deployer attribute file.
+        The timeout until which we try to read the deployer attribute file (in seconds).
     **kwargs : Any
         Additional arguments that you would pass to `python myflow.py` before
         the deployment command.
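
For reference, file_read_timeout is set when constructing a Deployer. A hedged example of where it plugs in (the flow file and the Argo Workflows target are illustrative and assume that deployer plugin is available):

    from metaflow import Deployer

    # file_read_timeout bounds, in seconds, how long we wait for the deployer
    # attribute file that the deploy subprocess writes back.
    deployer = Deployer("myflow.py", file_read_timeout=600)
    deployed_flow = deployer.argo_workflows().create()
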

metaflow/runner/deployer_impl.py

@@ -2,12 +2,11 @@ import importlib
 import json
 import os
 import sys
-import tempfile
 
 from typing import Any, ClassVar, Dict, Optional, TYPE_CHECKING, Type
 
 from .subprocess_manager import SubprocessManager
-from .utils import get_lower_level_group, handle_timeout
+from .utils import get_lower_level_group, handle_timeout, temporary_fifo
 
 if TYPE_CHECKING:
     import metaflow.runner.deployer
@@ -38,7 +37,7 @@ class DeployerImpl(object):
         The directory to run the subprocess in; if not specified, the current
         directory is used.
     file_read_timeout : int, default 3600
-        The timeout until which we try to read the deployer attribute file.
+        The timeout until which we try to read the deployer attribute file (in seconds).
     **kwargs : Any
         Additional arguments that you would pass to `python myflow.py` before
         the deployment command.
@@ -62,8 +61,13 @@ class DeployerImpl(object):
                "of DeployerImpl."
            )
 
+        from metaflow.parameters import flow_context
+
        if "metaflow.cli" in sys.modules:
-            importlib.reload(sys.modules["metaflow.cli"])
+            # Reload the CLI with an "empty" flow -- this will remove any configuration
+            # options. They are re-added in from_cli (called below).
+            with flow_context(None) as _:
+                importlib.reload(sys.modules["metaflow.cli"])
        from metaflow.cli import start
        from metaflow.runner.click_api import MetaflowAPI
 
@@ -121,14 +125,11 @@ class DeployerImpl(object):
    def _create(
        self, create_class: Type["metaflow.runner.deployer.DeployedFlow"], **kwargs
    ) -> "metaflow.runner.deployer.DeployedFlow":
-        with tempfile.TemporaryDirectory() as temp_dir:
-            tfp_runner_attribute = tempfile.NamedTemporaryFile(
-                dir=temp_dir, delete=False
-            )
+        with temporary_fifo() as (attribute_file_path, attribute_file_fd):
            # every subclass needs to have `self.deployer_kwargs`
            command = get_lower_level_group(
                self.api, self.top_level_kwargs, self.TYPE, self.deployer_kwargs
-            ).create(deployer_attribute_file=tfp_runner_attribute.name, **kwargs)
+            ).create(deployer_attribute_file=attribute_file_path, **kwargs)
 
            pid = self.spm.run_command(
                [sys.executable, *command],
@@ -139,7 +140,7 @@
 
            command_obj = self.spm.get(pid)
            content = handle_timeout(
-                tfp_runner_attribute, command_obj, self.file_read_timeout
+                attribute_file_fd, command_obj, self.file_read_timeout
            )
            content = json.loads(content)
            self.name = content.get("name")
@@ -148,7 +149,7 @@
            # Additional info is used to pass additional deployer specific information.
            # It is used in non-OSS deployers (extensions).
            self.additional_info = content.get("additional_info", {})
-
+            command_obj.sync_wait()
            if command_obj.process.returncode == 0:
                return create_class(deployer=self)
 

metaflow/runner/metaflow_runner.py

@@ -2,13 +2,18 @@ import importlib
 import os
 import sys
 import json
-import tempfile
 
 from typing import Dict, Iterator, Optional, Tuple
 
 from metaflow import Run
 
-from .utils import handle_timeout
+from metaflow.plugins import get_runner_cli
+
+from .utils import (
+    temporary_fifo,
+    handle_timeout,
+    async_handle_timeout,
+)
 from .subprocess_manager import CommandManager, SubprocessManager
 
 
@@ -184,7 +189,27 @@ class ExecutingRun(object):
             yield position, line
 
 
-class Runner(object):
+class RunnerMeta(type):
+    def __new__(mcs, name, bases, dct):
+        cls = super().__new__(mcs, name, bases, dct)
+
+        def _injected_method(subcommand_name, runner_subcommand):
+            def f(self, *args, **kwargs):
+                return runner_subcommand(self, *args, **kwargs)
+
+            f.__doc__ = runner_subcommand.__doc__ or ""
+            f.__name__ = subcommand_name
+
+            return f
+
+        for runner_subcommand in get_runner_cli():
+            method_name = runner_subcommand.name.replace("-", "_")
+            setattr(cls, method_name, _injected_method(method_name, runner_subcommand))
+
+        return cls
+
+
+class Runner(metaclass=RunnerMeta):
     """
     Metaflow's Runner API that presents a programmatic interface
     to run flows and perform other operations either synchronously or asynchronously.
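
RunnerMeta attaches one method per plugin-provided runner subcommand (from get_runner_cli(), with dashes mapped to underscores) onto Runner at class-creation time; the core run/resume entry points keep their documented behavior. A short sketch of the public Runner API for orientation (the flow file and the alpha parameter are illustrative):

    from metaflow import Runner

    # Blocking: waits for the run to finish and returns an ExecutingRun.
    with Runner("myflow.py", pylint=False) as running:
        result = running.run(alpha=5)
        print(result.status)

The async_run()/async_resume() variants shown in the hunks below return as soon as the subprocess launches and hand back the same ExecutingRun object.
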

metaflow/runner/metaflow_runner.py (continued)

@@ -218,7 +243,7 @@ class Runner(object):
         The directory to run the subprocess in; if not specified, the current
         directory is used.
     file_read_timeout : int, default 3600
-        The timeout until which we try to read the runner attribute file.
+        The timeout until which we try to read the runner attribute file (in seconds).
     **kwargs : Any
         Additional arguments that you would pass to `python myflow.py` before
         the `run` command.
@@ -242,8 +267,13 @@ class Runner(object):
        # This ability is made possible by the statement:
        # 'from .metaflow_runner import Runner' in '__init__.py'
 
+        from metaflow.parameters import flow_context
+
        if "metaflow.cli" in sys.modules:
-            importlib.reload(sys.modules["metaflow.cli"])
+            # Reload the CLI with an "empty" flow -- this will remove any configuration
+            # options. They are re-added in from_cli (called below).
+            with flow_context(None) as _:
+                importlib.reload(sys.modules["metaflow.cli"])
        from metaflow.cli import start
        from metaflow.runner.click_api import MetaflowAPI
 
@@ -267,9 +297,25 @@ class Runner(object):
    async def __aenter__(self) -> "Runner":
        return self
 
-    def __get_executing_run(self, tfp_runner_attribute, command_obj):
-        content = handle_timeout(
-            tfp_runner_attribute, command_obj, self.file_read_timeout
+    def __get_executing_run(self, attribute_file_fd, command_obj):
+        content = handle_timeout(attribute_file_fd, command_obj, self.file_read_timeout)
+
+        command_obj.sync_wait()
+
+        content = json.loads(content)
+        pathspec = "%s/%s" % (content.get("flow_name"), content.get("run_id"))
+
+        # Set the correct metadata from the runner_attribute file corresponding to this run.
+        metadata_for_flow = content.get("metadata")
+
+        run_object = Run(
+            pathspec, _namespace_check=False, _current_metadata=metadata_for_flow
+        )
+        return ExecutingRun(self, command_obj, run_object)
+
+    async def __async_get_executing_run(self, attribute_file_fd, command_obj):
+        content = await async_handle_timeout(
+            attribute_file_fd, command_obj, self.file_read_timeout
        )
        content = json.loads(content)
        pathspec = "%s/%s" % (content.get("flow_name"), content.get("run_id"))
@@ -298,12 +344,9 @@ class Runner(object):
        ExecutingRun
            ExecutingRun containing the results of the run.
        """
-        with tempfile.TemporaryDirectory() as temp_dir:
-            tfp_runner_attribute = tempfile.NamedTemporaryFile(
-                dir=temp_dir, delete=False
-            )
+        with temporary_fifo() as (attribute_file_path, attribute_file_fd):
            command = self.api(**self.top_level_kwargs).run(
-                runner_attribute_file=tfp_runner_attribute.name, **kwargs
+                runner_attribute_file=attribute_file_path, **kwargs
            )
 
            pid = self.spm.run_command(
@@ -314,9 +357,9 @@
            )
            command_obj = self.spm.get(pid)
 
-            return self.__get_executing_run(tfp_runner_attribute, command_obj)
+            return self.__get_executing_run(attribute_file_fd, command_obj)
 
-    def resume(self, **kwargs):
+    def resume(self, **kwargs) -> ExecutingRun:
        """
        Blocking resume execution of the run.
        This method will wait until the resumed run has completed execution.
@@ -332,12 +375,9 @@
        ExecutingRun
            ExecutingRun containing the results of the resumed run.
        """
-        with tempfile.TemporaryDirectory() as temp_dir:
-            tfp_runner_attribute = tempfile.NamedTemporaryFile(
-                dir=temp_dir, delete=False
-            )
+        with temporary_fifo() as (attribute_file_path, attribute_file_fd):
            command = self.api(**self.top_level_kwargs).resume(
-                runner_attribute_file=tfp_runner_attribute.name, **kwargs
+                runner_attribute_file=attribute_file_path, **kwargs
            )
 
            pid = self.spm.run_command(
@@ -348,7 +388,7 @@
            )
            command_obj = self.spm.get(pid)
 
-            return self.__get_executing_run(tfp_runner_attribute, command_obj)
+            return self.__get_executing_run(attribute_file_fd, command_obj)
 
    async def async_run(self, **kwargs) -> ExecutingRun:
        """
@@ -368,12 +408,9 @@
        ExecutingRun
            ExecutingRun representing the run that was started.
        """
-        with tempfile.TemporaryDirectory() as temp_dir:
-            tfp_runner_attribute = tempfile.NamedTemporaryFile(
-                dir=temp_dir, delete=False
-            )
+        with temporary_fifo() as (attribute_file_path, attribute_file_fd):
            command = self.api(**self.top_level_kwargs).run(
-                runner_attribute_file=tfp_runner_attribute.name, **kwargs
+                runner_attribute_file=attribute_file_path, **kwargs
            )
 
            pid = await self.spm.async_run_command(
@@ -383,9 +420,9 @@
            )
            command_obj = self.spm.get(pid)
 
-            return self.__get_executing_run(tfp_runner_attribute, command_obj)
+            return await self.__async_get_executing_run(attribute_file_fd, command_obj)
 
-    async def async_resume(self, **kwargs):
+    async def async_resume(self, **kwargs) -> ExecutingRun:
        """
        Non-blocking resume execution of the run.
        This method will return as soon as the resume has launched.
@@ -403,12 +440,9 @@
        ExecutingRun
            ExecutingRun representing the resumed run that was started.
        """
-        with tempfile.TemporaryDirectory() as temp_dir:
-            tfp_runner_attribute = tempfile.NamedTemporaryFile(
-                dir=temp_dir, delete=False
-            )
+        with temporary_fifo() as (attribute_file_path, attribute_file_fd):
            command = self.api(**self.top_level_kwargs).resume(
-                runner_attribute_file=tfp_runner_attribute.name, **kwargs
+                runner_attribute_file=attribute_file_path, **kwargs
            )
 
            pid = await self.spm.async_run_command(
@@ -418,7 +452,7 @@
            )
            command_obj = self.spm.get(pid)
 
-            return self.__get_executing_run(tfp_runner_attribute, command_obj)
+            return await self.__async_get_executing_run(attribute_file_fd, command_obj)
 
    def __exit__(self, exc_type, exc_value, traceback):
        self.spm.cleanup()
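
Both the Runner methods above and DeployerImpl._create() earlier in this diff now receive results over a named pipe yielded by temporary_fifo, imported from metaflow/runner/utils.py (changed +208 -44 in this release but not shown here). Its implementation is not part of this diff; purely as a sketch of the idea, assuming a POSIX os.mkfifo-based helper that yields a path for the child process and a non-blocking read descriptor for handle_timeout, it could look roughly like:

    import os
    import tempfile
    from contextlib import contextmanager

    @contextmanager
    def temporary_fifo():
        # Hypothetical sketch, not the shipped implementation: create a named pipe
        # in a throwaway directory and yield (path, fd). The path is passed to the
        # subprocess via the runner/deployer attribute-file option; the non-blocking
        # fd is what handle_timeout/async_handle_timeout poll for the JSON payload.
        with tempfile.TemporaryDirectory() as temp_dir:
            path = os.path.join(temp_dir, "attribute_file")
            os.mkfifo(path)  # POSIX-only
            fd = os.open(path, os.O_RDONLY | os.O_NONBLOCK)
            try:
                yield path, fd
            finally:
                os.close(fd)

Compared with the NamedTemporaryFile approach it replaces, a FIFO never leaves a stale file behind and gives the reader an explicit end-of-stream once the writing subprocess closes its end.
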

metaflow/runner/nbdeploy.py

@@ -46,6 +48,8 @@ class NBDeployer(object):
     base_dir : str, optional, default None
         The directory to run the subprocess in; if not specified, the current
         working directory is used.
+    file_read_timeout : int, default 3600
+        The timeout until which we try to read the deployer attribute file (in seconds).
     **kwargs : Any
         Additional arguments that you would pass to `python myflow.py` i.e. options
         listed in `python myflow.py --help`