ob-metaflow 2.12.35.1__py2.py3-none-any.whl → 2.12.36.2__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ob-metaflow might be problematic. Click here for more details.

Files changed (57)
  1. metaflow/__init__.py +3 -0
  2. metaflow/cli.py +84 -697
  3. metaflow/cli_args.py +17 -0
  4. metaflow/cli_components/__init__.py +0 -0
  5. metaflow/cli_components/dump_cmd.py +96 -0
  6. metaflow/cli_components/init_cmd.py +51 -0
  7. metaflow/cli_components/run_cmds.py +358 -0
  8. metaflow/cli_components/step_cmd.py +189 -0
  9. metaflow/cli_components/utils.py +140 -0
  10. metaflow/cmd/develop/stub_generator.py +9 -2
  11. metaflow/decorators.py +63 -2
  12. metaflow/extension_support/plugins.py +41 -27
  13. metaflow/flowspec.py +156 -16
  14. metaflow/includefile.py +50 -22
  15. metaflow/metaflow_config.py +1 -1
  16. metaflow/package.py +17 -3
  17. metaflow/parameters.py +80 -23
  18. metaflow/plugins/__init__.py +4 -0
  19. metaflow/plugins/airflow/airflow_cli.py +1 -0
  20. metaflow/plugins/argo/argo_workflows.py +44 -4
  21. metaflow/plugins/argo/argo_workflows_cli.py +1 -0
  22. metaflow/plugins/argo/argo_workflows_deployer_objects.py +5 -1
  23. metaflow/plugins/aws/batch/batch_decorator.py +2 -2
  24. metaflow/plugins/aws/step_functions/step_functions.py +32 -0
  25. metaflow/plugins/aws/step_functions/step_functions_cli.py +1 -0
  26. metaflow/plugins/aws/step_functions/step_functions_deployer_objects.py +3 -0
  27. metaflow/plugins/datatools/s3/s3op.py +3 -3
  28. metaflow/plugins/kubernetes/kubernetes.py +3 -3
  29. metaflow/plugins/kubernetes/kubernetes_cli.py +1 -1
  30. metaflow/plugins/kubernetes/kubernetes_decorator.py +2 -2
  31. metaflow/plugins/kubernetes/kubernetes_job.py +3 -3
  32. metaflow/plugins/parallel_decorator.py +4 -1
  33. metaflow/plugins/pypi/conda_decorator.py +20 -10
  34. metaflow/plugins/pypi/pypi_decorator.py +11 -9
  35. metaflow/plugins/timeout_decorator.py +2 -2
  36. metaflow/runner/click_api.py +73 -19
  37. metaflow/runner/deployer.py +1 -1
  38. metaflow/runner/deployer_impl.py +2 -2
  39. metaflow/runner/metaflow_runner.py +4 -1
  40. metaflow/runner/nbdeploy.py +2 -0
  41. metaflow/runner/nbrun.py +1 -1
  42. metaflow/runner/subprocess_manager.py +3 -1
  43. metaflow/runner/utils.py +37 -20
  44. metaflow/runtime.py +199 -105
  45. metaflow/sidecar/sidecar_worker.py +1 -1
  46. metaflow/user_configs/__init__.py +0 -0
  47. metaflow/user_configs/config_decorators.py +563 -0
  48. metaflow/user_configs/config_options.py +495 -0
  49. metaflow/user_configs/config_parameters.py +386 -0
  50. metaflow/util.py +17 -0
  51. metaflow/version.py +1 -1
  52. {ob_metaflow-2.12.35.1.dist-info → ob_metaflow-2.12.36.2.dist-info}/METADATA +3 -2
  53. {ob_metaflow-2.12.35.1.dist-info → ob_metaflow-2.12.36.2.dist-info}/RECORD +57 -47
  54. {ob_metaflow-2.12.35.1.dist-info → ob_metaflow-2.12.36.2.dist-info}/LICENSE +0 -0
  55. {ob_metaflow-2.12.35.1.dist-info → ob_metaflow-2.12.36.2.dist-info}/WHEEL +0 -0
  56. {ob_metaflow-2.12.35.1.dist-info → ob_metaflow-2.12.36.2.dist-info}/entry_points.txt +0 -0
  57. {ob_metaflow-2.12.35.1.dist-info → ob_metaflow-2.12.36.2.dist-info}/top_level.txt +0 -0
@@ -9,6 +9,7 @@ if sys.version_info < (3, 7):
9
9
  )
10
10
 
11
11
  import datetime
12
+ import functools
12
13
  import importlib
13
14
  import inspect
14
15
  import itertools
@@ -38,6 +39,7 @@ from metaflow.decorators import add_decorator_options
38
39
  from metaflow.exception import MetaflowException
39
40
  from metaflow.includefile import FilePathClass
40
41
  from metaflow.parameters import JSONTypeClass, flow_context
42
+ from metaflow.user_configs.config_options import LocalFileInput
41
43
 
42
44
  # Define a recursive type alias for JSON
43
45
  JSON = Union[Dict[str, "JSON"], List["JSON"], str, int, float, bool, None]
@@ -55,6 +57,7 @@ click_to_python_types = {
55
57
  File: str,
56
58
  JSONTypeClass: JSON,
57
59
  FilePathClass: str,
60
+ LocalFileInput: str,
58
61
  }
59
62
 
60
63
 
@@ -124,6 +127,37 @@ def _method_sanity_check(
124
127
  return method_params
125
128
 
126
129
 
130
+ def _lazy_load_command(
131
+ cli_collection: click.Group,
132
+ flow_parameters: Union[str, List[Parameter]],
133
+ _self,
134
+ name: str,
135
+ ):
136
+
137
+ # Context is not used in get_command so we can pass None. Since we pin click,
138
+ # this won't change from under us.
139
+
140
+ if isinstance(flow_parameters, str):
141
+ # Resolve flow_parameters -- for start, this is a function which we
142
+ # need to call to figure out the actual parameters (may be changed by configs)
143
+ flow_parameters = getattr(_self, flow_parameters)()
144
+ cmd_obj = cli_collection.get_command(None, name)
145
+ if cmd_obj:
146
+ if isinstance(cmd_obj, click.Group):
147
+ # TODO: possibly check for fake groups with cmd_obj.name in ["cli", "main"]
148
+ result = functools.partial(extract_group(cmd_obj, flow_parameters), _self)
149
+ elif isinstance(cmd_obj, click.Command):
150
+ result = functools.partial(extract_command(cmd_obj, flow_parameters), _self)
151
+ else:
152
+ raise RuntimeError(
153
+ "Cannot handle %s of type %s" % (cmd_obj.name, type(cmd_obj))
154
+ )
155
+ setattr(_self, name, result)
156
+ return result
157
+ else:
158
+ raise AttributeError()
159
+
160
+
127
161
  def get_annotation(param: Union[click.Argument, click.Option]):
128
162
  py_type = click_to_python_types[type(param.type)]
129
163
  if not param.required:
@@ -179,9 +213,11 @@ def extract_flow_class_from_file(flow_file: str) -> FlowSpec:
179
213
 
180
214
 
181
215
  class MetaflowAPI(object):
182
- def __init__(self, parent=None, **kwargs):
216
+ def __init__(self, parent=None, flow_cls=None, **kwargs):
183
217
  self._parent = parent
184
218
  self._chain = [{self._API_NAME: kwargs}]
219
+ self._flow_cls = flow_cls
220
+ self._cached_computed_parameters = None
185
221
 
186
222
  @property
187
223
  def parent(self):
@@ -200,23 +236,22 @@ class MetaflowAPI(object):
200
236
  @classmethod
201
237
  def from_cli(cls, flow_file: str, cli_collection: Callable) -> Callable:
202
238
  flow_cls = extract_flow_class_from_file(flow_file)
203
- flow_parameters = [p for _, p in flow_cls._get_parameters()]
239
+
204
240
  with flow_context(flow_cls) as _:
205
241
  add_decorator_options(cli_collection)
206
242
 
207
- class_dict = {"__module__": "metaflow", "_API_NAME": flow_file}
208
- command_groups = cli_collection.sources
209
- for each_group in command_groups:
210
- for _, cmd_obj in each_group.commands.items():
211
- if isinstance(cmd_obj, click.Group):
212
- # TODO: possibly check for fake groups with cmd_obj.name in ["cli", "main"]
213
- class_dict[cmd_obj.name] = extract_group(cmd_obj, flow_parameters)
214
- elif isinstance(cmd_obj, click.Command):
215
- class_dict[cmd_obj.name] = extract_command(cmd_obj, flow_parameters)
216
- else:
217
- raise RuntimeError(
218
- "Cannot handle %s of type %s" % (cmd_obj.name, type(cmd_obj))
219
- )
243
+ def getattr_wrapper(_self, name):
244
+ # Functools.partial do not automatically bind self (no __get__)
245
+ return _self._internal_getattr(_self, name)
246
+
247
+ class_dict = {
248
+ "__module__": "metaflow",
249
+ "_API_NAME": flow_file,
250
+ "_internal_getattr": functools.partial(
251
+ _lazy_load_command, cli_collection, "_compute_flow_parameters"
252
+ ),
253
+ "__getattr__": getattr_wrapper,
254
+ }
220
255
 
221
256
  to_return = type(flow_file, (MetaflowAPI,), class_dict)
222
257
  to_return.__name__ = flow_file
@@ -237,11 +272,11 @@ class MetaflowAPI(object):
237
272
  defaults,
238
273
  **kwargs,
239
274
  )
240
- return to_return(parent=None, **method_params)
275
+ return to_return(parent=None, flow_cls=flow_cls, **method_params)
241
276
 
242
277
  m = _method
243
- m.__name__ = cmd_obj.name
244
- m.__doc__ = getattr(cmd_obj, "help", None)
278
+ m.__name__ = cli_collection.name
279
+ m.__doc__ = getattr(cli_collection, "help", None)
245
280
  m.__signature__ = inspect.signature(_method).replace(
246
281
  parameters=params_sigs.values()
247
282
  )
@@ -287,6 +322,25 @@ class MetaflowAPI(object):
287
322
 
288
323
  return components
289
324
 
325
+ def _compute_flow_parameters(self):
326
+ if self._flow_cls is None or self._parent is not None:
327
+ raise RuntimeError(
328
+ "Computing flow-level parameters for a non start API. "
329
+ "Please report to the Metaflow team."
330
+ )
331
+ # TODO: We need to actually compute the new parameters (based on configs) which
332
+ # would involve processing the options at least partially. We will do this
333
+ # before GA but for now making it work for regular parameters
334
+ if self._cached_computed_parameters is not None:
335
+ return self._cached_computed_parameters
336
+ self._cached_computed_parameters = []
337
+ for _, param in self._flow_cls._get_parameters():
338
+ if param.IS_CONFIG_PARAMETER:
339
+ continue
340
+ param.init()
341
+ self._cached_computed_parameters.append(param)
342
+ return self._cached_computed_parameters
343
+
290
344
 
291
345
  def extract_all_params(cmd_obj: Union[click.Command, click.Group]):
292
346
  arg_params_sigs = OrderedDict()
@@ -351,7 +405,7 @@ def extract_group(cmd_obj: click.Group, flow_parameters: List[Parameter]) -> Cal
351
405
  method_params = _method_sanity_check(
352
406
  possible_arg_params, possible_opt_params, annotations, defaults, **kwargs
353
407
  )
354
- return resulting_class(parent=_self, **method_params)
408
+ return resulting_class(parent=_self, flow_cls=None, **method_params)
355
409
 
356
410
  m = _method
357
411
  m.__name__ = cmd_obj.name
@@ -64,7 +64,7 @@ class Deployer(metaclass=DeployerMeta):
64
64
  The directory to run the subprocess in; if not specified, the current
65
65
  directory is used.
66
66
  file_read_timeout : int, default 3600
67
- The timeout until which we try to read the deployer attribute file.
67
+ The timeout until which we try to read the deployer attribute file (in seconds).
68
68
  **kwargs : Any
69
69
  Additional arguments that you would pass to `python myflow.py` before
70
70
  the deployment command.
@@ -37,7 +37,7 @@ class DeployerImpl(object):
37
37
  The directory to run the subprocess in; if not specified, the current
38
38
  directory is used.
39
39
  file_read_timeout : int, default 3600
40
- The timeout until which we try to read the deployer attribute file.
40
+ The timeout until which we try to read the deployer attribute file (in seconds).
41
41
  **kwargs : Any
42
42
  Additional arguments that you would pass to `python myflow.py` before
43
43
  the deployment command.
@@ -144,7 +144,7 @@ class DeployerImpl(object):
144
144
  # Additional info is used to pass additional deployer specific information.
145
145
  # It is used in non-OSS deployers (extensions).
146
146
  self.additional_info = content.get("additional_info", {})
147
-
147
+ command_obj.sync_wait()
148
148
  if command_obj.process.returncode == 0:
149
149
  return create_class(deployer=self)
150
150
 
@@ -221,7 +221,7 @@ class Runner(object):
221
221
  The directory to run the subprocess in; if not specified, the current
222
222
  directory is used.
223
223
  file_read_timeout : int, default 3600
224
- The timeout until which we try to read the runner attribute file.
224
+ The timeout until which we try to read the runner attribute file (in seconds).
225
225
  **kwargs : Any
226
226
  Additional arguments that you would pass to `python myflow.py` before
227
227
  the `run` command.
@@ -272,6 +272,9 @@ class Runner(object):
272
272
 
273
273
  def __get_executing_run(self, attribute_file_fd, command_obj):
274
274
  content = handle_timeout(attribute_file_fd, command_obj, self.file_read_timeout)
275
+
276
+ command_obj.sync_wait()
277
+
275
278
  content = json.loads(content)
276
279
  pathspec = "%s/%s" % (content.get("flow_name"), content.get("run_id"))
277
280
 
@@ -46,6 +46,8 @@ class NBDeployer(object):
46
46
  base_dir : str, optional, default None
47
47
  The directory to run the subprocess in; if not specified, the current
48
48
  working directory is used.
49
+ file_read_timeout : int, default 3600
50
+ The timeout until which we try to read the deployer attribute file (in seconds).
49
51
  **kwargs : Any
50
52
  Additional arguments that you would pass to `python myflow.py` i.e. options
51
53
  listed in `python myflow.py --help`
metaflow/runner/nbrun.py CHANGED
@@ -44,7 +44,7 @@ class NBRunner(object):
44
44
  The directory to run the subprocess in; if not specified, the current
45
45
  working directory is used.
46
46
  file_read_timeout : int, default 3600
47
- The timeout until which we try to read the runner attribute file.
47
+ The timeout until which we try to read the runner attribute file (in seconds).
48
48
  **kwargs : Any
49
49
  Additional arguments that you would pass to `python myflow.py` before
50
50
  the `run` command.
@@ -120,6 +120,9 @@ class SubprocessManager(object):
120
120
  """
121
121
  Run a command synchronously and return its process ID.
122
122
 
123
+ Note: in no case does this wait for the process to *finish*. Use sync_wait()
124
+ to wait for the command to finish.
125
+
123
126
  Parameters
124
127
  ----------
125
128
  command : List[str]
@@ -145,7 +148,6 @@ class SubprocessManager(object):
145
148
  command_obj = CommandManager(command, env, cwd)
146
149
  pid = command_obj.run(show_output=show_output)
147
150
  self.commands[pid] = command_obj
148
- command_obj.sync_wait()
149
151
  return pid
150
152
 
151
153
  async def async_run_command(
metaflow/runner/utils.py CHANGED
@@ -91,7 +91,7 @@ def read_from_fifo_when_ready(
91
91
  encoding : str, optional
92
92
  Encoding to use while reading the file, by default "utf-8".
93
93
  timeout : int, optional
94
- Timeout for reading the file in milliseconds, by default 3600.
94
+ Timeout for reading the file in seconds, by default 3600.
95
95
 
96
96
  Returns
97
97
  -------
@@ -107,30 +107,47 @@ def read_from_fifo_when_ready(
107
107
  content to the FIFO.
108
108
  """
109
109
  content = bytearray()
110
-
111
110
  poll = select.poll()
112
111
  poll.register(fifo_fd, select.POLLIN)
113
-
112
+ max_timeout = 3 # Wait for 10 * 3 = 30 ms after last write
114
113
  while True:
115
- poll_begin = time.time()
116
- poll.poll(timeout)
117
- timeout -= 1000 * (time.time() - poll_begin)
118
-
119
- if timeout <= 0:
114
+ if timeout < 0:
120
115
  raise TimeoutError("Timeout while waiting for the file content")
121
116
 
117
+ poll_begin = time.time()
118
+ # We poll for a very short time to be also able to check if the file was closed
119
+ # If the file is closed, we assume that we only have one writer so if we have
120
+ # data, we break out. This is to work around issues in macos
121
+ events = poll.poll(min(10, timeout * 1000))
122
+ timeout -= time.time() - poll_begin
123
+
122
124
  try:
123
- data = os.read(fifo_fd, 128)
124
- while data:
125
+ data = os.read(fifo_fd, 8192)
126
+ if data:
125
127
  content += data
126
- data = os.read(fifo_fd, 128)
127
-
128
- # Read from a non-blocking closed FIFO returns an empty byte array
129
- break
130
-
128
+ else:
129
+ if len(events):
130
+ # We read an EOF -- consider the file done
131
+ break
132
+ else:
133
+ # We had no events (just a timeout) and the read didn't return
134
+ # an exception so the file is still open; we continue waiting for data
135
+ # Unfortunately, on MacOS, it seems that even *after* the file is
136
+ # closed on the other end, we still don't get a BlockingIOError so
137
+ # we hack our way and timeout if there is no write in 30ms which is
138
+ # a relative eternity for file writes.
139
+ if content:
140
+ if max_timeout <= 0:
141
+ break
142
+ max_timeout -= 1
143
+ continue
131
144
  except BlockingIOError:
132
- # FIFO is open but no data is available yet
133
- continue
145
+ has_blocking_error = True
146
+ if content:
147
+ # The file was closed
148
+ break
149
+ # else, if we have no content, we continue waiting for the file to be open
150
+ # and written to.
134
151
 
135
152
  if not content and check_process_exited(command_obj):
136
153
  raise CalledProcessError(command_obj.process.returncode, command_obj.command)
@@ -156,7 +173,7 @@ async def async_read_from_fifo_when_ready(
156
173
  encoding : str, optional
157
174
  Encoding to use while reading the file, by default "utf-8".
158
175
  timeout : int, optional
159
- Timeout for reading the file in milliseconds, by default 3600.
176
+ Timeout for reading the file in seconds, by default 3600.
160
177
 
161
178
  Returns
162
179
  -------
@@ -206,7 +223,7 @@ def handle_timeout(
206
223
  command_obj : CommandManager
207
224
  Command manager object that encapsulates the running command details.
208
225
  file_read_timeout : int
209
- Timeout for reading the file.
226
+ Timeout for reading the file, in seconds
210
227
 
211
228
  Returns
212
229
  -------
@@ -243,7 +260,7 @@ async def async_handle_timeout(
243
260
  command_obj : CommandManager
244
261
  Command manager object that encapsulates the running command details.
245
262
  file_read_timeout : int
246
- Timeout for reading the file.
263
+ Timeout for reading the file, in seconds
247
264
 
248
265
  Returns
249
266
  -------