ob-metaflow 2.12.36.2__py2.py3-none-any.whl → 2.12.36.3__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ob-metaflow might be problematic. Click here for more details.

Files changed (54)
  1. metaflow/__init__.py +0 -3
  2. metaflow/cli.py +697 -84
  3. metaflow/cli_args.py +0 -17
  4. metaflow/cmd/develop/stub_generator.py +2 -9
  5. metaflow/decorators.py +2 -63
  6. metaflow/extension_support/plugins.py +27 -41
  7. metaflow/flowspec.py +16 -156
  8. metaflow/includefile.py +22 -50
  9. metaflow/metaflow_config.py +1 -1
  10. metaflow/package.py +3 -17
  11. metaflow/parameters.py +23 -80
  12. metaflow/plugins/__init__.py +0 -4
  13. metaflow/plugins/airflow/airflow_cli.py +0 -1
  14. metaflow/plugins/argo/argo_workflows.py +1 -41
  15. metaflow/plugins/argo/argo_workflows_cli.py +0 -1
  16. metaflow/plugins/argo/argo_workflows_deployer_objects.py +1 -5
  17. metaflow/plugins/aws/batch/batch_decorator.py +2 -2
  18. metaflow/plugins/aws/step_functions/step_functions.py +0 -32
  19. metaflow/plugins/aws/step_functions/step_functions_cli.py +0 -1
  20. metaflow/plugins/aws/step_functions/step_functions_deployer_objects.py +0 -3
  21. metaflow/plugins/datatools/s3/s3op.py +3 -3
  22. metaflow/plugins/kubernetes/kubernetes_cli.py +1 -1
  23. metaflow/plugins/kubernetes/kubernetes_decorator.py +2 -2
  24. metaflow/plugins/pypi/conda_decorator.py +10 -20
  25. metaflow/plugins/pypi/pypi_decorator.py +9 -11
  26. metaflow/plugins/timeout_decorator.py +2 -2
  27. metaflow/runner/click_api.py +19 -73
  28. metaflow/runner/deployer.py +1 -1
  29. metaflow/runner/deployer_impl.py +2 -2
  30. metaflow/runner/metaflow_runner.py +1 -4
  31. metaflow/runner/nbdeploy.py +0 -2
  32. metaflow/runner/nbrun.py +1 -1
  33. metaflow/runner/subprocess_manager.py +1 -3
  34. metaflow/runner/utils.py +20 -37
  35. metaflow/runtime.py +73 -111
  36. metaflow/sidecar/sidecar_worker.py +1 -1
  37. metaflow/util.py +0 -17
  38. metaflow/version.py +1 -1
  39. {ob_metaflow-2.12.36.2.dist-info → ob_metaflow-2.12.36.3.dist-info}/METADATA +2 -3
  40. {ob_metaflow-2.12.36.2.dist-info → ob_metaflow-2.12.36.3.dist-info}/RECORD +44 -54
  41. metaflow/cli_components/__init__.py +0 -0
  42. metaflow/cli_components/dump_cmd.py +0 -96
  43. metaflow/cli_components/init_cmd.py +0 -51
  44. metaflow/cli_components/run_cmds.py +0 -358
  45. metaflow/cli_components/step_cmd.py +0 -189
  46. metaflow/cli_components/utils.py +0 -140
  47. metaflow/user_configs/__init__.py +0 -0
  48. metaflow/user_configs/config_decorators.py +0 -563
  49. metaflow/user_configs/config_options.py +0 -495
  50. metaflow/user_configs/config_parameters.py +0 -386
  51. {ob_metaflow-2.12.36.2.dist-info → ob_metaflow-2.12.36.3.dist-info}/LICENSE +0 -0
  52. {ob_metaflow-2.12.36.2.dist-info → ob_metaflow-2.12.36.3.dist-info}/WHEEL +0 -0
  53. {ob_metaflow-2.12.36.2.dist-info → ob_metaflow-2.12.36.3.dist-info}/entry_points.txt +0 -0
  54. {ob_metaflow-2.12.36.2.dist-info → ob_metaflow-2.12.36.3.dist-info}/top_level.txt +0 -0
metaflow/plugins/pypi/pypi_decorator.py CHANGED
@@ -25,10 +25,9 @@ class PyPIStepDecorator(StepDecorator):
25
25
  defaults = {"packages": {}, "python": None, "disabled": None} # wheels
26
26
 
27
27
  def __init__(self, attributes=None, statically_defined=False):
28
- self._attributes_with_user_values = (
29
- set(attributes.keys()) if attributes is not None else set()
28
+ self._user_defined_attributes = (
29
+ attributes.copy() if attributes is not None else {}
30
30
  )
31
-
32
31
  super().__init__(attributes, statically_defined)
33
32
 
34
33
  def step_init(self, flow, graph, step, decos, environment, flow_datastore, logger):
@@ -43,9 +42,10 @@ class PyPIStepDecorator(StepDecorator):
43
42
  if "pypi_base" in self.flow._flow_decorators:
44
43
  pypi_base = self.flow._flow_decorators["pypi_base"][0]
45
44
  super_attributes = pypi_base.attributes
46
- self._attributes_with_user_values.update(
47
- pypi_base._attributes_with_user_values
48
- )
45
+ self._user_defined_attributes = {
46
+ **self._user_defined_attributes,
47
+ **pypi_base._user_defined_attributes,
48
+ }
49
49
  self.attributes["packages"] = {
50
50
  **super_attributes["packages"],
51
51
  **self.attributes["packages"],
@@ -106,7 +106,7 @@ class PyPIStepDecorator(StepDecorator):
106
106
  environment.set_local_root(LocalStorage.get_datastore_root_from_config(logger))
107
107
 
108
108
  def is_attribute_user_defined(self, name):
109
- return name in self._attributes_with_user_values
109
+ return name in self._user_defined_attributes
110
110
 
111
111
 
112
112
  class PyPIFlowDecorator(FlowDecorator):
@@ -129,10 +129,9 @@ class PyPIFlowDecorator(FlowDecorator):
129
129
  defaults = {"packages": {}, "python": None, "disabled": None}
130
130
 
131
131
  def __init__(self, attributes=None, statically_defined=False):
132
- self._attributes_with_user_values = (
133
- set(attributes.keys()) if attributes is not None else set()
132
+ self._user_defined_attributes = (
133
+ attributes.copy() if attributes is not None else {}
134
134
  )
135
-
136
135
  super().__init__(attributes, statically_defined)
137
136
 
138
137
  def flow_init(
@@ -141,7 +140,6 @@ class PyPIFlowDecorator(FlowDecorator):
141
140
  from metaflow import decorators
142
141
 
143
142
  decorators._attach_decorators(flow, ["pypi"])
144
- decorators._init(flow)
145
143
 
146
144
  # @pypi uses a conda environment to create a virtual environment.
147
145
  # The conda environment can be created through micromamba.
metaflow/plugins/timeout_decorator.py CHANGED
@@ -37,8 +37,8 @@ class TimeoutDecorator(StepDecorator):
37
37
  name = "timeout"
38
38
  defaults = {"seconds": 0, "minutes": 0, "hours": 0}
39
39
 
40
- def init(self):
41
- super().init()
40
+ def __init__(self, *args, **kwargs):
41
+ super(TimeoutDecorator, self).__init__(*args, **kwargs)
42
42
  # Initialize secs in __init__ so other decorators could safely use this
43
43
  # value without worrying about decorator order.
44
44
  # Convert values in attributes to type:int since they can be type:str
metaflow/runner/click_api.py CHANGED
@@ -9,7 +9,6 @@ if sys.version_info < (3, 7):
9
9
  )
10
10
 
11
11
  import datetime
12
- import functools
13
12
  import importlib
14
13
  import inspect
15
14
  import itertools
@@ -39,7 +38,6 @@ from metaflow.decorators import add_decorator_options
39
38
  from metaflow.exception import MetaflowException
40
39
  from metaflow.includefile import FilePathClass
41
40
  from metaflow.parameters import JSONTypeClass, flow_context
42
- from metaflow.user_configs.config_options import LocalFileInput
43
41
 
44
42
  # Define a recursive type alias for JSON
45
43
  JSON = Union[Dict[str, "JSON"], List["JSON"], str, int, float, bool, None]
@@ -57,7 +55,6 @@ click_to_python_types = {
57
55
  File: str,
58
56
  JSONTypeClass: JSON,
59
57
  FilePathClass: str,
60
- LocalFileInput: str,
61
58
  }
62
59
 
63
60
 
@@ -127,37 +124,6 @@ def _method_sanity_check(
127
124
  return method_params
128
125
 
129
126
 
130
- def _lazy_load_command(
131
- cli_collection: click.Group,
132
- flow_parameters: Union[str, List[Parameter]],
133
- _self,
134
- name: str,
135
- ):
136
-
137
- # Context is not used in get_command so we can pass None. Since we pin click,
138
- # this won't change from under us.
139
-
140
- if isinstance(flow_parameters, str):
141
- # Resolve flow_parameters -- for start, this is a function which we
142
- # need to call to figure out the actual parameters (may be changed by configs)
143
- flow_parameters = getattr(_self, flow_parameters)()
144
- cmd_obj = cli_collection.get_command(None, name)
145
- if cmd_obj:
146
- if isinstance(cmd_obj, click.Group):
147
- # TODO: possibly check for fake groups with cmd_obj.name in ["cli", "main"]
148
- result = functools.partial(extract_group(cmd_obj, flow_parameters), _self)
149
- elif isinstance(cmd_obj, click.Command):
150
- result = functools.partial(extract_command(cmd_obj, flow_parameters), _self)
151
- else:
152
- raise RuntimeError(
153
- "Cannot handle %s of type %s" % (cmd_obj.name, type(cmd_obj))
154
- )
155
- setattr(_self, name, result)
156
- return result
157
- else:
158
- raise AttributeError()
159
-
160
-
161
127
  def get_annotation(param: Union[click.Argument, click.Option]):
162
128
  py_type = click_to_python_types[type(param.type)]
163
129
  if not param.required:
@@ -213,11 +179,9 @@ def extract_flow_class_from_file(flow_file: str) -> FlowSpec:
213
179
 
214
180
 
215
181
  class MetaflowAPI(object):
216
- def __init__(self, parent=None, flow_cls=None, **kwargs):
182
+ def __init__(self, parent=None, **kwargs):
217
183
  self._parent = parent
218
184
  self._chain = [{self._API_NAME: kwargs}]
219
- self._flow_cls = flow_cls
220
- self._cached_computed_parameters = None
221
185
 
222
186
  @property
223
187
  def parent(self):
@@ -236,22 +200,23 @@ class MetaflowAPI(object):
236
200
  @classmethod
237
201
  def from_cli(cls, flow_file: str, cli_collection: Callable) -> Callable:
238
202
  flow_cls = extract_flow_class_from_file(flow_file)
239
-
203
+ flow_parameters = [p for _, p in flow_cls._get_parameters()]
240
204
  with flow_context(flow_cls) as _:
241
205
  add_decorator_options(cli_collection)
242
206
 
243
- def getattr_wrapper(_self, name):
244
- # Functools.partial do not automatically bind self (no __get__)
245
- return _self._internal_getattr(_self, name)
246
-
247
- class_dict = {
248
- "__module__": "metaflow",
249
- "_API_NAME": flow_file,
250
- "_internal_getattr": functools.partial(
251
- _lazy_load_command, cli_collection, "_compute_flow_parameters"
252
- ),
253
- "__getattr__": getattr_wrapper,
254
- }
207
+ class_dict = {"__module__": "metaflow", "_API_NAME": flow_file}
208
+ command_groups = cli_collection.sources
209
+ for each_group in command_groups:
210
+ for _, cmd_obj in each_group.commands.items():
211
+ if isinstance(cmd_obj, click.Group):
212
+ # TODO: possibly check for fake groups with cmd_obj.name in ["cli", "main"]
213
+ class_dict[cmd_obj.name] = extract_group(cmd_obj, flow_parameters)
214
+ elif isinstance(cmd_obj, click.Command):
215
+ class_dict[cmd_obj.name] = extract_command(cmd_obj, flow_parameters)
216
+ else:
217
+ raise RuntimeError(
218
+ "Cannot handle %s of type %s" % (cmd_obj.name, type(cmd_obj))
219
+ )
255
220
 
256
221
  to_return = type(flow_file, (MetaflowAPI,), class_dict)
257
222
  to_return.__name__ = flow_file
@@ -272,11 +237,11 @@ class MetaflowAPI(object):
272
237
  defaults,
273
238
  **kwargs,
274
239
  )
275
- return to_return(parent=None, flow_cls=flow_cls, **method_params)
240
+ return to_return(parent=None, **method_params)
276
241
 
277
242
  m = _method
278
- m.__name__ = cli_collection.name
279
- m.__doc__ = getattr(cli_collection, "help", None)
243
+ m.__name__ = cmd_obj.name
244
+ m.__doc__ = getattr(cmd_obj, "help", None)
280
245
  m.__signature__ = inspect.signature(_method).replace(
281
246
  parameters=params_sigs.values()
282
247
  )
@@ -322,25 +287,6 @@ class MetaflowAPI(object):
322
287
 
323
288
  return components
324
289
 
325
- def _compute_flow_parameters(self):
326
- if self._flow_cls is None or self._parent is not None:
327
- raise RuntimeError(
328
- "Computing flow-level parameters for a non start API. "
329
- "Please report to the Metaflow team."
330
- )
331
- # TODO: We need to actually compute the new parameters (based on configs) which
332
- # would involve processing the options at least partially. We will do this
333
- # before GA but for now making it work for regular parameters
334
- if self._cached_computed_parameters is not None:
335
- return self._cached_computed_parameters
336
- self._cached_computed_parameters = []
337
- for _, param in self._flow_cls._get_parameters():
338
- if param.IS_CONFIG_PARAMETER:
339
- continue
340
- param.init()
341
- self._cached_computed_parameters.append(param)
342
- return self._cached_computed_parameters
343
-
344
290
 
345
291
  def extract_all_params(cmd_obj: Union[click.Command, click.Group]):
346
292
  arg_params_sigs = OrderedDict()
@@ -405,7 +351,7 @@ def extract_group(cmd_obj: click.Group, flow_parameters: List[Parameter]) -> Cal
405
351
  method_params = _method_sanity_check(
406
352
  possible_arg_params, possible_opt_params, annotations, defaults, **kwargs
407
353
  )
408
- return resulting_class(parent=_self, flow_cls=None, **method_params)
354
+ return resulting_class(parent=_self, **method_params)
409
355
 
410
356
  m = _method
411
357
  m.__name__ = cmd_obj.name
metaflow/runner/deployer.py CHANGED
@@ -64,7 +64,7 @@ class Deployer(metaclass=DeployerMeta):
64
64
  The directory to run the subprocess in; if not specified, the current
65
65
  directory is used.
66
66
  file_read_timeout : int, default 3600
67
- The timeout until which we try to read the deployer attribute file (in seconds).
67
+ The timeout until which we try to read the deployer attribute file.
68
68
  **kwargs : Any
69
69
  Additional arguments that you would pass to `python myflow.py` before
70
70
  the deployment command.
metaflow/runner/deployer_impl.py CHANGED
@@ -37,7 +37,7 @@ class DeployerImpl(object):
37
37
  The directory to run the subprocess in; if not specified, the current
38
38
  directory is used.
39
39
  file_read_timeout : int, default 3600
40
- The timeout until which we try to read the deployer attribute file (in seconds).
40
+ The timeout until which we try to read the deployer attribute file.
41
41
  **kwargs : Any
42
42
  Additional arguments that you would pass to `python myflow.py` before
43
43
  the deployment command.
@@ -144,7 +144,7 @@ class DeployerImpl(object):
144
144
  # Additional info is used to pass additional deployer specific information.
145
145
  # It is used in non-OSS deployers (extensions).
146
146
  self.additional_info = content.get("additional_info", {})
147
- command_obj.sync_wait()
147
+
148
148
  if command_obj.process.returncode == 0:
149
149
  return create_class(deployer=self)
150
150
 
metaflow/runner/metaflow_runner.py CHANGED
@@ -221,7 +221,7 @@ class Runner(object):
221
221
  The directory to run the subprocess in; if not specified, the current
222
222
  directory is used.
223
223
  file_read_timeout : int, default 3600
224
- The timeout until which we try to read the runner attribute file (in seconds).
224
+ The timeout until which we try to read the runner attribute file.
225
225
  **kwargs : Any
226
226
  Additional arguments that you would pass to `python myflow.py` before
227
227
  the `run` command.
@@ -272,9 +272,6 @@ class Runner(object):
272
272
 
273
273
  def __get_executing_run(self, attribute_file_fd, command_obj):
274
274
  content = handle_timeout(attribute_file_fd, command_obj, self.file_read_timeout)
275
-
276
- command_obj.sync_wait()
277
-
278
275
  content = json.loads(content)
279
276
  pathspec = "%s/%s" % (content.get("flow_name"), content.get("run_id"))
280
277
 
metaflow/runner/nbdeploy.py CHANGED
@@ -46,8 +46,6 @@ class NBDeployer(object):
46
46
  base_dir : str, optional, default None
47
47
  The directory to run the subprocess in; if not specified, the current
48
48
  working directory is used.
49
- file_read_timeout : int, default 3600
50
- The timeout until which we try to read the deployer attribute file (in seconds).
51
49
  **kwargs : Any
52
50
  Additional arguments that you would pass to `python myflow.py` i.e. options
53
51
  listed in `python myflow.py --help`
metaflow/runner/nbrun.py CHANGED
@@ -44,7 +44,7 @@ class NBRunner(object):
44
44
  The directory to run the subprocess in; if not specified, the current
45
45
  working directory is used.
46
46
  file_read_timeout : int, default 3600
47
- The timeout until which we try to read the runner attribute file (in seconds).
47
+ The timeout until which we try to read the runner attribute file.
48
48
  **kwargs : Any
49
49
  Additional arguments that you would pass to `python myflow.py` before
50
50
  the `run` command.
metaflow/runner/subprocess_manager.py CHANGED
@@ -120,9 +120,6 @@ class SubprocessManager(object):
120
120
  """
121
121
  Run a command synchronously and return its process ID.
122
122
 
123
- Note: in no case does this wait for the process to *finish*. Use sync_wait()
124
- to wait for the command to finish.
125
-
126
123
  Parameters
127
124
  ----------
128
125
  command : List[str]
@@ -148,6 +145,7 @@ class SubprocessManager(object):
148
145
  command_obj = CommandManager(command, env, cwd)
149
146
  pid = command_obj.run(show_output=show_output)
150
147
  self.commands[pid] = command_obj
148
+ command_obj.sync_wait()
151
149
  return pid
152
150
 
153
151
  async def async_run_command(
metaflow/runner/utils.py CHANGED
@@ -91,7 +91,7 @@ def read_from_fifo_when_ready(
91
91
  encoding : str, optional
92
92
  Encoding to use while reading the file, by default "utf-8".
93
93
  timeout : int, optional
94
- Timeout for reading the file in seconds, by default 3600.
94
+ Timeout for reading the file in milliseconds, by default 3600.
95
95
 
96
96
  Returns
97
97
  -------
@@ -107,47 +107,30 @@ def read_from_fifo_when_ready(
107
107
  content to the FIFO.
108
108
  """
109
109
  content = bytearray()
110
+
110
111
  poll = select.poll()
111
112
  poll.register(fifo_fd, select.POLLIN)
112
- max_timeout = 3 # Wait for 10 * 3 = 30 ms after last write
113
- while True:
114
- if timeout < 0:
115
- raise TimeoutError("Timeout while waiting for the file content")
116
113
 
114
+ while True:
117
115
  poll_begin = time.time()
118
- # We poll for a very short time to be also able to check if the file was closed
119
- # If the file is closed, we assume that we only have one writer so if we have
120
- # data, we break out. This is to work around issues in macos
121
- events = poll.poll(min(10, timeout * 1000))
122
- timeout -= time.time() - poll_begin
116
+ poll.poll(timeout)
117
+ timeout -= 1000 * (time.time() - poll_begin)
118
+
119
+ if timeout <= 0:
120
+ raise TimeoutError("Timeout while waiting for the file content")
123
121
 
124
122
  try:
125
- data = os.read(fifo_fd, 8192)
126
- if data:
123
+ data = os.read(fifo_fd, 128)
124
+ while data:
127
125
  content += data
128
- else:
129
- if len(events):
130
- # We read an EOF -- consider the file done
131
- break
132
- else:
133
- # We had no events (just a timeout) and the read didn't return
134
- # an exception so the file is still open; we continue waiting for data
135
- # Unfortunately, on MacOS, it seems that even *after* the file is
136
- # closed on the other end, we still don't get a BlockingIOError so
137
- # we hack our way and timeout if there is no write in 30ms which is
138
- # a relative eternity for file writes.
139
- if content:
140
- if max_timeout <= 0:
141
- break
142
- max_timeout -= 1
143
- continue
126
+ data = os.read(fifo_fd, 128)
127
+
128
+ # Read from a non-blocking closed FIFO returns an empty byte array
129
+ break
130
+
144
131
  except BlockingIOError:
145
- has_blocking_error = True
146
- if content:
147
- # The file was closed
148
- break
149
- # else, if we have no content, we continue waiting for the file to be open
150
- # and written to.
132
+ # FIFO is open but no data is available yet
133
+ continue
151
134
 
152
135
  if not content and check_process_exited(command_obj):
153
136
  raise CalledProcessError(command_obj.process.returncode, command_obj.command)
@@ -173,7 +156,7 @@ async def async_read_from_fifo_when_ready(
173
156
  encoding : str, optional
174
157
  Encoding to use while reading the file, by default "utf-8".
175
158
  timeout : int, optional
176
- Timeout for reading the file in seconds, by default 3600.
159
+ Timeout for reading the file in milliseconds, by default 3600.
177
160
 
178
161
  Returns
179
162
  -------
@@ -223,7 +206,7 @@ def handle_timeout(
223
206
  command_obj : CommandManager
224
207
  Command manager object that encapsulates the running command details.
225
208
  file_read_timeout : int
226
- Timeout for reading the file, in seconds
209
+ Timeout for reading the file.
227
210
 
228
211
  Returns
229
212
  -------
@@ -260,7 +243,7 @@ async def async_handle_timeout(
260
243
  command_obj : CommandManager
261
244
  Command manager object that encapsulates the running command details.
262
245
  file_read_timeout : int
263
- Timeout for reading the file, in seconds
246
+ Timeout for reading the file.
264
247
 
265
248
  Returns
266
249
  -------