ob-metaflow 2.12.35.1__py2.py3-none-any.whl → 2.12.36.2__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ob-metaflow might be problematic. Click here for more details.
- metaflow/__init__.py +3 -0
- metaflow/cli.py +84 -697
- metaflow/cli_args.py +17 -0
- metaflow/cli_components/__init__.py +0 -0
- metaflow/cli_components/dump_cmd.py +96 -0
- metaflow/cli_components/init_cmd.py +51 -0
- metaflow/cli_components/run_cmds.py +358 -0
- metaflow/cli_components/step_cmd.py +189 -0
- metaflow/cli_components/utils.py +140 -0
- metaflow/cmd/develop/stub_generator.py +9 -2
- metaflow/decorators.py +63 -2
- metaflow/extension_support/plugins.py +41 -27
- metaflow/flowspec.py +156 -16
- metaflow/includefile.py +50 -22
- metaflow/metaflow_config.py +1 -1
- metaflow/package.py +17 -3
- metaflow/parameters.py +80 -23
- metaflow/plugins/__init__.py +4 -0
- metaflow/plugins/airflow/airflow_cli.py +1 -0
- metaflow/plugins/argo/argo_workflows.py +44 -4
- metaflow/plugins/argo/argo_workflows_cli.py +1 -0
- metaflow/plugins/argo/argo_workflows_deployer_objects.py +5 -1
- metaflow/plugins/aws/batch/batch_decorator.py +2 -2
- metaflow/plugins/aws/step_functions/step_functions.py +32 -0
- metaflow/plugins/aws/step_functions/step_functions_cli.py +1 -0
- metaflow/plugins/aws/step_functions/step_functions_deployer_objects.py +3 -0
- metaflow/plugins/datatools/s3/s3op.py +3 -3
- metaflow/plugins/kubernetes/kubernetes.py +3 -3
- metaflow/plugins/kubernetes/kubernetes_cli.py +1 -1
- metaflow/plugins/kubernetes/kubernetes_decorator.py +2 -2
- metaflow/plugins/kubernetes/kubernetes_job.py +3 -3
- metaflow/plugins/parallel_decorator.py +4 -1
- metaflow/plugins/pypi/conda_decorator.py +20 -10
- metaflow/plugins/pypi/pypi_decorator.py +11 -9
- metaflow/plugins/timeout_decorator.py +2 -2
- metaflow/runner/click_api.py +73 -19
- metaflow/runner/deployer.py +1 -1
- metaflow/runner/deployer_impl.py +2 -2
- metaflow/runner/metaflow_runner.py +4 -1
- metaflow/runner/nbdeploy.py +2 -0
- metaflow/runner/nbrun.py +1 -1
- metaflow/runner/subprocess_manager.py +3 -1
- metaflow/runner/utils.py +37 -20
- metaflow/runtime.py +199 -105
- metaflow/sidecar/sidecar_worker.py +1 -1
- metaflow/user_configs/__init__.py +0 -0
- metaflow/user_configs/config_decorators.py +563 -0
- metaflow/user_configs/config_options.py +495 -0
- metaflow/user_configs/config_parameters.py +386 -0
- metaflow/util.py +17 -0
- metaflow/version.py +1 -1
- {ob_metaflow-2.12.35.1.dist-info → ob_metaflow-2.12.36.2.dist-info}/METADATA +3 -2
- {ob_metaflow-2.12.35.1.dist-info → ob_metaflow-2.12.36.2.dist-info}/RECORD +57 -47
- {ob_metaflow-2.12.35.1.dist-info → ob_metaflow-2.12.36.2.dist-info}/LICENSE +0 -0
- {ob_metaflow-2.12.35.1.dist-info → ob_metaflow-2.12.36.2.dist-info}/WHEEL +0 -0
- {ob_metaflow-2.12.35.1.dist-info → ob_metaflow-2.12.36.2.dist-info}/entry_points.txt +0 -0
- {ob_metaflow-2.12.35.1.dist-info → ob_metaflow-2.12.36.2.dist-info}/top_level.txt +0 -0
metaflow/runner/click_api.py
CHANGED
|
@@ -9,6 +9,7 @@ if sys.version_info < (3, 7):
|
|
|
9
9
|
)
|
|
10
10
|
|
|
11
11
|
import datetime
|
|
12
|
+
import functools
|
|
12
13
|
import importlib
|
|
13
14
|
import inspect
|
|
14
15
|
import itertools
|
|
@@ -38,6 +39,7 @@ from metaflow.decorators import add_decorator_options
|
|
|
38
39
|
from metaflow.exception import MetaflowException
|
|
39
40
|
from metaflow.includefile import FilePathClass
|
|
40
41
|
from metaflow.parameters import JSONTypeClass, flow_context
|
|
42
|
+
from metaflow.user_configs.config_options import LocalFileInput
|
|
41
43
|
|
|
42
44
|
# Define a recursive type alias for JSON
|
|
43
45
|
JSON = Union[Dict[str, "JSON"], List["JSON"], str, int, float, bool, None]
|
|
@@ -55,6 +57,7 @@ click_to_python_types = {
|
|
|
55
57
|
File: str,
|
|
56
58
|
JSONTypeClass: JSON,
|
|
57
59
|
FilePathClass: str,
|
|
60
|
+
LocalFileInput: str,
|
|
58
61
|
}
|
|
59
62
|
|
|
60
63
|
|
|
@@ -124,6 +127,37 @@ def _method_sanity_check(
|
|
|
124
127
|
return method_params
|
|
125
128
|
|
|
126
129
|
|
|
130
|
+
def _lazy_load_command(
|
|
131
|
+
cli_collection: click.Group,
|
|
132
|
+
flow_parameters: Union[str, List[Parameter]],
|
|
133
|
+
_self,
|
|
134
|
+
name: str,
|
|
135
|
+
):
|
|
136
|
+
|
|
137
|
+
# Context is not used in get_command so we can pass None. Since we pin click,
|
|
138
|
+
# this won't change from under us.
|
|
139
|
+
|
|
140
|
+
if isinstance(flow_parameters, str):
|
|
141
|
+
# Resolve flow_parameters -- for start, this is a function which we
|
|
142
|
+
# need to call to figure out the actual parameters (may be changed by configs)
|
|
143
|
+
flow_parameters = getattr(_self, flow_parameters)()
|
|
144
|
+
cmd_obj = cli_collection.get_command(None, name)
|
|
145
|
+
if cmd_obj:
|
|
146
|
+
if isinstance(cmd_obj, click.Group):
|
|
147
|
+
# TODO: possibly check for fake groups with cmd_obj.name in ["cli", "main"]
|
|
148
|
+
result = functools.partial(extract_group(cmd_obj, flow_parameters), _self)
|
|
149
|
+
elif isinstance(cmd_obj, click.Command):
|
|
150
|
+
result = functools.partial(extract_command(cmd_obj, flow_parameters), _self)
|
|
151
|
+
else:
|
|
152
|
+
raise RuntimeError(
|
|
153
|
+
"Cannot handle %s of type %s" % (cmd_obj.name, type(cmd_obj))
|
|
154
|
+
)
|
|
155
|
+
setattr(_self, name, result)
|
|
156
|
+
return result
|
|
157
|
+
else:
|
|
158
|
+
raise AttributeError()
|
|
159
|
+
|
|
160
|
+
|
|
127
161
|
def get_annotation(param: Union[click.Argument, click.Option]):
|
|
128
162
|
py_type = click_to_python_types[type(param.type)]
|
|
129
163
|
if not param.required:
|
|
@@ -179,9 +213,11 @@ def extract_flow_class_from_file(flow_file: str) -> FlowSpec:
|
|
|
179
213
|
|
|
180
214
|
|
|
181
215
|
class MetaflowAPI(object):
|
|
182
|
-
def __init__(self, parent=None, **kwargs):
|
|
216
|
+
def __init__(self, parent=None, flow_cls=None, **kwargs):
|
|
183
217
|
self._parent = parent
|
|
184
218
|
self._chain = [{self._API_NAME: kwargs}]
|
|
219
|
+
self._flow_cls = flow_cls
|
|
220
|
+
self._cached_computed_parameters = None
|
|
185
221
|
|
|
186
222
|
@property
|
|
187
223
|
def parent(self):
|
|
@@ -200,23 +236,22 @@ class MetaflowAPI(object):
|
|
|
200
236
|
@classmethod
|
|
201
237
|
def from_cli(cls, flow_file: str, cli_collection: Callable) -> Callable:
|
|
202
238
|
flow_cls = extract_flow_class_from_file(flow_file)
|
|
203
|
-
|
|
239
|
+
|
|
204
240
|
with flow_context(flow_cls) as _:
|
|
205
241
|
add_decorator_options(cli_collection)
|
|
206
242
|
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
)
|
|
243
|
+
def getattr_wrapper(_self, name):
|
|
244
|
+
# functools.partial objects do not automatically bind self (they define no __get__)
|
|
245
|
+
return _self._internal_getattr(_self, name)
|
|
246
|
+
|
|
247
|
+
class_dict = {
|
|
248
|
+
"__module__": "metaflow",
|
|
249
|
+
"_API_NAME": flow_file,
|
|
250
|
+
"_internal_getattr": functools.partial(
|
|
251
|
+
_lazy_load_command, cli_collection, "_compute_flow_parameters"
|
|
252
|
+
),
|
|
253
|
+
"__getattr__": getattr_wrapper,
|
|
254
|
+
}
|
|
220
255
|
|
|
221
256
|
to_return = type(flow_file, (MetaflowAPI,), class_dict)
|
|
222
257
|
to_return.__name__ = flow_file
|
|
@@ -237,11 +272,11 @@ class MetaflowAPI(object):
|
|
|
237
272
|
defaults,
|
|
238
273
|
**kwargs,
|
|
239
274
|
)
|
|
240
|
-
return to_return(parent=None, **method_params)
|
|
275
|
+
return to_return(parent=None, flow_cls=flow_cls, **method_params)
|
|
241
276
|
|
|
242
277
|
m = _method
|
|
243
|
-
m.__name__ =
|
|
244
|
-
m.__doc__ = getattr(
|
|
278
|
+
m.__name__ = cli_collection.name
|
|
279
|
+
m.__doc__ = getattr(cli_collection, "help", None)
|
|
245
280
|
m.__signature__ = inspect.signature(_method).replace(
|
|
246
281
|
parameters=params_sigs.values()
|
|
247
282
|
)
|
|
@@ -287,6 +322,25 @@ class MetaflowAPI(object):
|
|
|
287
322
|
|
|
288
323
|
return components
|
|
289
324
|
|
|
325
|
+
def _compute_flow_parameters(self):
|
|
326
|
+
if self._flow_cls is None or self._parent is not None:
|
|
327
|
+
raise RuntimeError(
|
|
328
|
+
"Computing flow-level parameters for a non start API. "
|
|
329
|
+
"Please report to the Metaflow team."
|
|
330
|
+
)
|
|
331
|
+
# TODO: We need to actually compute the new parameters (based on configs) which
|
|
332
|
+
# would involve processing the options at least partially. We will do this
|
|
333
|
+
# before GA but for now making it work for regular parameters
|
|
334
|
+
if self._cached_computed_parameters is not None:
|
|
335
|
+
return self._cached_computed_parameters
|
|
336
|
+
self._cached_computed_parameters = []
|
|
337
|
+
for _, param in self._flow_cls._get_parameters():
|
|
338
|
+
if param.IS_CONFIG_PARAMETER:
|
|
339
|
+
continue
|
|
340
|
+
param.init()
|
|
341
|
+
self._cached_computed_parameters.append(param)
|
|
342
|
+
return self._cached_computed_parameters
|
|
343
|
+
|
|
290
344
|
|
|
291
345
|
def extract_all_params(cmd_obj: Union[click.Command, click.Group]):
|
|
292
346
|
arg_params_sigs = OrderedDict()
|
|
@@ -351,7 +405,7 @@ def extract_group(cmd_obj: click.Group, flow_parameters: List[Parameter]) -> Cal
|
|
|
351
405
|
method_params = _method_sanity_check(
|
|
352
406
|
possible_arg_params, possible_opt_params, annotations, defaults, **kwargs
|
|
353
407
|
)
|
|
354
|
-
return resulting_class(parent=_self, **method_params)
|
|
408
|
+
return resulting_class(parent=_self, flow_cls=None, **method_params)
|
|
355
409
|
|
|
356
410
|
m = _method
|
|
357
411
|
m.__name__ = cmd_obj.name
|
metaflow/runner/deployer.py
CHANGED
|
@@ -64,7 +64,7 @@ class Deployer(metaclass=DeployerMeta):
|
|
|
64
64
|
The directory to run the subprocess in; if not specified, the current
|
|
65
65
|
directory is used.
|
|
66
66
|
file_read_timeout : int, default 3600
|
|
67
|
-
The timeout until which we try to read the deployer attribute file.
|
|
67
|
+
The timeout until which we try to read the deployer attribute file (in seconds).
|
|
68
68
|
**kwargs : Any
|
|
69
69
|
Additional arguments that you would pass to `python myflow.py` before
|
|
70
70
|
the deployment command.
|
metaflow/runner/deployer_impl.py
CHANGED
|
@@ -37,7 +37,7 @@ class DeployerImpl(object):
|
|
|
37
37
|
The directory to run the subprocess in; if not specified, the current
|
|
38
38
|
directory is used.
|
|
39
39
|
file_read_timeout : int, default 3600
|
|
40
|
-
The timeout until which we try to read the deployer attribute file.
|
|
40
|
+
The timeout until which we try to read the deployer attribute file (in seconds).
|
|
41
41
|
**kwargs : Any
|
|
42
42
|
Additional arguments that you would pass to `python myflow.py` before
|
|
43
43
|
the deployment command.
|
|
@@ -144,7 +144,7 @@ class DeployerImpl(object):
|
|
|
144
144
|
# Additional info is used to pass additional deployer specific information.
|
|
145
145
|
# It is used in non-OSS deployers (extensions).
|
|
146
146
|
self.additional_info = content.get("additional_info", {})
|
|
147
|
-
|
|
147
|
+
command_obj.sync_wait()
|
|
148
148
|
if command_obj.process.returncode == 0:
|
|
149
149
|
return create_class(deployer=self)
|
|
150
150
|
|
|
metaflow/runner/metaflow_runner.py
CHANGED
|
@@ -221,7 +221,7 @@ class Runner(object):
|
|
|
221
221
|
The directory to run the subprocess in; if not specified, the current
|
|
222
222
|
directory is used.
|
|
223
223
|
file_read_timeout : int, default 3600
|
|
224
|
-
The timeout until which we try to read the runner attribute file.
|
|
224
|
+
The timeout until which we try to read the runner attribute file (in seconds).
|
|
225
225
|
**kwargs : Any
|
|
226
226
|
Additional arguments that you would pass to `python myflow.py` before
|
|
227
227
|
the `run` command.
|
|
@@ -272,6 +272,9 @@ class Runner(object):
|
|
|
272
272
|
|
|
273
273
|
def __get_executing_run(self, attribute_file_fd, command_obj):
|
|
274
274
|
content = handle_timeout(attribute_file_fd, command_obj, self.file_read_timeout)
|
|
275
|
+
|
|
276
|
+
command_obj.sync_wait()
|
|
277
|
+
|
|
275
278
|
content = json.loads(content)
|
|
276
279
|
pathspec = "%s/%s" % (content.get("flow_name"), content.get("run_id"))
|
|
277
280
|
|
metaflow/runner/nbdeploy.py
CHANGED
|
@@ -46,6 +46,8 @@ class NBDeployer(object):
|
|
|
46
46
|
base_dir : str, optional, default None
|
|
47
47
|
The directory to run the subprocess in; if not specified, the current
|
|
48
48
|
working directory is used.
|
|
49
|
+
file_read_timeout : int, default 3600
|
|
50
|
+
The timeout until which we try to read the deployer attribute file (in seconds).
|
|
49
51
|
**kwargs : Any
|
|
50
52
|
Additional arguments that you would pass to `python myflow.py` i.e. options
|
|
51
53
|
listed in `python myflow.py --help`
|
metaflow/runner/nbrun.py
CHANGED
|
@@ -44,7 +44,7 @@ class NBRunner(object):
|
|
|
44
44
|
The directory to run the subprocess in; if not specified, the current
|
|
45
45
|
working directory is used.
|
|
46
46
|
file_read_timeout : int, default 3600
|
|
47
|
-
The timeout until which we try to read the runner attribute file.
|
|
47
|
+
The timeout until which we try to read the runner attribute file (in seconds).
|
|
48
48
|
**kwargs : Any
|
|
49
49
|
Additional arguments that you would pass to `python myflow.py` before
|
|
50
50
|
the `run` command.
|
|
metaflow/runner/subprocess_manager.py
CHANGED
|
@@ -120,6 +120,9 @@ class SubprocessManager(object):
|
|
|
120
120
|
"""
|
|
121
121
|
Run a command synchronously and return its process ID.
|
|
122
122
|
|
|
123
|
+
Note: in no case does this wait for the process to *finish*. Use sync_wait()
|
|
124
|
+
to wait for the command to finish.
|
|
125
|
+
|
|
123
126
|
Parameters
|
|
124
127
|
----------
|
|
125
128
|
command : List[str]
|
|
@@ -145,7 +148,6 @@ class SubprocessManager(object):
|
|
|
145
148
|
command_obj = CommandManager(command, env, cwd)
|
|
146
149
|
pid = command_obj.run(show_output=show_output)
|
|
147
150
|
self.commands[pid] = command_obj
|
|
148
|
-
command_obj.sync_wait()
|
|
149
151
|
return pid
|
|
150
152
|
|
|
151
153
|
async def async_run_command(
|
metaflow/runner/utils.py
CHANGED
|
@@ -91,7 +91,7 @@ def read_from_fifo_when_ready(
|
|
|
91
91
|
encoding : str, optional
|
|
92
92
|
Encoding to use while reading the file, by default "utf-8".
|
|
93
93
|
timeout : int, optional
|
|
94
|
-
Timeout for reading the file in
|
|
94
|
+
Timeout for reading the file in seconds, by default 3600.
|
|
95
95
|
|
|
96
96
|
Returns
|
|
97
97
|
-------
|
|
@@ -107,30 +107,47 @@ def read_from_fifo_when_ready(
|
|
|
107
107
|
content to the FIFO.
|
|
108
108
|
"""
|
|
109
109
|
content = bytearray()
|
|
110
|
-
|
|
111
110
|
poll = select.poll()
|
|
112
111
|
poll.register(fifo_fd, select.POLLIN)
|
|
113
|
-
|
|
112
|
+
max_timeout = 3 # Wait for 10 * 3 = 30 ms after last write
|
|
114
113
|
while True:
|
|
115
|
-
|
|
116
|
-
poll.poll(timeout)
|
|
117
|
-
timeout -= 1000 * (time.time() - poll_begin)
|
|
118
|
-
|
|
119
|
-
if timeout <= 0:
|
|
114
|
+
if timeout < 0:
|
|
120
115
|
raise TimeoutError("Timeout while waiting for the file content")
|
|
121
116
|
|
|
117
|
+
poll_begin = time.time()
|
|
118
|
+
# We poll for a very short time to be also able to check if the file was closed
|
|
119
|
+
# If the file is closed, we assume that we only have one writer so if we have
|
|
120
|
+
# data, we break out. This is to work around issues in macos
|
|
121
|
+
events = poll.poll(min(10, timeout * 1000))
|
|
122
|
+
timeout -= time.time() - poll_begin
|
|
123
|
+
|
|
122
124
|
try:
|
|
123
|
-
data = os.read(fifo_fd,
|
|
124
|
-
|
|
125
|
+
data = os.read(fifo_fd, 8192)
|
|
126
|
+
if data:
|
|
125
127
|
content += data
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
128
|
+
else:
|
|
129
|
+
if len(events):
|
|
130
|
+
# We read an EOF -- consider the file done
|
|
131
|
+
break
|
|
132
|
+
else:
|
|
133
|
+
# We had no events (just a timeout) and the read didn't return
|
|
134
|
+
# an exception so the file is still open; we continue waiting for data
|
|
135
|
+
# Unfortunately, on MacOS, it seems that even *after* the file is
|
|
136
|
+
# closed on the other end, we still don't get a BlockingIOError so
|
|
137
|
+
# we hack our way and timeout if there is no write in 30ms which is
|
|
138
|
+
# a relative eternity for file writes.
|
|
139
|
+
if content:
|
|
140
|
+
if max_timeout <= 0:
|
|
141
|
+
break
|
|
142
|
+
max_timeout -= 1
|
|
143
|
+
continue
|
|
131
144
|
except BlockingIOError:
|
|
132
|
-
|
|
133
|
-
|
|
145
|
+
has_blocking_error = True
|
|
146
|
+
if content:
|
|
147
|
+
# The file was closed
|
|
148
|
+
break
|
|
149
|
+
# else, if we have no content, we continue waiting for the file to be open
|
|
150
|
+
# and written to.
|
|
134
151
|
|
|
135
152
|
if not content and check_process_exited(command_obj):
|
|
136
153
|
raise CalledProcessError(command_obj.process.returncode, command_obj.command)
|
|
@@ -156,7 +173,7 @@ async def async_read_from_fifo_when_ready(
|
|
|
156
173
|
encoding : str, optional
|
|
157
174
|
Encoding to use while reading the file, by default "utf-8".
|
|
158
175
|
timeout : int, optional
|
|
159
|
-
Timeout for reading the file in
|
|
176
|
+
Timeout for reading the file in seconds, by default 3600.
|
|
160
177
|
|
|
161
178
|
Returns
|
|
162
179
|
-------
|
|
@@ -206,7 +223,7 @@ def handle_timeout(
|
|
|
206
223
|
command_obj : CommandManager
|
|
207
224
|
Command manager object that encapsulates the running command details.
|
|
208
225
|
file_read_timeout : int
|
|
209
|
-
Timeout for reading the file
|
|
226
|
+
Timeout for reading the file, in seconds
|
|
210
227
|
|
|
211
228
|
Returns
|
|
212
229
|
-------
|
|
@@ -243,7 +260,7 @@ async def async_handle_timeout(
|
|
|
243
260
|
command_obj : CommandManager
|
|
244
261
|
Command manager object that encapsulates the running command details.
|
|
245
262
|
file_read_timeout : int
|
|
246
|
-
Timeout for reading the file
|
|
263
|
+
Timeout for reading the file, in seconds
|
|
247
264
|
|
|
248
265
|
Returns
|
|
249
266
|
-------
|