ob-metaflow 2.12.36.3__py2.py3-none-any.whl → 2.12.39.1__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of ob-metaflow might be problematic.
- metaflow/__init__.py +3 -0
- metaflow/cli.py +84 -697
- metaflow/cli_args.py +17 -0
- metaflow/cli_components/__init__.py +0 -0
- metaflow/cli_components/dump_cmd.py +96 -0
- metaflow/cli_components/init_cmd.py +51 -0
- metaflow/cli_components/run_cmds.py +358 -0
- metaflow/cli_components/step_cmd.py +189 -0
- metaflow/cli_components/utils.py +140 -0
- metaflow/cmd/develop/stub_generator.py +9 -2
- metaflow/decorators.py +63 -2
- metaflow/extension_support/plugins.py +41 -27
- metaflow/flowspec.py +156 -16
- metaflow/includefile.py +50 -22
- metaflow/metaflow_config.py +1 -1
- metaflow/package.py +17 -3
- metaflow/parameters.py +80 -23
- metaflow/plugins/__init__.py +4 -0
- metaflow/plugins/airflow/airflow_cli.py +1 -0
- metaflow/plugins/argo/argo_workflows.py +41 -1
- metaflow/plugins/argo/argo_workflows_cli.py +1 -0
- metaflow/plugins/argo/argo_workflows_deployer_objects.py +47 -1
- metaflow/plugins/aws/batch/batch_decorator.py +2 -2
- metaflow/plugins/aws/step_functions/step_functions.py +32 -0
- metaflow/plugins/aws/step_functions/step_functions_cli.py +1 -0
- metaflow/plugins/aws/step_functions/step_functions_deployer_objects.py +3 -0
- metaflow/plugins/datatools/s3/s3op.py +3 -3
- metaflow/plugins/kubernetes/kubernetes_cli.py +1 -1
- metaflow/plugins/kubernetes/kubernetes_decorator.py +2 -2
- metaflow/plugins/pypi/bootstrap.py +196 -61
- metaflow/plugins/pypi/conda_decorator.py +20 -10
- metaflow/plugins/pypi/conda_environment.py +76 -21
- metaflow/plugins/pypi/micromamba.py +42 -15
- metaflow/plugins/pypi/pip.py +8 -3
- metaflow/plugins/pypi/pypi_decorator.py +11 -9
- metaflow/plugins/timeout_decorator.py +2 -2
- metaflow/runner/click_api.py +73 -19
- metaflow/runner/deployer.py +1 -1
- metaflow/runner/deployer_impl.py +2 -2
- metaflow/runner/metaflow_runner.py +4 -1
- metaflow/runner/nbdeploy.py +2 -0
- metaflow/runner/nbrun.py +1 -1
- metaflow/runner/subprocess_manager.py +3 -1
- metaflow/runner/utils.py +41 -19
- metaflow/runtime.py +111 -73
- metaflow/sidecar/sidecar_worker.py +1 -1
- metaflow/user_configs/__init__.py +0 -0
- metaflow/user_configs/config_decorators.py +563 -0
- metaflow/user_configs/config_options.py +495 -0
- metaflow/user_configs/config_parameters.py +386 -0
- metaflow/util.py +17 -0
- metaflow/version.py +1 -1
- {ob_metaflow-2.12.36.3.dist-info → ob_metaflow-2.12.39.1.dist-info}/METADATA +3 -2
- {ob_metaflow-2.12.36.3.dist-info → ob_metaflow-2.12.39.1.dist-info}/RECORD +58 -48
- {ob_metaflow-2.12.36.3.dist-info → ob_metaflow-2.12.39.1.dist-info}/LICENSE +0 -0
- {ob_metaflow-2.12.36.3.dist-info → ob_metaflow-2.12.39.1.dist-info}/WHEEL +0 -0
- {ob_metaflow-2.12.36.3.dist-info → ob_metaflow-2.12.39.1.dist-info}/entry_points.txt +0 -0
- {ob_metaflow-2.12.36.3.dist-info → ob_metaflow-2.12.39.1.dist-info}/top_level.txt +0 -0
metaflow/plugins/pypi/conda_decorator.py
CHANGED
@@ -50,20 +50,26 @@ class CondaStepDecorator(StepDecorator):
     # conda channels, users can specify channel::package as the package name.
 
     def __init__(self, attributes=None, statically_defined=False):
-        self._user_defined_attributes = (
-            attributes.copy() if attributes is not None else {}
+        self._attributes_with_user_values = (
+            set(attributes.keys()) if attributes is not None else set()
         )
+
         super(CondaStepDecorator, self).__init__(attributes, statically_defined)
 
+    def init(self):
+        super(CondaStepDecorator, self).init()
+
         # Support legacy 'libraries=' attribute for the decorator.
         self.attributes["packages"] = {
             **self.attributes["libraries"],
             **self.attributes["packages"],
         }
         del self.attributes["libraries"]
+        if self.attributes["packages"]:
+            self._attributes_with_user_values.add("packages")
 
     def is_attribute_user_defined(self, name):
-        return name in self._user_defined_attributes
+        return name in self._attributes_with_user_values
 
     def step_init(self, flow, graph, step, decos, environment, flow_datastore, logger):
        # The init_environment hook for Environment creates the relevant virtual
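
The change above swaps a copied attribute dict for a set of explicitly passed keys, so `is_attribute_user_defined` answers "did the user set this?" rather than "does this key exist?". A minimal standalone sketch of the pattern (the `FakeDecorator` class and its defaults are hypothetical, not Metaflow's):

    class FakeDecorator:
        defaults = {"packages": {}, "python": None}

        def __init__(self, attributes=None):
            # Record which keys the caller explicitly passed, before defaults merge in.
            self._attributes_with_user_values = (
                set(attributes.keys()) if attributes is not None else set()
            )
            self.attributes = {**self.defaults, **(attributes or {})}

        def is_attribute_user_defined(self, name):
            return name in self._attributes_with_user_values

    d = FakeDecorator(attributes={"python": "3.11"})
    assert d.is_attribute_user_defined("python")        # explicitly set
    assert not d.is_attribute_user_defined("packages")  # present, but only as a default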
@@ -83,10 +89,10 @@ class CondaStepDecorator(StepDecorator):
                 **super_attributes["packages"],
                 **self.attributes["packages"],
             }
-            self._user_defined_attributes = {
-                **self._user_defined_attributes,
-                **conda_base._user_defined_attributes,
-            }
+            self._attributes_with_user_values.update(
+                conda_base._attributes_with_user_values
+            )
+
             self.attributes["python"] = (
                 self.attributes["python"] or super_attributes["python"]
             )
@@ -333,11 +339,15 @@ class CondaFlowDecorator(FlowDecorator):
     }
 
     def __init__(self, attributes=None, statically_defined=False):
-        self._user_defined_attributes = (
-            attributes.copy() if attributes is not None else {}
+        self._attributes_with_user_values = (
+            set(attributes.keys()) if attributes is not None else set()
         )
+
         super(CondaFlowDecorator, self).__init__(attributes, statically_defined)
 
+    def init(self):
+        super(CondaFlowDecorator, self).init()
+
         # Support legacy 'libraries=' attribute for the decorator.
         self.attributes["packages"] = {
             **self.attributes["libraries"],
@@ -348,7 +358,7 @@ class CondaFlowDecorator(FlowDecorator):
         self.attributes["python"] = str(self.attributes["python"])
 
     def is_attribute_user_defined(self, name):
-        return name in self._user_defined_attributes
+        return name in self._attributes_with_user_values
 
     def flow_init(
         self, flow, graph, environment, flow_datastore, metadata, logger, echo, options
metaflow/plugins/pypi/conda_environment.py
CHANGED
@@ -5,10 +5,11 @@ import functools
 import io
 import json
 import os
-import sys
 import tarfile
+import threading
 import time
-from concurrent.futures import ThreadPoolExecutor
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from functools import wraps
 from hashlib import sha256
 from io import BufferedIOBase, BytesIO
 from itertools import chain
@@ -50,7 +51,6 @@ class CondaEnvironment(MetaflowEnvironment):
 
     def validate_environment(self, logger, datastore_type):
         self.datastore_type = datastore_type
-        self.logger = logger
 
         # Avoiding circular imports.
         from metaflow.plugins import DATASTORES
@@ -62,8 +62,21 @@ class CondaEnvironment(MetaflowEnvironment):
         from .micromamba import Micromamba
         from .pip import Pip
 
-        micromamba = Micromamba()
-        self.solvers = {"conda": micromamba, "pypi": Pip(micromamba)}
+        print_lock = threading.Lock()
+
+        def make_thread_safe(func):
+            @wraps(func)
+            def wrapper(*args, **kwargs):
+                with print_lock:
+                    return func(*args, **kwargs)
+
+            return wrapper
+
+        self.logger = make_thread_safe(logger)
+
+        # TODO: Wire up logging
+        micromamba = Micromamba(self.logger)
+        self.solvers = {"conda": micromamba, "pypi": Pip(micromamba, self.logger)}
 
     def init_environment(self, echo, only_steps=None):
         # The implementation optimizes for latency to ensure as many operations can
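
The hunk above wraps the logger in a lock because the bootstrap below fans work out to threads, and unsynchronized writes would interleave log lines. The same wrapper in isolation, as a hedged sketch using plain `print` in place of Metaflow's logger:

    import threading
    from functools import wraps

    print_lock = threading.Lock()

    def make_thread_safe(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            # Serialize all calls through one lock so output lines never interleave.
            with print_lock:
                return func(*args, **kwargs)
        return wrapper

    log = make_thread_safe(print)

    threads = [threading.Thread(target=log, args=("line %d" % i,)) for i in range(4)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()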
@@ -150,6 +163,9 @@ class CondaEnvironment(MetaflowEnvironment):
                     (
                         package["path"],
                         # Lazily fetch package from the interweb if needed.
+                        # TODO: Depending on the len_hint, the package might be downloaded from
+                        # the interweb prematurely. save_bytes needs to be adjusted to handle
+                        # this scenario.
                         LazyOpen(
                             package["local_path"],
                             "rb",
@@ -166,22 +182,60 @@ class CondaEnvironment(MetaflowEnvironment):
             if id_ in dirty:
                 self.write_to_environment_manifest([id_, platform, type_], packages)
 
-
+        storage = None
+        if self.datastore_type not in ["local"]:
+            # Initialize storage for caching if using a remote datastore
+            storage = self.datastore(_datastore_packageroot(self.datastore, echo))
+
         self.logger("Bootstrapping virtual environment(s) ...")
-        …
+        # Sequence of operations:
+        # 1. Start all conda solves in parallel
+        # 2. Download conda packages sequentially
+        # 3. Create and cache conda environments in parallel
+        # 4. Start PyPI solves in parallel after each conda environment is created
+        # 5. Download PyPI packages sequentially
+        # 6. Create and cache PyPI environments in parallel
+
+        with ThreadPoolExecutor() as executor:
+            # Start all conda solves in parallel
+            conda_futures = [
+                executor.submit(lambda x: solve(*x, "conda"), env)
+                for env in environments("conda")
+            ]
+
+            pypi_envs = {env[0]: env for env in environments("pypi")}
+            pypi_futures = []
+
+            # Process conda results sequentially for downloads
+            for future in as_completed(conda_futures):
+                result = future.result()
+                # Sequential conda download
+                self.solvers["conda"].download(*result)
+                # Parallel conda create and cache
+                create_future = executor.submit(self.solvers["conda"].create, *result)
+                if storage:
+                    executor.submit(cache, storage, [result], "conda")
+
+                # Queue PyPI solve to start after conda create
+                if result[0] in pypi_envs:
+
+                    def pypi_solve(env):
+                        create_future.result()  # Wait for conda create
+                        return solve(*env, "pypi")
+
+                    pypi_futures.append(
+                        executor.submit(pypi_solve, pypi_envs[result[0]])
+                    )
+
+            # Process PyPI results sequentially for downloads
+            for solve_future in pypi_futures:
+                result = solve_future.result()
+                # Sequential PyPI download
+                self.solvers["pypi"].download(*result)
+                # Parallel PyPI create and cache
+                executor.submit(self.solvers["pypi"].create, *result)
+                if storage:
+                    executor.submit(cache, storage, [result], "pypi")
         self.logger("Virtual environment(s) bootstrapped!")
 
     def executable(self, step_name, default=None):
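
The rewritten bootstrap pipelines the phases instead of finishing each one for all environments before starting the next: solves and creates run in parallel, downloads stay sequential, and each PyPI solve is chained behind its conda create via the captured future. A reduced sketch of that scheduling shape, with stand-in functions in place of the real solver:

    import time
    from concurrent.futures import ThreadPoolExecutor, as_completed

    def solve(env):      # stand-in for the parallel dependency solve
        time.sleep(0.1)
        return env

    def download(env):   # stand-in for the deliberately sequential download
        time.sleep(0.05)

    def create(env):     # stand-in for the parallel environment creation
        time.sleep(0.1)

    with ThreadPoolExecutor() as executor:
        solves = [executor.submit(solve, e) for e in ["a", "b", "c"]]
        creates = []
        for future in as_completed(solves):   # handle whichever solve finishes first
            env = future.result()
            download(env)                     # serialized in the submitting thread
            creates.append(executor.submit(create, env))  # overlaps other downloads
        for c in creates:
            c.result()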
@@ -385,7 +439,8 @@ class CondaEnvironment(MetaflowEnvironment):
                 'DISABLE_TRACING=True python -m metaflow.plugins.pypi.bootstrap "%s" %s "%s" linux-64'
                 % (self.flow.name, id_, self.datastore_type),
                 "echo 'Environment bootstrapped.'",
-                …
+                # To avoid having to install micromamba in the PATH in micromamba.py, we add it to the PATH here.
+                "export PATH=$PATH:$(pwd)/micromamba/bin",
             ]
         else:
             # for @conda/@pypi(disabled=True).
metaflow/plugins/pypi/micromamba.py
CHANGED
@@ -1,7 +1,9 @@
+import functools
 import json
 import os
 import subprocess
 import tempfile
+import time
 
 from metaflow.exception import MetaflowException
 from metaflow.util import which
@@ -19,8 +21,11 @@ class MicromambaException(MetaflowException):
         super(MicromambaException, self).__init__(msg)
 
 
+GLIBC_VERSION = os.environ.get("CONDA_OVERRIDE_GLIBC", "2.38")
+
+
 class Micromamba(object):
-    def __init__(self):
+    def __init__(self, logger=None):
         # micromamba is a tiny version of the mamba package manager and comes with
         # metaflow specific performance enhancements.
 
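
`CONDA_OVERRIDE_GLIBC` is conda/mamba's standard override for the `__glibc` virtual package: it tells the solver which glibc to assume instead of probing the host, which matters when solving linux-64 environments from, say, a macOS client. Pinning a default of 2.38 makes solves reproducible, and since the default is read from the environment, a user targeting an older runtime image can presumably still lower it, e.g.:

    CONDA_OVERRIDE_GLIBC=2.27 python flow.py --environment=conda run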
@@ -33,6 +38,12 @@ class Micromamba(object):
             os.path.expanduser(_home),
             "micromamba",
         )
+
+        if logger:
+            self.logger = logger
+        else:
+            self.logger = lambda *args, **kwargs: None  # No-op logger if not provided
+
         self.bin = (
             which(os.environ.get("METAFLOW_PATH_TO_MICROMAMBA") or "micromamba")
             or which("./micromamba")  # to support remote execution
@@ -70,6 +81,9 @@ class Micromamba(object):
             "MAMBA_ADD_PIP_AS_PYTHON_DEPENDENCY": "true",
             "CONDA_SUBDIR": platform,
             # "CONDA_UNSATISFIABLE_HINTS_CHECK_DEPTH": "0" # https://github.com/conda/conda/issues/9862
+            # Add a default glibc version for linux-64 environments (ignored for other platforms)
+            # TODO: Make the version configurable
+            "CONDA_OVERRIDE_GLIBC": GLIBC_VERSION,
         }
         cmd = [
             "create",
@@ -78,6 +92,7 @@ class Micromamba(object):
             "--dry-run",
             "--no-extra-safety-checks",
             "--repodata-ttl=86400",
+            "--safety-checks=disabled",
             "--retry-clean-cache",
             "--prefix=%s/prefix" % tmp_dir,
         ]
@@ -91,10 +106,11 @@ class Micromamba(object):
             cmd.append("python==%s" % python)
         # TODO: Ensure a human readable message is returned when the environment
         # can't be resolved for any and all reasons.
-        return [
+        solved_packages = [
             {k: v for k, v in item.items() if k in ["url"]}
             for item in self._call(cmd, env)["actions"]["LINK"]
         ]
+        return solved_packages
 
     def download(self, id_, packages, python, platform):
         # Unfortunately all the packages need to be catalogued in package cache
@@ -103,8 +119,6 @@ class Micromamba(object):
         # Micromamba is painfully slow in determining if many packages are infact
         # already cached. As a perf heuristic, we check if the environment already
         # exists to short circuit package downloads.
-        if self.path_to_environment(id_, platform):
-            return
 
         prefix = "{env_dirs}/{keyword}/{platform}/{id}".format(
             env_dirs=self.info()["envs_dirs"][0],
@@ -113,13 +127,18 @@ class Micromamba(object):
             id=id_,
         )
 
-        # …
+        # cheap check
         if os.path.exists(f"{prefix}/fake.done"):
             return
 
+        # somewhat expensive check
+        if self.path_to_environment(id_, platform):
+            return
+
         with tempfile.TemporaryDirectory() as tmp_dir:
             env = {
                 "CONDA_SUBDIR": platform,
+                "CONDA_OVERRIDE_GLIBC": GLIBC_VERSION,
             }
             cmd = [
                 "create",
@@ -159,6 +178,7 @@ class Micromamba(object):
                 # use hardlinks when possible, otherwise copy files
                 # disabled for now since it adds to environment creation latencies
                 "CONDA_ALLOW_SOFTLINKS": "0",
+                "CONDA_OVERRIDE_GLIBC": GLIBC_VERSION,
             }
             cmd = [
                 "create",
@@ -174,6 +194,7 @@ class Micromamba(object):
             cmd.append("{url}".format(**package))
         self._call(cmd, env)
 
+    @functools.lru_cache(maxsize=None)
     def info(self):
         return self._call(["config", "list", "-a"])
 
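
`functools.lru_cache` on a zero-argument method caches per instance (the cache key is `(self,)`), so repeated `info()` calls skip re-running `micromamba config list -a` as a subprocess. The trade-off is that the cache keeps a reference to `self` alive for the life of the process, which is harmless here since the object is long-lived. The behavior, sketched with a hypothetical client:

    import functools
    import time

    class FakeClient:
        @functools.lru_cache(maxsize=None)
        def info(self):
            time.sleep(0.2)  # stand-in for an expensive subprocess call
            return {"envs_dirs": ["/tmp/envs"]}

    c = FakeClient()
    start = time.time()
    c.info()                     # slow: executes the body once
    first = time.time() - start
    start = time.time()
    c.info()                     # fast: served from the cache keyed on (self,)
    second = time.time() - start
    assert second < first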
@@ -198,18 +219,24 @@ class Micromamba(object):
         }
         directories = self.info()["pkgs_dirs"]
         # search all package caches for packages
-        metadata = {
-            url: os.path.join(d, file)
+
+        file_to_path = {}
+        for d in directories:
+            if os.path.isdir(d):
+                try:
+                    with os.scandir(d) as entries:
+                        for entry in entries:
+                            if entry.is_file():
+                                # Prefer the first occurrence if the file exists in multiple directories
+                                file_to_path.setdefault(entry.name, entry.path)
+                except OSError:
+                    continue
+        ret = {
+            # set package tarball local paths to None if package tarballs are missing
+            url: file_to_path.get(file)
             for url, file in packages_to_filenames.items()
-            for d in directories
-            if os.path.isdir(d)
-            and file in os.listdir(d)
-            and os.path.isfile(os.path.join(d, file))
         }
-
-        for url in packages_to_filenames:
-            metadata.setdefault(url, None)
-        return metadata
+        return ret
 
     def interpreter(self, id_):
         return os.path.join(self.path_to_environment(id_), "bin/python")
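
The old lookup re-listed every package-cache directory once per package (`os.listdir` inside the comprehension), which is quadratic in practice; the rewrite scans each directory once with `os.scandir`, builds a filename-to-path index, and resolves all packages against it. The indexing half of that change, as a standalone sketch (the lookup key below is a made-up filename):

    import os

    def index_files(directories):
        # One scandir pass per directory; the first occurrence of a filename wins.
        file_to_path = {}
        for d in directories:
            if os.path.isdir(d):
                try:
                    with os.scandir(d) as entries:
                        for entry in entries:
                            if entry.is_file():
                                file_to_path.setdefault(entry.name, entry.path)
                except OSError:
                    continue
        return file_to_path

    index = index_files(["/tmp"])
    print(index.get("numpy-2.1.0-py312_0.conda"))  # None when the tarball is absent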
metaflow/plugins/pypi/pip.py
CHANGED
@@ -4,6 +4,7 @@ import re
 import shutil
 import subprocess
 import tempfile
+import time
 from concurrent.futures import ThreadPoolExecutor
 from itertools import chain, product
 from urllib.parse import unquote
@@ -50,10 +51,14 @@ INSTALLATION_MARKER = "{prefix}/.pip/id"
 
 
 class Pip(object):
-    def __init__(self, micromamba=None):
+    def __init__(self, micromamba=None, logger=None):
         # pip is assumed to be installed inside a conda environment managed by
         # micromamba. pip commands are executed using `micromamba run --prefix`
-        self.micromamba = micromamba or Micromamba()
+        self.micromamba = micromamba or Micromamba(logger)
+        if logger:
+            self.logger = logger
+        else:
+            self.logger = lambda *args, **kwargs: None  # No-op logger if not provided
 
     def solve(self, id_, packages, python, platform):
         prefix = self.micromamba.path_to_environment(id_)
@@ -123,7 +128,7 @@ class Pip(object):
                         **res,
                         subdir_str=(
                             "#subdirectory=%s" % subdirectory if subdirectory else ""
-                        )
+                        ),
                     )
                     # used to deduplicate the storage location in case wheel does not
                     # build with enough unique identifiers.
metaflow/plugins/pypi/pypi_decorator.py
CHANGED
@@ -25,9 +25,10 @@ class PyPIStepDecorator(StepDecorator):
     defaults = {"packages": {}, "python": None, "disabled": None}  # wheels
 
     def __init__(self, attributes=None, statically_defined=False):
-        self._user_defined_attributes = (
-            attributes.copy() if attributes is not None else {}
+        self._attributes_with_user_values = (
+            set(attributes.keys()) if attributes is not None else set()
         )
+
         super().__init__(attributes, statically_defined)
 
     def step_init(self, flow, graph, step, decos, environment, flow_datastore, logger):
@@ -42,10 +43,9 @@ class PyPIStepDecorator(StepDecorator):
         if "pypi_base" in self.flow._flow_decorators:
             pypi_base = self.flow._flow_decorators["pypi_base"][0]
             super_attributes = pypi_base.attributes
-            self._user_defined_attributes = {
-                **self._user_defined_attributes,
-                **pypi_base._user_defined_attributes,
-            }
+            self._attributes_with_user_values.update(
+                pypi_base._attributes_with_user_values
+            )
             self.attributes["packages"] = {
                 **super_attributes["packages"],
                 **self.attributes["packages"],
@@ -106,7 +106,7 @@ class PyPIStepDecorator(StepDecorator):
         environment.set_local_root(LocalStorage.get_datastore_root_from_config(logger))
 
     def is_attribute_user_defined(self, name):
-        return name in self._user_defined_attributes
+        return name in self._attributes_with_user_values
 
 
 class PyPIFlowDecorator(FlowDecorator):
@@ -129,9 +129,10 @@ class PyPIFlowDecorator(FlowDecorator):
     defaults = {"packages": {}, "python": None, "disabled": None}
 
     def __init__(self, attributes=None, statically_defined=False):
-        self._user_defined_attributes = (
-            attributes.copy() if attributes is not None else {}
+        self._attributes_with_user_values = (
+            set(attributes.keys()) if attributes is not None else set()
         )
+
         super().__init__(attributes, statically_defined)
 
     def flow_init(
@@ -140,6 +141,7 @@ class PyPIFlowDecorator(FlowDecorator):
         from metaflow import decorators
 
         decorators._attach_decorators(flow, ["pypi"])
+        decorators._init(flow)
 
         # @pypi uses a conda environment to create a virtual environment.
         # The conda environment can be created through micromamba.
metaflow/plugins/timeout_decorator.py
CHANGED
@@ -37,8 +37,8 @@ class TimeoutDecorator(StepDecorator):
     name = "timeout"
     defaults = {"seconds": 0, "minutes": 0, "hours": 0}
 
-    def __init__(self, *args, **kwargs):
-        super(TimeoutDecorator, self).__init__(*args, **kwargs)
+    def init(self):
+        super().init()
         # Initialize secs in __init__ so other decorators could safely use this
         # value without worrying about decorator order.
         # Convert values in attributes to type:int since they can be type:str
metaflow/runner/click_api.py
CHANGED
@@ -9,6 +9,7 @@ if sys.version_info < (3, 7):
     )
 
 import datetime
+import functools
 import importlib
 import inspect
 import itertools
@@ -38,6 +39,7 @@ from metaflow.decorators import add_decorator_options
 from metaflow.exception import MetaflowException
 from metaflow.includefile import FilePathClass
 from metaflow.parameters import JSONTypeClass, flow_context
+from metaflow.user_configs.config_options import LocalFileInput
 
 # Define a recursive type alias for JSON
 JSON = Union[Dict[str, "JSON"], List["JSON"], str, int, float, bool, None]
@@ -55,6 +57,7 @@ click_to_python_types = {
     File: str,
     JSONTypeClass: JSON,
     FilePathClass: str,
+    LocalFileInput: str,
 }
 
 
@@ -124,6 +127,37 @@ def _method_sanity_check(
     return method_params
 
 
+def _lazy_load_command(
+    cli_collection: click.Group,
+    flow_parameters: Union[str, List[Parameter]],
+    _self,
+    name: str,
+):
+
+    # Context is not used in get_command so we can pass None. Since we pin click,
+    # this won't change from under us.
+
+    if isinstance(flow_parameters, str):
+        # Resolve flow_parameters -- for start, this is a function which we
+        # need to call to figure out the actual parameters (may be changed by configs)
+        flow_parameters = getattr(_self, flow_parameters)()
+    cmd_obj = cli_collection.get_command(None, name)
+    if cmd_obj:
+        if isinstance(cmd_obj, click.Group):
+            # TODO: possibly check for fake groups with cmd_obj.name in ["cli", "main"]
+            result = functools.partial(extract_group(cmd_obj, flow_parameters), _self)
+        elif isinstance(cmd_obj, click.Command):
+            result = functools.partial(extract_command(cmd_obj, flow_parameters), _self)
+        else:
+            raise RuntimeError(
+                "Cannot handle %s of type %s" % (cmd_obj.name, type(cmd_obj))
+            )
+        setattr(_self, name, result)
+        return result
+    else:
+        raise AttributeError()
+
+
 def get_annotation(param: Union[click.Argument, click.Option]):
     py_type = click_to_python_types[type(param.type)]
     if not param.required:
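
The generated API class now resolves sub-commands lazily: `__getattr__` fires only on missing attributes, `_lazy_load_command` builds the wrapper on first access and caches it with `setattr`, so later lookups bypass `__getattr__` entirely; the comment about `functools.partial` refers to partials not being descriptors, so `self` must be passed by hand. The caching pattern in isolation (all names below are hypothetical):

    import functools

    def _load(prefix, _self, name):
        # Stand-in for building a command wrapper (hypothetical logic).
        result = lambda: "%s:%s" % (prefix, name)
        setattr(_self, name, result)  # cache on the instance: next access skips __getattr__
        return result

    class API:
        _internal_getattr = functools.partial(_load, "cmd")

        def __getattr__(self, name):
            # functools.partial has no __get__, so self must be bound explicitly.
            return self._internal_getattr(self, name)

    api = API()
    print(api.run())  # built lazily on first attribute access
    print(api.run())  # second call hits the cached attribute directly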
@@ -179,9 +213,11 @@ def extract_flow_class_from_file(flow_file: str) -> FlowSpec:
 
 
 class MetaflowAPI(object):
-    def __init__(self, parent=None, **kwargs):
+    def __init__(self, parent=None, flow_cls=None, **kwargs):
         self._parent = parent
         self._chain = [{self._API_NAME: kwargs}]
+        self._flow_cls = flow_cls
+        self._cached_computed_parameters = None
 
     @property
     def parent(self):
@@ -200,23 +236,22 @@ class MetaflowAPI(object):
     @classmethod
     def from_cli(cls, flow_file: str, cli_collection: Callable) -> Callable:
         flow_cls = extract_flow_class_from_file(flow_file)
 
         with flow_context(flow_cls) as _:
             add_decorator_options(cli_collection)
 
-        …
-        )
+        def getattr_wrapper(_self, name):
+            # Functools.partial do not automatically bind self (no __get__)
+            return _self._internal_getattr(_self, name)
+
+        class_dict = {
+            "__module__": "metaflow",
+            "_API_NAME": flow_file,
+            "_internal_getattr": functools.partial(
+                _lazy_load_command, cli_collection, "_compute_flow_parameters"
+            ),
+            "__getattr__": getattr_wrapper,
+        }
 
         to_return = type(flow_file, (MetaflowAPI,), class_dict)
         to_return.__name__ = flow_file
@@ -237,11 +272,11 @@ class MetaflowAPI(object):
                 defaults,
                 **kwargs,
             )
-            return to_return(parent=None, **method_params)
+            return to_return(parent=None, flow_cls=flow_cls, **method_params)
 
         m = _method
-        m.__name__ = …
-        m.__doc__ = getattr(…
+        m.__name__ = cli_collection.name
+        m.__doc__ = getattr(cli_collection, "help", None)
         m.__signature__ = inspect.signature(_method).replace(
             parameters=params_sigs.values()
         )
@@ -287,6 +322,25 @@ class MetaflowAPI(object):
 
         return components
 
+    def _compute_flow_parameters(self):
+        if self._flow_cls is None or self._parent is not None:
+            raise RuntimeError(
+                "Computing flow-level parameters for a non start API. "
+                "Please report to the Metaflow team."
+            )
+        # TODO: We need to actually compute the new parameters (based on configs) which
+        # would involve processing the options at least partially. We will do this
+        # before GA but for now making it work for regular parameters
+        if self._cached_computed_parameters is not None:
+            return self._cached_computed_parameters
+        self._cached_computed_parameters = []
+        for _, param in self._flow_cls._get_parameters():
+            if param.IS_CONFIG_PARAMETER:
+                continue
+            param.init()
+            self._cached_computed_parameters.append(param)
+        return self._cached_computed_parameters
+
 
 def extract_all_params(cmd_obj: Union[click.Command, click.Group]):
     arg_params_sigs = OrderedDict()
@@ -351,7 +405,7 @@ def extract_group(cmd_obj: click.Group, flow_parameters: List[Parameter]) -> Callable:
         method_params = _method_sanity_check(
             possible_arg_params, possible_opt_params, annotations, defaults, **kwargs
         )
-        return resulting_class(parent=_self, **method_params)
+        return resulting_class(parent=_self, flow_cls=None, **method_params)
 
     m = _method
     m.__name__ = cmd_obj.name
metaflow/runner/deployer.py
CHANGED
@@ -64,7 +64,7 @@ class Deployer(metaclass=DeployerMeta):
         The directory to run the subprocess in; if not specified, the current
         directory is used.
     file_read_timeout : int, default 3600
-        The timeout until which we try to read the deployer attribute file.
+        The timeout until which we try to read the deployer attribute file (in seconds).
     **kwargs : Any
         Additional arguments that you would pass to `python myflow.py` before
         the deployment command.
metaflow/runner/deployer_impl.py
CHANGED
@@ -37,7 +37,7 @@ class DeployerImpl(object):
         The directory to run the subprocess in; if not specified, the current
         directory is used.
     file_read_timeout : int, default 3600
-        The timeout until which we try to read the deployer attribute file.
+        The timeout until which we try to read the deployer attribute file (in seconds).
     **kwargs : Any
         Additional arguments that you would pass to `python myflow.py` before
         the deployment command.
@@ -144,7 +144,7 @@ class DeployerImpl(object):
             # Additional info is used to pass additional deployer specific information.
             # It is used in non-OSS deployers (extensions).
             self.additional_info = content.get("additional_info", {})
-            …
+            command_obj.sync_wait()
             if command_obj.process.returncode == 0:
                 return create_class(deployer=self)
 
metaflow/runner/metaflow_runner.py
CHANGED
@@ -221,7 +221,7 @@ class Runner(object):
         The directory to run the subprocess in; if not specified, the current
         directory is used.
     file_read_timeout : int, default 3600
-        The timeout until which we try to read the runner attribute file.
+        The timeout until which we try to read the runner attribute file (in seconds).
     **kwargs : Any
         Additional arguments that you would pass to `python myflow.py` before
         the `run` command.
@@ -272,6 +272,9 @@ class Runner(object):
 
     def __get_executing_run(self, attribute_file_fd, command_obj):
         content = handle_timeout(attribute_file_fd, command_obj, self.file_read_timeout)
+
+        command_obj.sync_wait()
+
         content = json.loads(content)
         pathspec = "%s/%s" % (content.get("flow_name"), content.get("run_id"))
 
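
Both the runner and the deployer now call `command_obj.sync_wait()` after reading the attribute file, making the wait for subprocess exit explicit, and the docstrings clarify that `file_read_timeout` is measured in seconds. A hedged usage sketch against the documented `Runner` API (the flow file name and parameter are placeholders):

    from metaflow import Runner

    # file_read_timeout is in seconds (default 3600)
    with Runner("flow.py", file_read_timeout=600).run(alpha=5) as running:
        print(running.status)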