ob-metaflow 2.12.30.2__py2.py3-none-any.whl → 2.13.6.1__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of ob-metaflow has been flagged as potentially problematic.
- metaflow/__init__.py +3 -0
- metaflow/cards.py +1 -0
- metaflow/cli.py +185 -717
- metaflow/cli_args.py +17 -0
- metaflow/cli_components/__init__.py +0 -0
- metaflow/cli_components/dump_cmd.py +96 -0
- metaflow/cli_components/init_cmd.py +51 -0
- metaflow/cli_components/run_cmds.py +362 -0
- metaflow/cli_components/step_cmd.py +176 -0
- metaflow/cli_components/utils.py +140 -0
- metaflow/cmd/develop/stub_generator.py +9 -2
- metaflow/datastore/flow_datastore.py +2 -2
- metaflow/decorators.py +63 -2
- metaflow/exception.py +8 -2
- metaflow/extension_support/plugins.py +42 -27
- metaflow/flowspec.py +176 -23
- metaflow/graph.py +28 -27
- metaflow/includefile.py +50 -22
- metaflow/lint.py +35 -20
- metaflow/metadata_provider/heartbeat.py +23 -8
- metaflow/metaflow_config.py +10 -1
- metaflow/multicore_utils.py +31 -14
- metaflow/package.py +17 -3
- metaflow/parameters.py +97 -25
- metaflow/plugins/__init__.py +22 -0
- metaflow/plugins/airflow/airflow.py +18 -17
- metaflow/plugins/airflow/airflow_cli.py +1 -0
- metaflow/plugins/argo/argo_client.py +0 -2
- metaflow/plugins/argo/argo_workflows.py +195 -132
- metaflow/plugins/argo/argo_workflows_cli.py +1 -1
- metaflow/plugins/argo/argo_workflows_decorator.py +2 -4
- metaflow/plugins/argo/argo_workflows_deployer_objects.py +51 -9
- metaflow/plugins/argo/jobset_input_paths.py +0 -1
- metaflow/plugins/aws/aws_utils.py +6 -1
- metaflow/plugins/aws/batch/batch_client.py +1 -3
- metaflow/plugins/aws/batch/batch_decorator.py +13 -13
- metaflow/plugins/aws/secrets_manager/aws_secrets_manager_secrets_provider.py +13 -10
- metaflow/plugins/aws/step_functions/dynamo_db_client.py +0 -3
- metaflow/plugins/aws/step_functions/production_token.py +1 -1
- metaflow/plugins/aws/step_functions/step_functions.py +33 -1
- metaflow/plugins/aws/step_functions/step_functions_cli.py +1 -1
- metaflow/plugins/aws/step_functions/step_functions_decorator.py +0 -1
- metaflow/plugins/aws/step_functions/step_functions_deployer_objects.py +7 -9
- metaflow/plugins/cards/card_cli.py +7 -2
- metaflow/plugins/cards/card_creator.py +1 -0
- metaflow/plugins/cards/card_decorator.py +79 -8
- metaflow/plugins/cards/card_modules/basic.py +56 -5
- metaflow/plugins/cards/card_modules/card.py +16 -1
- metaflow/plugins/cards/card_modules/components.py +64 -16
- metaflow/plugins/cards/card_modules/main.js +27 -25
- metaflow/plugins/cards/card_modules/test_cards.py +4 -4
- metaflow/plugins/cards/component_serializer.py +1 -1
- metaflow/plugins/datatools/s3/s3.py +12 -4
- metaflow/plugins/datatools/s3/s3op.py +3 -3
- metaflow/plugins/events_decorator.py +338 -186
- metaflow/plugins/kubernetes/kube_utils.py +84 -1
- metaflow/plugins/kubernetes/kubernetes.py +40 -92
- metaflow/plugins/kubernetes/kubernetes_cli.py +32 -7
- metaflow/plugins/kubernetes/kubernetes_decorator.py +76 -4
- metaflow/plugins/kubernetes/kubernetes_job.py +23 -20
- metaflow/plugins/kubernetes/kubernetes_jobsets.py +41 -20
- metaflow/plugins/kubernetes/spot_metadata_cli.py +69 -0
- metaflow/plugins/kubernetes/spot_monitor_sidecar.py +109 -0
- metaflow/plugins/parallel_decorator.py +4 -1
- metaflow/plugins/project_decorator.py +33 -5
- metaflow/plugins/pypi/bootstrap.py +249 -81
- metaflow/plugins/pypi/conda_decorator.py +20 -10
- metaflow/plugins/pypi/conda_environment.py +83 -27
- metaflow/plugins/pypi/micromamba.py +82 -37
- metaflow/plugins/pypi/pip.py +9 -6
- metaflow/plugins/pypi/pypi_decorator.py +11 -9
- metaflow/plugins/pypi/utils.py +4 -2
- metaflow/plugins/timeout_decorator.py +2 -2
- metaflow/runner/click_api.py +240 -50
- metaflow/runner/deployer.py +1 -1
- metaflow/runner/deployer_impl.py +12 -11
- metaflow/runner/metaflow_runner.py +68 -34
- metaflow/runner/nbdeploy.py +2 -0
- metaflow/runner/nbrun.py +1 -1
- metaflow/runner/subprocess_manager.py +61 -10
- metaflow/runner/utils.py +208 -44
- metaflow/runtime.py +216 -112
- metaflow/sidecar/sidecar_worker.py +1 -1
- metaflow/tracing/tracing_modules.py +4 -1
- metaflow/user_configs/__init__.py +0 -0
- metaflow/user_configs/config_decorators.py +563 -0
- metaflow/user_configs/config_options.py +548 -0
- metaflow/user_configs/config_parameters.py +436 -0
- metaflow/util.py +22 -0
- metaflow/version.py +1 -1
- {ob_metaflow-2.12.30.2.dist-info → ob_metaflow-2.13.6.1.dist-info}/METADATA +12 -3
- {ob_metaflow-2.12.30.2.dist-info → ob_metaflow-2.13.6.1.dist-info}/RECORD +96 -84
- {ob_metaflow-2.12.30.2.dist-info → ob_metaflow-2.13.6.1.dist-info}/WHEEL +1 -1
- {ob_metaflow-2.12.30.2.dist-info → ob_metaflow-2.13.6.1.dist-info}/LICENSE +0 -0
- {ob_metaflow-2.12.30.2.dist-info → ob_metaflow-2.13.6.1.dist-info}/entry_points.txt +0 -0
- {ob_metaflow-2.12.30.2.dist-info → ob_metaflow-2.13.6.1.dist-info}/top_level.txt +0 -0
metaflow/plugins/pypi/bootstrap.py

@@ -1,4 +1,5 @@
 import bz2
+import concurrent.futures
 import io
 import json
 import os
@@ -6,21 +7,33 @@ import shutil
 import subprocess
 import sys
 import tarfile
-
+import time
+from urllib.error import URLError
+from urllib.request import urlopen
 from metaflow.metaflow_config import DATASTORE_LOCAL_DIR
 from metaflow.plugins import DATASTORES
+from metaflow.plugins.pypi.utils import MICROMAMBA_MIRROR_URL, MICROMAMBA_URL
 from metaflow.util import which
+from urllib.request import Request
+import warnings
 
 from . import MAGIC_FILE, _datastore_packageroot
 
 # Bootstraps a valid conda virtual environment composed of conda and pypi packages
 
-if __name__ == "__main__":
-    if len(sys.argv) != 5:
-        print("Usage: bootstrap.py <flow_name> <id> <datastore_type> <architecture>")
-        sys.exit(1)
-    _, flow_name, id_, datastore_type, architecture = sys.argv
 
+def timer(func):
+    def wrapper(*args, **kwargs):
+        start_time = time.time()
+        result = func(*args, **kwargs)
+        duration = time.time() - start_time
+        # print(f"Time taken for {func.__name__}: {duration:.2f} seconds")
+        return result
+
+    return wrapper
+
+
+if __name__ == "__main__":
     # TODO: Detect architecture on the fly when dealing with arm architectures.
     # ARCH=$(uname -m)
     # OS=$(uname)
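The new timer decorator above wraps each bootstrap phase so its duration can be measured (the report itself is commented out in this release). A minimal standalone sketch of the same pattern, with the print enabled and nothing assumed beyond the standard library:

import time


def timer(func):
    # Same shape as the decorator in the diff, but with the report enabled.
    def wrapper(*args, **kwargs):
        start_time = time.time()
        result = func(*args, **kwargs)
        duration = time.time() - start_time
        print(f"Time taken for {func.__name__}: {duration:.2f} seconds")
        return result

    return wrapper


@timer
def busy_wait():
    time.sleep(0.1)


busy_wait()  # prints e.g.: Time taken for busy_wait: 0.10 seconds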
@@ -45,96 +58,251 @@ if __name__ == "__main__":
     # fi
     # fi
 
+    def run_cmd(cmd):
+        result = subprocess.run(
+            cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
+        )
+        if result.returncode != 0:
+            print(f"Bootstrap failed while executing: {cmd}")
+            print("Stdout:", result.stdout)
+            print("Stderr:", result.stderr)
+            sys.exit(1)
 
+    @timer
+    def install_micromamba(architecture):
+        micromamba_dir = os.path.join(os.getcwd(), "micromamba")
+        micromamba_path = os.path.join(micromamba_dir, "bin", "micromamba")
+
+        if which("micromamba"):
+            return which("micromamba")
+        if os.path.exists(micromamba_path):
+            os.environ["PATH"] += os.pathsep + os.path.dirname(micromamba_path)
+            return micromamba_path
+
+        # Download and extract in one go
+        url = MICROMAMBA_URL.format(platform=architecture, version="2.0.4")
+        mirror_url = MICROMAMBA_MIRROR_URL.format(
+            platform=architecture, version="2.0.4"
+        )
+
+        # Prepare directory once
+        os.makedirs(os.path.dirname(micromamba_path), exist_ok=True)
+
+        # Download and decompress in one go
+        def _download_and_extract(url):
+            headers = {
+                "Accept-Encoding": "gzip, deflate, br",
+                "Connection": "keep-alive",
+                "User-Agent": "python-urllib",
+            }
+
+            max_retries = 3
+            for attempt in range(max_retries):
+                try:
+                    req = Request(url, headers=headers)
 
+                    with urlopen(req) as response:
+                        decompressor = bz2.BZ2Decompressor()
+                        with warnings.catch_warnings():
+                            warnings.filterwarnings(
+                                "ignore", category=DeprecationWarning
+                            )
+                            with tarfile.open(
+                                fileobj=io.BytesIO(
+                                    decompressor.decompress(response.read())
+                                ),
+                                mode="r:",
+                            ) as tar:
+                                member = tar.getmember("bin/micromamba")
+                                tar.extract(member, micromamba_dir)
+                    break
+                except (URLError, IOError) as e:
+                    if attempt == max_retries - 1:
+                        raise Exception(
+                            f"Failed to download micromamba after {max_retries} attempts: {e}"
+                        )
+                    time.sleep(2**attempt)
 
+        try:
+            # first try from mirror
+            _download_and_extract(mirror_url)
+        except Exception:
+            # download from mirror failed, try official source before failing.
+            _download_and_extract(url)
 
+        # Set executable permission
+        os.chmod(micromamba_path, 0o755)
 
+        # Update PATH only once at the end
+        os.environ["PATH"] += os.pathsep + os.path.dirname(micromamba_path)
+        return micromamba_path
+
+    @timer
+    def download_conda_packages(storage, packages, dest_dir):
+        def process_conda_package(args):
             # Ensure that conda packages go into architecture specific folders.
             # The path looks like REPO/CHANNEL/CONDA_SUBDIR/PACKAGE. We trick
             # Micromamba into believing that all packages are coming from a local
             # channel - the only hurdle is ensuring that packages are organised
             # properly.
-
-            # TODO: consider RAM disk
-            dest = os.path.join(conda_pkgs_dir, "/".join(key.split("/")[-2:]))
+            key, tmpfile, dest_dir = args
+            dest = os.path.join(dest_dir, "/".join(key.split("/")[-2:]))
             os.makedirs(os.path.dirname(dest), exist_ok=True)
             shutil.move(tmpfile, dest)
 
+        os.makedirs(dest_dir, exist_ok=True)
+        with storage.load_bytes([package["path"] for package in packages]) as results:
+            with concurrent.futures.ThreadPoolExecutor() as executor:
+                executor.map(
+                    process_conda_package,
+                    [(key, tmpfile, dest_dir) for key, tmpfile, _ in results],
+                )
+        # for key, tmpfile, _ in results:
+
+        #     # TODO: consider RAM disk
+        #     dest = os.path.join(dest_dir, "/".join(key.split("/")[-2:]))
+        #     os.makedirs(os.path.dirname(dest), exist_ok=True)
+        #     shutil.move(tmpfile, dest)
+        return dest_dir
+
+    @timer
+    def download_pypi_packages(storage, packages, dest_dir):
+        def process_pypi_package(args):
+            key, tmpfile, dest_dir = args
+            dest = os.path.join(dest_dir, os.path.basename(key))
+            shutil.move(tmpfile, dest)
+
+        os.makedirs(dest_dir, exist_ok=True)
+        with storage.load_bytes([package["path"] for package in packages]) as results:
+            with concurrent.futures.ThreadPoolExecutor() as executor:
+                executor.map(
+                    process_pypi_package,
+                    [(key, tmpfile, dest_dir) for key, tmpfile, _ in results],
+                )
+        # for key, tmpfile, _ in results:
+        #     dest = os.path.join(dest_dir, os.path.basename(key))
+        #     shutil.move(tmpfile, dest)
+        return dest_dir
+
+    @timer
+    def create_conda_environment(prefix, conda_pkgs_dir):
+        cmd = f'''set -e;
+    tmpfile=$(mktemp);
+    echo "@EXPLICIT" > "$tmpfile";
+    ls -d {conda_pkgs_dir}/*/* >> "$tmpfile";
+    export PATH=$PATH:$(pwd)/micromamba;
+    export CONDA_PKGS_DIRS=$(pwd)/micromamba/pkgs;
+    export MAMBA_NO_LOW_SPEED_LIMIT=1;
+    export MAMBA_USE_INDEX_CACHE=1;
+    export MAMBA_NO_PROGRESS_BARS=1;
+    export CONDA_FETCH_THREADS=1;
+    micromamba create --yes --offline --no-deps \
+        --safety-checks=disabled --no-extra-safety-checks \
+        --prefix {prefix} --file "$tmpfile" \
+        --no-pyc --no-rc --always-copy;
+    rm "$tmpfile"'''
+        run_cmd(cmd)
+
+    @timer
+    def install_pypi_packages(prefix, pypi_pkgs_dir):
+        cmd = f"""set -e;
     export PATH=$PATH:$(pwd)/micromamba;
+    export CONDA_PKGS_DIRS=$(pwd)/micromamba/pkgs;
+    micromamba run --prefix {prefix} python -m pip --disable-pip-version-check \
+        install --root-user-action=ignore --no-compile --no-index \
+        --no-cache-dir --no-deps --prefer-binary \
+        --find-links={pypi_pkgs_dir} --no-user \
+        --no-warn-script-location --no-input \
+        {pypi_pkgs_dir}/*.whl
+    """
+    run_cmd(cmd)
+
+    @timer
+    def setup_environment(
+        architecture, storage, env, prefix, conda_pkgs_dir, pypi_pkgs_dir
+    ):
+        with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
+            # install micromamba, download conda and pypi packages in parallel
+            futures = {
+                "micromamba": executor.submit(install_micromamba, architecture),
+                "conda_pkgs": executor.submit(
+                    download_conda_packages, storage, env["conda"], conda_pkgs_dir
+                ),
+            }
+            if "pypi" in env:
+                futures["pypi_pkgs"] = executor.submit(
+                    download_pypi_packages, storage, env["pypi"], pypi_pkgs_dir
+                )
+
+            # create conda environment after micromamba is installed and conda packages are downloaded
+            done, _ = concurrent.futures.wait(
+                [futures["micromamba"], futures["conda_pkgs"]],
+                return_when=concurrent.futures.ALL_COMPLETED,
+            )
+
+            for future in done:
+                future.result()
+
+            # start conda environment creation
+            futures["conda_env"] = executor.submit(
+                create_conda_environment, prefix, conda_pkgs_dir
+            )
+
+            if "pypi" in env:
+                # install pypi packages after conda environment is created and pypi packages are downloaded
+                done, _ = concurrent.futures.wait(
+                    [futures["conda_env"], futures["pypi_pkgs"]],
+                    return_when=concurrent.futures.ALL_COMPLETED,
+                )
+
+                for future in done:
+                    future.result()
+
+                # install pypi packages
+                futures["pypi_install"] = executor.submit(
+                    install_pypi_packages, prefix, pypi_pkgs_dir
+                )
+                # wait for pypi packages to be installed
+                futures["pypi_install"].result()
+            else:
+                # wait for conda environment to be created
+                futures["conda_env"].result()
+
+    if len(sys.argv) != 5:
+        print("Usage: bootstrap.py <flow_name> <id> <datastore_type> <architecture>")
+        sys.exit(1)
+
+    try:
+        _, flow_name, id_, datastore_type, architecture = sys.argv
+
+        prefix = os.path.join(os.getcwd(), architecture, id_)
+        pkgs_dir = os.path.join(os.getcwd(), ".pkgs")
+        conda_pkgs_dir = os.path.join(pkgs_dir, "conda")
         pypi_pkgs_dir = os.path.join(pkgs_dir, "pypi")
-    cmds.extend(
-        [
-            f"""set -e;
-    export PATH=$PATH:$(pwd)/micromamba;
-    export CONDA_PKGS_DIRS=$(pwd)/micromamba/pkgs;
-    micromamba run --prefix {prefix} python -m pip --disable-pip-version-check install --root-user-action=ignore --no-compile {pypi_pkgs_dir}/*.whl --no-user"""
-        ]
+        manifest_dir = os.path.join(os.getcwd(), DATASTORE_LOCAL_DIR, flow_name)
+
+        datastores = [d for d in DATASTORES if d.TYPE == datastore_type]
+        if not datastores:
+            print(f"No datastore found for type: {datastore_type}")
+            sys.exit(1)
+
+        storage = datastores[0](
+            _datastore_packageroot(datastores[0], lambda *args, **kwargs: None)
         )
 
+        # Move MAGIC_FILE inside local datastore.
+        os.makedirs(manifest_dir, exist_ok=True)
+        shutil.move(
+            os.path.join(os.getcwd(), MAGIC_FILE),
+            os.path.join(manifest_dir, MAGIC_FILE),
         )
+        with open(os.path.join(manifest_dir, MAGIC_FILE)) as f:
+            env = json.load(f)[id_][architecture]
+
+        setup_environment(
+            architecture, storage, env, prefix, conda_pkgs_dir, pypi_pkgs_dir
+        )
+
+    except Exception as e:
+        print(f"Error: {str(e)}", file=sys.stderr)
+        sys.exit(1)
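The bootstrap script now fetches micromamba itself, preferring a mirror and retrying transient failures with exponential backoff before falling back to the official source. A standalone sketch of that download flow, with hypothetical URLs standing in for the real MICROMAMBA_MIRROR_URL / MICROMAMBA_URL values:

import time
from urllib.error import URLError
from urllib.request import Request, urlopen

# Hypothetical URLs; the real code formats MICROMAMBA_MIRROR_URL / MICROMAMBA_URL.
MIRROR_URL = "https://mirror.example.com/micromamba.tar.bz2"
OFFICIAL_URL = "https://example.com/micromamba.tar.bz2"


def fetch(url, max_retries=3):
    # Retry transient failures with exponential backoff: 1s, 2s, 4s, ...
    for attempt in range(max_retries):
        try:
            with urlopen(Request(url)) as response:
                return response.read()
        except (URLError, IOError) as e:
            if attempt == max_retries - 1:
                raise Exception(f"Failed after {max_retries} attempts: {e}")
            time.sleep(2**attempt)


def fetch_micromamba():
    # Try the mirror first; fall back to the official source before failing.
    try:
        return fetch(MIRROR_URL)
    except Exception:
        return fetch(OFFICIAL_URL)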
metaflow/plugins/pypi/conda_decorator.py

@@ -50,20 +50,26 @@ class CondaStepDecorator(StepDecorator):
     # conda channels, users can specify channel::package as the package name.
 
     def __init__(self, attributes=None, statically_defined=False):
-        self._user_defined_attributes = (
-            attributes.copy() if attributes is not None else {}
+        self._attributes_with_user_values = (
+            set(attributes.keys()) if attributes is not None else set()
         )
+
         super(CondaStepDecorator, self).__init__(attributes, statically_defined)
 
+    def init(self):
+        super(CondaStepDecorator, self).init()
+
         # Support legacy 'libraries=' attribute for the decorator.
         self.attributes["packages"] = {
             **self.attributes["libraries"],
             **self.attributes["packages"],
         }
         del self.attributes["libraries"]
+        if self.attributes["packages"]:
+            self._attributes_with_user_values.add("packages")
 
     def is_attribute_user_defined(self, name):
-        return name in self._user_defined_attributes
+        return name in self._attributes_with_user_values
 
     def step_init(self, flow, graph, step, decos, environment, flow_datastore, logger):
         # The init_environment hook for Environment creates the relevant virtual

@@ -83,10 +89,10 @@ class CondaStepDecorator(StepDecorator):
                 **super_attributes["packages"],
                 **self.attributes["packages"],
             }
-            self._user_defined_attributes = {
-                **self._user_defined_attributes,
-                **conda_base._user_defined_attributes,
-            }
+            self._attributes_with_user_values.update(
+                conda_base._attributes_with_user_values
+            )
+
             self.attributes["python"] = (
                 self.attributes["python"] or super_attributes["python"]
             )
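The decorators now record which attribute names the user actually supplied as a set instead of keeping a copy of the attribute dict; since is_attribute_user_defined only answers a membership question, a set of keys suffices. A minimal sketch of the idea, with the surrounding decorator machinery omitted and a made-up defaults dict:

class MyDecorator:
    defaults = {"python": None, "packages": {}, "disabled": None}

    def __init__(self, attributes=None):
        # Remember only the *names* the user passed; values live in attributes.
        self._attributes_with_user_values = (
            set(attributes.keys()) if attributes is not None else set()
        )
        self.attributes = {**self.defaults, **(attributes or {})}

    def is_attribute_user_defined(self, name):
        return name in self._attributes_with_user_values


deco = MyDecorator({"python": "3.11"})
assert deco.is_attribute_user_defined("python")
assert not deco.is_attribute_user_defined("packages")  # present, but defaulted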
@@ -333,11 +339,15 @@ class CondaFlowDecorator(FlowDecorator):
     }
 
     def __init__(self, attributes=None, statically_defined=False):
-        self._user_defined_attributes = (
-            attributes.copy() if attributes is not None else {}
+        self._attributes_with_user_values = (
+            set(attributes.keys()) if attributes is not None else set()
         )
+
         super(CondaFlowDecorator, self).__init__(attributes, statically_defined)
 
+    def init(self):
+        super(CondaFlowDecorator, self).init()
+
         # Support legacy 'libraries=' attribute for the decorator.
         self.attributes["packages"] = {
             **self.attributes["libraries"],

@@ -348,7 +358,7 @@ class CondaFlowDecorator(FlowDecorator):
         self.attributes["python"] = str(self.attributes["python"])
 
     def is_attribute_user_defined(self, name):
-        return name in self._user_defined_attributes
+        return name in self._attributes_with_user_values
 
     def flow_init(
         self, flow, graph, environment, flow_datastore, metadata, logger, echo, options
metaflow/plugins/pypi/conda_environment.py

@@ -5,21 +5,17 @@ import functools
 import io
 import json
 import os
-import sys
 import tarfile
-import
-from concurrent.futures import ThreadPoolExecutor
+import threading
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from functools import wraps
 from hashlib import sha256
 from io import BufferedIOBase, BytesIO
-from itertools import chain
 from urllib.parse import unquote, urlparse
 
-import requests
-
 from metaflow.exception import MetaflowException
 from metaflow.metaflow_config import get_pinned_conda_libs
 from metaflow.metaflow_environment import MetaflowEnvironment
-from metaflow.metaflow_profile import profile
 
 from . import MAGIC_FILE, _datastore_packageroot
 from .utils import conda_platform
@@ -50,7 +46,6 @@ class CondaEnvironment(MetaflowEnvironment):
 
     def validate_environment(self, logger, datastore_type):
         self.datastore_type = datastore_type
-        self.logger = logger
 
         # Avoiding circular imports.
         from metaflow.plugins import DATASTORES
@@ -62,8 +57,21 @@ class CondaEnvironment(MetaflowEnvironment):
         from .micromamba import Micromamba
         from .pip import Pip
 
-        micromamba = Micromamba()
-        self.solvers = {"conda": micromamba, "pypi": Pip(micromamba)}
+        print_lock = threading.Lock()
+
+        def make_thread_safe(func):
+            @wraps(func)
+            def wrapper(*args, **kwargs):
+                with print_lock:
+                    return func(*args, **kwargs)
+
+            return wrapper
+
+        self.logger = make_thread_safe(logger)
+
+        # TODO: Wire up logging
+        micromamba = Micromamba(self.logger)
+        self.solvers = {"conda": micromamba, "pypi": Pip(micromamba, self.logger)}
 
     def init_environment(self, echo, only_steps=None):
         # The implementation optimizes for latency to ensure as many operations can
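Because the solvers now run from multiple threads, the logger passed into validate_environment is wrapped so concurrent workers cannot interleave their output. The same lock-wrapping pattern in isolation, using print as a stand-in logger:

import threading
from concurrent.futures import ThreadPoolExecutor
from functools import wraps

print_lock = threading.Lock()


def make_thread_safe(func):
    # Serialize calls so output from concurrent workers cannot interleave.
    @wraps(func)
    def wrapper(*args, **kwargs):
        with print_lock:
            return func(*args, **kwargs)

    return wrapper


logger = make_thread_safe(print)

with ThreadPoolExecutor() as executor:
    for i in range(4):
        executor.submit(logger, f"solving environment {i} ...")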
@@ -150,6 +158,9 @@ class CondaEnvironment(MetaflowEnvironment):
                     (
                         package["path"],
                         # Lazily fetch package from the interweb if needed.
+                        # TODO: Depending on the len_hint, the package might be downloaded from
+                        # the interweb prematurely. save_bytes needs to be adjusted to handle
+                        # this scenario.
                         LazyOpen(
                             package["local_path"],
                             "rb",
@@ -166,22 +177,60 @@ class CondaEnvironment(MetaflowEnvironment):
             if id_ in dirty:
                 self.write_to_environment_manifest([id_, platform, type_], packages)
 
+        storage = None
+        if self.datastore_type not in ["local"]:
+            # Initialize storage for caching if using a remote datastore
+            storage = self.datastore(_datastore_packageroot(self.datastore, echo))
+
         self.logger("Bootstrapping virtual environment(s) ...")
+        # Sequence of operations:
+        # 1. Start all conda solves in parallel
+        # 2. Download conda packages sequentially
+        # 3. Create and cache conda environments in parallel
+        # 4. Start PyPI solves in parallel after each conda environment is created
+        # 5. Download PyPI packages sequentially
+        # 6. Create and cache PyPI environments in parallel
+
+        with ThreadPoolExecutor() as executor:
+            # Start all conda solves in parallel
+            conda_futures = [
+                executor.submit(lambda x: solve(*x, "conda"), env)
+                for env in environments("conda")
+            ]
+
+            pypi_envs = {env[0]: env for env in environments("pypi")}
+            pypi_futures = []
+
+            # Process conda results sequentially for downloads
+            for future in as_completed(conda_futures):
+                result = future.result()
+                # Sequential conda download
+                self.solvers["conda"].download(*result)
+                # Parallel conda create and cache
+                create_future = executor.submit(self.solvers["conda"].create, *result)
+                if storage:
+                    executor.submit(cache, storage, [result], "conda")
+
+                # Queue PyPI solve to start after conda create
+                if result[0] in pypi_envs:
+
+                    def pypi_solve(env):
+                        create_future.result()  # Wait for conda create
+                        return solve(*env, "pypi")
+
+                    pypi_futures.append(
+                        executor.submit(pypi_solve, pypi_envs[result[0]])
+                    )
+
+            # Process PyPI results sequentially for downloads
+            for solve_future in pypi_futures:
+                result = solve_future.result()
+                # Sequential PyPI download
+                self.solvers["pypi"].download(*result)
+                # Parallel PyPI create and cache
+                executor.submit(self.solvers["pypi"].create, *result)
+                if storage:
+                    executor.submit(cache, storage, [result], "pypi")
         self.logger("Virtual environment(s) bootstrapped!")
 
     def executable(self, step_name, default=None):
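The bootstrapping loop above follows the schedule spelled out in its own comments: solve in parallel, download sequentially, create in parallel. A skeleton of that scheduling pattern, with stub functions standing in for the real solve, download, and create calls:

from concurrent.futures import ThreadPoolExecutor, as_completed


# Stub stand-ins for the real solver calls.
def solve(env):
    return f"{env}-solved"


def download(result):
    pass  # bandwidth-bound: kept sequential on purpose


def create(result):
    pass  # CPU/disk-bound: safe to parallelize


envs = ["env-a", "env-b", "env-c"]

with ThreadPoolExecutor() as executor:
    # 1. Kick off all solves in parallel.
    solve_futures = [executor.submit(solve, env) for env in envs]
    create_futures = []
    # 2. As each solve completes, download its packages sequentially,
    #    then hand environment creation back to the pool.
    for future in as_completed(solve_futures):
        result = future.result()
        download(result)
        create_futures.append(executor.submit(create, result))
    # 3. Wait for all creations to finish.
    for future in create_futures:
        future.result()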
@@ -385,7 +434,8 @@ class CondaEnvironment(MetaflowEnvironment):
                 'DISABLE_TRACING=True python -m metaflow.plugins.pypi.bootstrap "%s" %s "%s" linux-64'
                 % (self.flow.name, id_, self.datastore_type),
                 "echo 'Environment bootstrapped.'",
-                "export PATH=$PATH:$(pwd)/micromamba",
+                # To avoid having to install micromamba in the PATH in micromamba.py, we add it to the PATH here.
+                "export PATH=$PATH:$(pwd)/micromamba/bin",
             ]
         else:
             # for @conda/@pypi(disabled=True).
@@ -446,6 +496,7 @@ class LazyOpen(BufferedIOBase):
         self._file = None
         self._buffer = None
         self._position = 0
+        self.requests = None
 
     def _ensure_file(self):
         if not self._file:
@@ -462,8 +513,13 @@ class LazyOpen(BufferedIOBase):
             raise ValueError("Both filename and url are missing")
 
     def _download_to_buffer(self):
+        if self.requests is None:
+            # TODO: Remove dependency on requests
+            import requests
+
+            self.requests = requests
         # TODO: Stream it in chunks?
-        response = requests.get(self.url, stream=True)
+        response = self.requests.get(self.url, stream=True)
         response.raise_for_status()
         return response.content
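LazyOpen now defers importing requests until a download actually happens, so merely constructing the object no longer pays the import cost. The same lazy-import pattern in a self-contained form (class and method names here are illustrative, not metaflow's):

class LazyDownloader:
    def __init__(self, url):
        self.url = url
        self.requests = None  # imported on first use, not at module load

    def content(self):
        if self.requests is None:
            import requests  # deferred import: free unless content() is called

            self.requests = requests
        response = self.requests.get(self.url, stream=True)
        response.raise_for_status()
        return response.content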