metaflow 2.12.39__py2.py3-none-any.whl → 2.13__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metaflow/__init__.py +1 -1
- metaflow/cli.py +111 -36
- metaflow/cli_args.py +2 -2
- metaflow/cli_components/run_cmds.py +3 -1
- metaflow/datastore/flow_datastore.py +2 -2
- metaflow/exception.py +8 -2
- metaflow/flowspec.py +48 -36
- metaflow/graph.py +28 -27
- metaflow/includefile.py +2 -2
- metaflow/lint.py +35 -20
- metaflow/metaflow_config.py +5 -0
- metaflow/parameters.py +11 -4
- metaflow/plugins/argo/argo_workflows_deployer_objects.py +42 -0
- metaflow/plugins/aws/secrets_manager/aws_secrets_manager_secrets_provider.py +13 -10
- metaflow/plugins/cards/card_creator.py +1 -0
- metaflow/plugins/cards/card_decorator.py +46 -8
- metaflow/plugins/pypi/bootstrap.py +196 -61
- metaflow/plugins/pypi/conda_environment.py +76 -21
- metaflow/plugins/pypi/micromamba.py +42 -15
- metaflow/plugins/pypi/pip.py +8 -3
- metaflow/runner/click_api.py +175 -39
- metaflow/runner/deployer_impl.py +6 -1
- metaflow/runner/metaflow_runner.py +6 -1
- metaflow/runner/utils.py +5 -0
- metaflow/user_configs/config_options.py +87 -34
- metaflow/user_configs/config_parameters.py +44 -25
- metaflow/util.py +2 -2
- metaflow/version.py +1 -1
- {metaflow-2.12.39.dist-info → metaflow-2.13.dist-info}/METADATA +2 -2
- {metaflow-2.12.39.dist-info → metaflow-2.13.dist-info}/RECORD +34 -34
- {metaflow-2.12.39.dist-info → metaflow-2.13.dist-info}/LICENSE +0 -0
- {metaflow-2.12.39.dist-info → metaflow-2.13.dist-info}/WHEEL +0 -0
- {metaflow-2.12.39.dist-info → metaflow-2.13.dist-info}/entry_points.txt +0 -0
- {metaflow-2.12.39.dist-info → metaflow-2.13.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,5 @@
|
|
1
1
|
import bz2
|
2
|
+
import concurrent.futures
|
2
3
|
import io
|
3
4
|
import json
|
4
5
|
import os
|
@@ -6,6 +7,9 @@ import shutil
|
|
6
7
|
import subprocess
|
7
8
|
import sys
|
8
9
|
import tarfile
|
10
|
+
import time
|
11
|
+
|
12
|
+
import requests
|
9
13
|
|
10
14
|
from metaflow.metaflow_config import DATASTORE_LOCAL_DIR
|
11
15
|
from metaflow.plugins import DATASTORES
|
@@ -15,6 +19,18 @@ from . import MAGIC_FILE, _datastore_packageroot
|
|
15
19
|
|
16
20
|
# Bootstraps a valid conda virtual environment composed of conda and pypi packages
|
17
21
|
|
22
|
+
|
23
|
+
def timer(func):
|
24
|
+
def wrapper(*args, **kwargs):
|
25
|
+
start_time = time.time()
|
26
|
+
result = func(*args, **kwargs)
|
27
|
+
duration = time.time() - start_time
|
28
|
+
# print(f"Time taken for {func.__name__}: {duration:.2f} seconds")
|
29
|
+
return result
|
30
|
+
|
31
|
+
return wrapper
|
32
|
+
|
33
|
+
|
18
34
|
if __name__ == "__main__":
|
19
35
|
if len(sys.argv) != 5:
|
20
36
|
print("Usage: bootstrap.py <flow_name> <id> <datastore_type> <architecture>")
|
@@ -47,6 +63,8 @@ if __name__ == "__main__":
|
|
47
63
|
|
48
64
|
prefix = os.path.join(os.getcwd(), architecture, id_)
|
49
65
|
pkgs_dir = os.path.join(os.getcwd(), ".pkgs")
|
66
|
+
conda_pkgs_dir = os.path.join(pkgs_dir, "conda")
|
67
|
+
pypi_pkgs_dir = os.path.join(pkgs_dir, "pypi")
|
50
68
|
manifest_dir = os.path.join(os.getcwd(), DATASTORE_LOCAL_DIR, flow_name)
|
51
69
|
|
52
70
|
datastores = [d for d in DATASTORES if d.TYPE == datastore_type]
|
@@ -64,77 +82,194 @@ if __name__ == "__main__":
|
|
64
82
|
os.path.join(os.getcwd(), MAGIC_FILE),
|
65
83
|
os.path.join(manifest_dir, MAGIC_FILE),
|
66
84
|
)
|
67
|
-
|
68
85
|
with open(os.path.join(manifest_dir, MAGIC_FILE)) as f:
|
69
86
|
env = json.load(f)[id_][architecture]
|
70
87
|
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
88
|
+
def run_cmd(cmd):
|
89
|
+
result = subprocess.run(
|
90
|
+
cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
|
91
|
+
)
|
92
|
+
if result.returncode != 0:
|
93
|
+
print(f"Bootstrap failed while executing: {cmd}")
|
94
|
+
print("Stdout:", result.stdout)
|
95
|
+
print("Stderr:", result.stderr)
|
96
|
+
sys.exit(1)
|
97
|
+
|
98
|
+
@timer
|
99
|
+
def install_micromamba(architecture):
|
100
|
+
micromamba_dir = os.path.join(os.getcwd(), "micromamba")
|
101
|
+
micromamba_path = os.path.join(micromamba_dir, "bin", "micromamba")
|
102
|
+
|
103
|
+
if which("micromamba"):
|
104
|
+
return which("micromamba")
|
105
|
+
if os.path.exists(micromamba_path):
|
106
|
+
os.environ["PATH"] += os.pathsep + os.path.dirname(micromamba_path)
|
107
|
+
return micromamba_path
|
108
|
+
|
109
|
+
# Download and extract in one go
|
110
|
+
# TODO: Serve from cloudflare
|
111
|
+
url = f"https://micro.mamba.pm/api/micromamba/{architecture}/2.0.4"
|
112
|
+
|
113
|
+
# Prepare directory once
|
114
|
+
os.makedirs(os.path.dirname(micromamba_path), exist_ok=True)
|
115
|
+
|
116
|
+
# Stream and process directly to file
|
117
|
+
with requests.get(url, stream=True, timeout=30) as response:
|
118
|
+
if response.status_code != 200:
|
119
|
+
raise Exception(
|
120
|
+
f"Failed to download micromamba: HTTP {response.status_code}"
|
121
|
+
)
|
122
|
+
|
123
|
+
decompressor = bz2.BZ2Decompressor()
|
124
|
+
|
125
|
+
# Process in memory without temporary files
|
126
|
+
tar_content = decompressor.decompress(response.raw.read())
|
127
|
+
|
128
|
+
with tarfile.open(fileobj=io.BytesIO(tar_content), mode="r:") as tar:
|
129
|
+
member = tar.getmember("bin/micromamba")
|
130
|
+
# Extract directly to final location
|
131
|
+
with open(micromamba_path, "wb") as f:
|
132
|
+
f.write(tar.extractfile(member).read())
|
133
|
+
|
134
|
+
# Set executable permission
|
135
|
+
os.chmod(micromamba_path, 0o755)
|
136
|
+
|
137
|
+
# Update PATH only once at the end
|
138
|
+
os.environ["PATH"] += os.pathsep + os.path.dirname(micromamba_path)
|
139
|
+
return micromamba_path
|
140
|
+
|
141
|
+
@timer
|
142
|
+
def download_conda_packages(storage, packages, dest_dir):
|
143
|
+
|
144
|
+
def process_conda_package(args):
|
75
145
|
# Ensure that conda packages go into architecture specific folders.
|
76
146
|
# The path looks like REPO/CHANNEL/CONDA_SUBDIR/PACKAGE. We trick
|
77
147
|
# Micromamba into believing that all packages are coming from a local
|
78
148
|
# channel - the only hurdle is ensuring that packages are organised
|
79
149
|
# properly.
|
80
|
-
|
81
|
-
|
82
|
-
dest = os.path.join(conda_pkgs_dir, "/".join(key.split("/")[-2:]))
|
150
|
+
key, tmpfile, dest_dir = args
|
151
|
+
dest = os.path.join(dest_dir, "/".join(key.split("/")[-2:]))
|
83
152
|
os.makedirs(os.path.dirname(dest), exist_ok=True)
|
84
153
|
shutil.move(tmpfile, dest)
|
85
154
|
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
155
|
+
os.makedirs(dest_dir, exist_ok=True)
|
156
|
+
with storage.load_bytes([package["path"] for package in packages]) as results:
|
157
|
+
with concurrent.futures.ThreadPoolExecutor() as executor:
|
158
|
+
executor.map(
|
159
|
+
process_conda_package,
|
160
|
+
[(key, tmpfile, dest_dir) for key, tmpfile, _ in results],
|
161
|
+
)
|
162
|
+
# for key, tmpfile, _ in results:
|
163
|
+
|
164
|
+
# # TODO: consider RAM disk
|
165
|
+
# dest = os.path.join(dest_dir, "/".join(key.split("/")[-2:]))
|
166
|
+
# os.makedirs(os.path.dirname(dest), exist_ok=True)
|
167
|
+
# shutil.move(tmpfile, dest)
|
168
|
+
return dest_dir
|
169
|
+
|
170
|
+
@timer
|
171
|
+
def download_pypi_packages(storage, packages, dest_dir):
|
172
|
+
|
173
|
+
def process_pypi_package(args):
|
174
|
+
key, tmpfile, dest_dir = args
|
175
|
+
dest = os.path.join(dest_dir, os.path.basename(key))
|
176
|
+
shutil.move(tmpfile, dest)
|
177
|
+
|
178
|
+
os.makedirs(dest_dir, exist_ok=True)
|
179
|
+
with storage.load_bytes([package["path"] for package in packages]) as results:
|
180
|
+
with concurrent.futures.ThreadPoolExecutor() as executor:
|
181
|
+
executor.map(
|
182
|
+
process_pypi_package,
|
183
|
+
[(key, tmpfile, dest_dir) for key, tmpfile, _ in results],
|
184
|
+
)
|
185
|
+
# for key, tmpfile, _ in results:
|
186
|
+
# dest = os.path.join(dest_dir, os.path.basename(key))
|
187
|
+
# shutil.move(tmpfile, dest)
|
188
|
+
return dest_dir
|
189
|
+
|
190
|
+
@timer
|
191
|
+
def create_conda_environment(prefix, conda_pkgs_dir):
|
192
|
+
cmd = f'''set -e;
|
193
|
+
tmpfile=$(mktemp);
|
194
|
+
echo "@EXPLICIT" > "$tmpfile";
|
195
|
+
ls -d {conda_pkgs_dir}/*/* >> "$tmpfile";
|
94
196
|
export PATH=$PATH:$(pwd)/micromamba;
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
export CONDA_PKGS_DIRS=$(pwd)/micromamba/pkgs;
|
107
|
-
micromamba create --yes --offline --no-deps --safety-checks=disabled --no-extra-safety-checks --prefix {prefix} --file "$tmpfile";
|
108
|
-
rm "$tmpfile"''',
|
109
|
-
]
|
110
|
-
|
111
|
-
# Download PyPI packages.
|
112
|
-
if "pypi" in env:
|
113
|
-
pypi_pkgs_dir = os.path.join(pkgs_dir, "pypi")
|
114
|
-
with storage.load_bytes(
|
115
|
-
[package["path"] for package in env["pypi"]]
|
116
|
-
) as results:
|
117
|
-
for key, tmpfile, _ in results:
|
118
|
-
dest = os.path.join(pypi_pkgs_dir, os.path.basename(key))
|
119
|
-
os.makedirs(os.path.dirname(dest), exist_ok=True)
|
120
|
-
shutil.move(tmpfile, dest)
|
121
|
-
|
122
|
-
# Install PyPI packages.
|
123
|
-
cmds.extend(
|
124
|
-
[
|
125
|
-
f"""set -e;
|
126
|
-
export PATH=$PATH:$(pwd)/micromamba;
|
127
|
-
export CONDA_PKGS_DIRS=$(pwd)/micromamba/pkgs;
|
128
|
-
micromamba run --prefix {prefix} python -m pip --disable-pip-version-check install --root-user-action=ignore --no-compile {pypi_pkgs_dir}/*.whl --no-user"""
|
129
|
-
]
|
130
|
-
)
|
197
|
+
export CONDA_PKGS_DIRS=$(pwd)/micromamba/pkgs;
|
198
|
+
export MAMBA_NO_LOW_SPEED_LIMIT=1;
|
199
|
+
export MAMBA_USE_INDEX_CACHE=1;
|
200
|
+
export MAMBA_NO_PROGRESS_BARS=1;
|
201
|
+
export CONDA_FETCH_THREADS=1;
|
202
|
+
micromamba create --yes --offline --no-deps \
|
203
|
+
--safety-checks=disabled --no-extra-safety-checks \
|
204
|
+
--prefix {prefix} --file "$tmpfile" \
|
205
|
+
--no-pyc --no-rc --always-copy;
|
206
|
+
rm "$tmpfile"'''
|
207
|
+
run_cmd(cmd)
|
131
208
|
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
209
|
+
@timer
|
210
|
+
def install_pypi_packages(prefix, pypi_pkgs_dir):
|
211
|
+
|
212
|
+
cmd = f"""set -e;
|
213
|
+
export PATH=$PATH:$(pwd)/micromamba;
|
214
|
+
export CONDA_PKGS_DIRS=$(pwd)/micromamba/pkgs;
|
215
|
+
micromamba run --prefix {prefix} python -m pip --disable-pip-version-check \
|
216
|
+
install --root-user-action=ignore --no-compile --no-index \
|
217
|
+
--no-cache-dir --no-deps --prefer-binary \
|
218
|
+
--find-links={pypi_pkgs_dir} --no-user \
|
219
|
+
--no-warn-script-location --no-input \
|
220
|
+
{pypi_pkgs_dir}/*.whl
|
221
|
+
"""
|
222
|
+
run_cmd(cmd)
|
223
|
+
|
224
|
+
@timer
|
225
|
+
def setup_environment(
|
226
|
+
architecture, storage, env, prefix, conda_pkgs_dir, pypi_pkgs_dir
|
227
|
+
):
|
228
|
+
with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
|
229
|
+
# install micromamba, download conda and pypi packages in parallel
|
230
|
+
futures = {
|
231
|
+
"micromamba": executor.submit(install_micromamba, architecture),
|
232
|
+
"conda_pkgs": executor.submit(
|
233
|
+
download_conda_packages, storage, env["conda"], conda_pkgs_dir
|
234
|
+
),
|
235
|
+
}
|
236
|
+
if "pypi" in env:
|
237
|
+
futures["pypi_pkgs"] = executor.submit(
|
238
|
+
download_pypi_packages, storage, env["pypi"], pypi_pkgs_dir
|
239
|
+
)
|
240
|
+
|
241
|
+
# create conda environment after micromamba is installed and conda packages are downloaded
|
242
|
+
done, _ = concurrent.futures.wait(
|
243
|
+
[futures["micromamba"], futures["conda_pkgs"]],
|
244
|
+
return_when=concurrent.futures.ALL_COMPLETED,
|
245
|
+
)
|
246
|
+
|
247
|
+
for future in done:
|
248
|
+
future.result()
|
249
|
+
|
250
|
+
# start conda environment creation
|
251
|
+
futures["conda_env"] = executor.submit(
|
252
|
+
create_conda_environment, prefix, conda_pkgs_dir
|
253
|
+
)
|
254
|
+
|
255
|
+
if "pypi" in env:
|
256
|
+
# install pypi packages after conda environment is created and pypi packages are downloaded
|
257
|
+
done, _ = concurrent.futures.wait(
|
258
|
+
[futures["conda_env"], futures["pypi_pkgs"]],
|
259
|
+
return_when=concurrent.futures.ALL_COMPLETED,
|
260
|
+
)
|
261
|
+
|
262
|
+
for future in done:
|
263
|
+
future.result()
|
264
|
+
|
265
|
+
# install pypi packages
|
266
|
+
futures["pypi_install"] = executor.submit(
|
267
|
+
install_pypi_packages, prefix, pypi_pkgs_dir
|
268
|
+
)
|
269
|
+
# wait for pypi packages to be installed
|
270
|
+
futures["pypi_install"].result()
|
271
|
+
else:
|
272
|
+
# wait for conda environment to be created
|
273
|
+
futures["conda_env"].result()
|
274
|
+
|
275
|
+
setup_environment(architecture, storage, env, prefix, conda_pkgs_dir, pypi_pkgs_dir)
|
@@ -5,10 +5,11 @@ import functools
|
|
5
5
|
import io
|
6
6
|
import json
|
7
7
|
import os
|
8
|
-
import sys
|
9
8
|
import tarfile
|
9
|
+
import threading
|
10
10
|
import time
|
11
|
-
from concurrent.futures import ThreadPoolExecutor
|
11
|
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
12
|
+
from functools import wraps
|
12
13
|
from hashlib import sha256
|
13
14
|
from io import BufferedIOBase, BytesIO
|
14
15
|
from itertools import chain
|
@@ -50,7 +51,6 @@ class CondaEnvironment(MetaflowEnvironment):
|
|
50
51
|
|
51
52
|
def validate_environment(self, logger, datastore_type):
|
52
53
|
self.datastore_type = datastore_type
|
53
|
-
self.logger = logger
|
54
54
|
|
55
55
|
# Avoiding circular imports.
|
56
56
|
from metaflow.plugins import DATASTORES
|
@@ -62,8 +62,21 @@ class CondaEnvironment(MetaflowEnvironment):
|
|
62
62
|
from .micromamba import Micromamba
|
63
63
|
from .pip import Pip
|
64
64
|
|
65
|
-
|
66
|
-
|
65
|
+
print_lock = threading.Lock()
|
66
|
+
|
67
|
+
def make_thread_safe(func):
|
68
|
+
@wraps(func)
|
69
|
+
def wrapper(*args, **kwargs):
|
70
|
+
with print_lock:
|
71
|
+
return func(*args, **kwargs)
|
72
|
+
|
73
|
+
return wrapper
|
74
|
+
|
75
|
+
self.logger = make_thread_safe(logger)
|
76
|
+
|
77
|
+
# TODO: Wire up logging
|
78
|
+
micromamba = Micromamba(self.logger)
|
79
|
+
self.solvers = {"conda": micromamba, "pypi": Pip(micromamba, self.logger)}
|
67
80
|
|
68
81
|
def init_environment(self, echo, only_steps=None):
|
69
82
|
# The implementation optimizes for latency to ensure as many operations can
|
@@ -150,6 +163,9 @@ class CondaEnvironment(MetaflowEnvironment):
|
|
150
163
|
(
|
151
164
|
package["path"],
|
152
165
|
# Lazily fetch package from the interweb if needed.
|
166
|
+
# TODO: Depending on the len_hint, the package might be downloaded from
|
167
|
+
# the interweb prematurely. save_bytes needs to be adjusted to handle
|
168
|
+
# this scenario.
|
153
169
|
LazyOpen(
|
154
170
|
package["local_path"],
|
155
171
|
"rb",
|
@@ -166,22 +182,60 @@ class CondaEnvironment(MetaflowEnvironment):
|
|
166
182
|
if id_ in dirty:
|
167
183
|
self.write_to_environment_manifest([id_, platform, type_], packages)
|
168
184
|
|
169
|
-
|
185
|
+
storage = None
|
186
|
+
if self.datastore_type not in ["local"]:
|
187
|
+
# Initialize storage for caching if using a remote datastore
|
188
|
+
storage = self.datastore(_datastore_packageroot(self.datastore, echo))
|
189
|
+
|
170
190
|
self.logger("Bootstrapping virtual environment(s) ...")
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
191
|
+
# Sequence of operations:
|
192
|
+
# 1. Start all conda solves in parallel
|
193
|
+
# 2. Download conda packages sequentially
|
194
|
+
# 3. Create and cache conda environments in parallel
|
195
|
+
# 4. Start PyPI solves in parallel after each conda environment is created
|
196
|
+
# 5. Download PyPI packages sequentially
|
197
|
+
# 6. Create and cache PyPI environments in parallel
|
198
|
+
|
199
|
+
with ThreadPoolExecutor() as executor:
|
200
|
+
# Start all conda solves in parallel
|
201
|
+
conda_futures = [
|
202
|
+
executor.submit(lambda x: solve(*x, "conda"), env)
|
203
|
+
for env in environments("conda")
|
204
|
+
]
|
205
|
+
|
206
|
+
pypi_envs = {env[0]: env for env in environments("pypi")}
|
207
|
+
pypi_futures = []
|
208
|
+
|
209
|
+
# Process conda results sequentially for downloads
|
210
|
+
for future in as_completed(conda_futures):
|
211
|
+
result = future.result()
|
212
|
+
# Sequential conda download
|
213
|
+
self.solvers["conda"].download(*result)
|
214
|
+
# Parallel conda create and cache
|
215
|
+
create_future = executor.submit(self.solvers["conda"].create, *result)
|
216
|
+
if storage:
|
217
|
+
executor.submit(cache, storage, [result], "conda")
|
218
|
+
|
219
|
+
# Queue PyPI solve to start after conda create
|
220
|
+
if result[0] in pypi_envs:
|
221
|
+
|
222
|
+
def pypi_solve(env):
|
223
|
+
create_future.result() # Wait for conda create
|
224
|
+
return solve(*env, "pypi")
|
225
|
+
|
226
|
+
pypi_futures.append(
|
227
|
+
executor.submit(pypi_solve, pypi_envs[result[0]])
|
228
|
+
)
|
229
|
+
|
230
|
+
# Process PyPI results sequentially for downloads
|
231
|
+
for solve_future in pypi_futures:
|
232
|
+
result = solve_future.result()
|
233
|
+
# Sequential PyPI download
|
234
|
+
self.solvers["pypi"].download(*result)
|
235
|
+
# Parallel PyPI create and cache
|
236
|
+
executor.submit(self.solvers["pypi"].create, *result)
|
237
|
+
if storage:
|
238
|
+
executor.submit(cache, storage, [result], "pypi")
|
185
239
|
self.logger("Virtual environment(s) bootstrapped!")
|
186
240
|
|
187
241
|
def executable(self, step_name, default=None):
|
@@ -382,7 +436,8 @@ class CondaEnvironment(MetaflowEnvironment):
|
|
382
436
|
'DISABLE_TRACING=True python -m metaflow.plugins.pypi.bootstrap "%s" %s "%s" linux-64'
|
383
437
|
% (self.flow.name, id_, self.datastore_type),
|
384
438
|
"echo 'Environment bootstrapped.'",
|
385
|
-
|
439
|
+
# To avoid having to install micromamba in the PATH in micromamba.py, we add it to the PATH here.
|
440
|
+
"export PATH=$PATH:$(pwd)/micromamba/bin",
|
386
441
|
]
|
387
442
|
else:
|
388
443
|
# for @conda/@pypi(disabled=True).
|
@@ -1,7 +1,9 @@
|
|
1
|
+
import functools
|
1
2
|
import json
|
2
3
|
import os
|
3
4
|
import subprocess
|
4
5
|
import tempfile
|
6
|
+
import time
|
5
7
|
|
6
8
|
from metaflow.exception import MetaflowException
|
7
9
|
from metaflow.util import which
|
@@ -19,8 +21,11 @@ class MicromambaException(MetaflowException):
|
|
19
21
|
super(MicromambaException, self).__init__(msg)
|
20
22
|
|
21
23
|
|
24
|
+
GLIBC_VERSION = os.environ.get("CONDA_OVERRIDE_GLIBC", "2.38")
|
25
|
+
|
26
|
+
|
22
27
|
class Micromamba(object):
|
23
|
-
def __init__(self):
|
28
|
+
def __init__(self, logger=None):
|
24
29
|
# micromamba is a tiny version of the mamba package manager and comes with
|
25
30
|
# metaflow specific performance enhancements.
|
26
31
|
|
@@ -33,6 +38,12 @@ class Micromamba(object):
|
|
33
38
|
os.path.expanduser(_home),
|
34
39
|
"micromamba",
|
35
40
|
)
|
41
|
+
|
42
|
+
if logger:
|
43
|
+
self.logger = logger
|
44
|
+
else:
|
45
|
+
self.logger = lambda *args, **kwargs: None # No-op logger if not provided
|
46
|
+
|
36
47
|
self.bin = (
|
37
48
|
which(os.environ.get("METAFLOW_PATH_TO_MICROMAMBA") or "micromamba")
|
38
49
|
or which("./micromamba") # to support remote execution
|
@@ -70,6 +81,9 @@ class Micromamba(object):
|
|
70
81
|
"MAMBA_ADD_PIP_AS_PYTHON_DEPENDENCY": "true",
|
71
82
|
"CONDA_SUBDIR": platform,
|
72
83
|
# "CONDA_UNSATISFIABLE_HINTS_CHECK_DEPTH": "0" # https://github.com/conda/conda/issues/9862
|
84
|
+
# Add a default glibc version for linux-64 environments (ignored for other platforms)
|
85
|
+
# TODO: Make the version configurable
|
86
|
+
"CONDA_OVERRIDE_GLIBC": GLIBC_VERSION,
|
73
87
|
}
|
74
88
|
cmd = [
|
75
89
|
"create",
|
@@ -78,6 +92,7 @@ class Micromamba(object):
|
|
78
92
|
"--dry-run",
|
79
93
|
"--no-extra-safety-checks",
|
80
94
|
"--repodata-ttl=86400",
|
95
|
+
"--safety-checks=disabled",
|
81
96
|
"--retry-clean-cache",
|
82
97
|
"--prefix=%s/prefix" % tmp_dir,
|
83
98
|
]
|
@@ -91,10 +106,11 @@ class Micromamba(object):
|
|
91
106
|
cmd.append("python==%s" % python)
|
92
107
|
# TODO: Ensure a human readable message is returned when the environment
|
93
108
|
# can't be resolved for any and all reasons.
|
94
|
-
|
109
|
+
solved_packages = [
|
95
110
|
{k: v for k, v in item.items() if k in ["url"]}
|
96
111
|
for item in self._call(cmd, env)["actions"]["LINK"]
|
97
112
|
]
|
113
|
+
return solved_packages
|
98
114
|
|
99
115
|
def download(self, id_, packages, python, platform):
|
100
116
|
# Unfortunately all the packages need to be catalogued in package cache
|
@@ -103,8 +119,6 @@ class Micromamba(object):
|
|
103
119
|
# Micromamba is painfully slow in determining if many packages are infact
|
104
120
|
# already cached. As a perf heuristic, we check if the environment already
|
105
121
|
# exists to short circuit package downloads.
|
106
|
-
if self.path_to_environment(id_, platform):
|
107
|
-
return
|
108
122
|
|
109
123
|
prefix = "{env_dirs}/{keyword}/{platform}/{id}".format(
|
110
124
|
env_dirs=self.info()["envs_dirs"][0],
|
@@ -113,13 +127,18 @@ class Micromamba(object):
|
|
113
127
|
id=id_,
|
114
128
|
)
|
115
129
|
|
116
|
-
#
|
130
|
+
# cheap check
|
117
131
|
if os.path.exists(f"{prefix}/fake.done"):
|
118
132
|
return
|
119
133
|
|
134
|
+
# somewhat expensive check
|
135
|
+
if self.path_to_environment(id_, platform):
|
136
|
+
return
|
137
|
+
|
120
138
|
with tempfile.TemporaryDirectory() as tmp_dir:
|
121
139
|
env = {
|
122
140
|
"CONDA_SUBDIR": platform,
|
141
|
+
"CONDA_OVERRIDE_GLIBC": GLIBC_VERSION,
|
123
142
|
}
|
124
143
|
cmd = [
|
125
144
|
"create",
|
@@ -159,6 +178,7 @@ class Micromamba(object):
|
|
159
178
|
# use hardlinks when possible, otherwise copy files
|
160
179
|
# disabled for now since it adds to environment creation latencies
|
161
180
|
"CONDA_ALLOW_SOFTLINKS": "0",
|
181
|
+
"CONDA_OVERRIDE_GLIBC": GLIBC_VERSION,
|
162
182
|
}
|
163
183
|
cmd = [
|
164
184
|
"create",
|
@@ -174,6 +194,7 @@ class Micromamba(object):
|
|
174
194
|
cmd.append("{url}".format(**package))
|
175
195
|
self._call(cmd, env)
|
176
196
|
|
197
|
+
@functools.lru_cache(maxsize=None)
|
177
198
|
def info(self):
|
178
199
|
return self._call(["config", "list", "-a"])
|
179
200
|
|
@@ -198,18 +219,24 @@ class Micromamba(object):
|
|
198
219
|
}
|
199
220
|
directories = self.info()["pkgs_dirs"]
|
200
221
|
# search all package caches for packages
|
201
|
-
|
202
|
-
|
222
|
+
|
223
|
+
file_to_path = {}
|
224
|
+
for d in directories:
|
225
|
+
if os.path.isdir(d):
|
226
|
+
try:
|
227
|
+
with os.scandir(d) as entries:
|
228
|
+
for entry in entries:
|
229
|
+
if entry.is_file():
|
230
|
+
# Prefer the first occurrence if the file exists in multiple directories
|
231
|
+
file_to_path.setdefault(entry.name, entry.path)
|
232
|
+
except OSError:
|
233
|
+
continue
|
234
|
+
ret = {
|
235
|
+
# set package tarball local paths to None if package tarballs are missing
|
236
|
+
url: file_to_path.get(file)
|
203
237
|
for url, file in packages_to_filenames.items()
|
204
|
-
for d in directories
|
205
|
-
if os.path.isdir(d)
|
206
|
-
and file in os.listdir(d)
|
207
|
-
and os.path.isfile(os.path.join(d, file))
|
208
238
|
}
|
209
|
-
|
210
|
-
for url in packages_to_filenames:
|
211
|
-
metadata.setdefault(url, None)
|
212
|
-
return metadata
|
239
|
+
return ret
|
213
240
|
|
214
241
|
def interpreter(self, id_):
|
215
242
|
return os.path.join(self.path_to_environment(id_), "bin/python")
|
metaflow/plugins/pypi/pip.py
CHANGED
@@ -4,6 +4,7 @@ import re
|
|
4
4
|
import shutil
|
5
5
|
import subprocess
|
6
6
|
import tempfile
|
7
|
+
import time
|
7
8
|
from concurrent.futures import ThreadPoolExecutor
|
8
9
|
from itertools import chain, product
|
9
10
|
from urllib.parse import unquote
|
@@ -50,10 +51,14 @@ INSTALLATION_MARKER = "{prefix}/.pip/id"
|
|
50
51
|
|
51
52
|
|
52
53
|
class Pip(object):
|
53
|
-
def __init__(self, micromamba=None):
|
54
|
+
def __init__(self, micromamba=None, logger=None):
|
54
55
|
# pip is assumed to be installed inside a conda environment managed by
|
55
56
|
# micromamba. pip commands are executed using `micromamba run --prefix`
|
56
|
-
self.micromamba = micromamba or Micromamba()
|
57
|
+
self.micromamba = micromamba or Micromamba(logger)
|
58
|
+
if logger:
|
59
|
+
self.logger = logger
|
60
|
+
else:
|
61
|
+
self.logger = lambda *args, **kwargs: None # No-op logger if not provided
|
57
62
|
|
58
63
|
def solve(self, id_, packages, python, platform):
|
59
64
|
prefix = self.micromamba.path_to_environment(id_)
|
@@ -123,7 +128,7 @@ class Pip(object):
|
|
123
128
|
**res,
|
124
129
|
subdir_str=(
|
125
130
|
"#subdirectory=%s" % subdirectory if subdirectory else ""
|
126
|
-
)
|
131
|
+
),
|
127
132
|
)
|
128
133
|
# used to deduplicate the storage location in case wheel does not
|
129
134
|
# build with enough unique identifiers.
|