metaflow 2.12.38__py2.py3-none-any.whl → 2.13__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. metaflow/__init__.py +1 -1
  2. metaflow/cli.py +111 -36
  3. metaflow/cli_args.py +2 -2
  4. metaflow/cli_components/run_cmds.py +3 -1
  5. metaflow/datastore/flow_datastore.py +2 -2
  6. metaflow/exception.py +8 -2
  7. metaflow/flowspec.py +48 -36
  8. metaflow/graph.py +28 -27
  9. metaflow/includefile.py +2 -2
  10. metaflow/lint.py +35 -20
  11. metaflow/metaflow_config.py +5 -0
  12. metaflow/parameters.py +11 -4
  13. metaflow/plugins/argo/argo_workflows_deployer_objects.py +47 -1
  14. metaflow/plugins/aws/secrets_manager/aws_secrets_manager_secrets_provider.py +13 -10
  15. metaflow/plugins/aws/step_functions/step_functions_deployer_objects.py +3 -0
  16. metaflow/plugins/cards/card_creator.py +1 -0
  17. metaflow/plugins/cards/card_decorator.py +46 -8
  18. metaflow/plugins/pypi/bootstrap.py +196 -61
  19. metaflow/plugins/pypi/conda_decorator.py +14 -26
  20. metaflow/plugins/pypi/conda_environment.py +76 -21
  21. metaflow/plugins/pypi/micromamba.py +42 -15
  22. metaflow/plugins/pypi/pip.py +8 -3
  23. metaflow/plugins/pypi/pypi_decorator.py +10 -9
  24. metaflow/runner/click_api.py +175 -39
  25. metaflow/runner/deployer.py +1 -1
  26. metaflow/runner/deployer_impl.py +8 -3
  27. metaflow/runner/metaflow_runner.py +10 -2
  28. metaflow/runner/nbdeploy.py +2 -0
  29. metaflow/runner/nbrun.py +1 -1
  30. metaflow/runner/subprocess_manager.py +3 -1
  31. metaflow/runner/utils.py +41 -19
  32. metaflow/user_configs/config_options.py +87 -34
  33. metaflow/user_configs/config_parameters.py +44 -25
  34. metaflow/util.py +2 -2
  35. metaflow/version.py +1 -1
  36. {metaflow-2.12.38.dist-info → metaflow-2.13.dist-info}/METADATA +2 -2
  37. {metaflow-2.12.38.dist-info → metaflow-2.13.dist-info}/RECORD +41 -41
  38. {metaflow-2.12.38.dist-info → metaflow-2.13.dist-info}/LICENSE +0 -0
  39. {metaflow-2.12.38.dist-info → metaflow-2.13.dist-info}/WHEEL +0 -0
  40. {metaflow-2.12.38.dist-info → metaflow-2.13.dist-info}/entry_points.txt +0 -0
  41. {metaflow-2.12.38.dist-info → metaflow-2.13.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,5 @@
1
1
  import bz2
2
+ import concurrent.futures
2
3
  import io
3
4
  import json
4
5
  import os
@@ -6,6 +7,9 @@ import shutil
6
7
  import subprocess
7
8
  import sys
8
9
  import tarfile
10
+ import time
11
+
12
+ import requests
9
13
 
10
14
  from metaflow.metaflow_config import DATASTORE_LOCAL_DIR
11
15
  from metaflow.plugins import DATASTORES
@@ -15,6 +19,18 @@ from . import MAGIC_FILE, _datastore_packageroot
15
19
 
16
20
  # Bootstraps a valid conda virtual environment composed of conda and pypi packages
17
21
 
22
+
23
+ def timer(func):
24
+ def wrapper(*args, **kwargs):
25
+ start_time = time.time()
26
+ result = func(*args, **kwargs)
27
+ duration = time.time() - start_time
28
+ # print(f"Time taken for {func.__name__}: {duration:.2f} seconds")
29
+ return result
30
+
31
+ return wrapper
32
+
33
+
18
34
  if __name__ == "__main__":
19
35
  if len(sys.argv) != 5:
20
36
  print("Usage: bootstrap.py <flow_name> <id> <datastore_type> <architecture>")
@@ -47,6 +63,8 @@ if __name__ == "__main__":
47
63
 
48
64
  prefix = os.path.join(os.getcwd(), architecture, id_)
49
65
  pkgs_dir = os.path.join(os.getcwd(), ".pkgs")
66
+ conda_pkgs_dir = os.path.join(pkgs_dir, "conda")
67
+ pypi_pkgs_dir = os.path.join(pkgs_dir, "pypi")
50
68
  manifest_dir = os.path.join(os.getcwd(), DATASTORE_LOCAL_DIR, flow_name)
51
69
 
52
70
  datastores = [d for d in DATASTORES if d.TYPE == datastore_type]
@@ -64,77 +82,194 @@ if __name__ == "__main__":
64
82
  os.path.join(os.getcwd(), MAGIC_FILE),
65
83
  os.path.join(manifest_dir, MAGIC_FILE),
66
84
  )
67
-
68
85
  with open(os.path.join(manifest_dir, MAGIC_FILE)) as f:
69
86
  env = json.load(f)[id_][architecture]
70
87
 
71
- # Download Conda packages.
72
- conda_pkgs_dir = os.path.join(pkgs_dir, "conda")
73
- with storage.load_bytes([package["path"] for package in env["conda"]]) as results:
74
- for key, tmpfile, _ in results:
88
+ def run_cmd(cmd):
89
+ result = subprocess.run(
90
+ cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
91
+ )
92
+ if result.returncode != 0:
93
+ print(f"Bootstrap failed while executing: {cmd}")
94
+ print("Stdout:", result.stdout)
95
+ print("Stderr:", result.stderr)
96
+ sys.exit(1)
97
+
98
+ @timer
99
+ def install_micromamba(architecture):
100
+ micromamba_dir = os.path.join(os.getcwd(), "micromamba")
101
+ micromamba_path = os.path.join(micromamba_dir, "bin", "micromamba")
102
+
103
+ if which("micromamba"):
104
+ return which("micromamba")
105
+ if os.path.exists(micromamba_path):
106
+ os.environ["PATH"] += os.pathsep + os.path.dirname(micromamba_path)
107
+ return micromamba_path
108
+
109
+ # Download and extract in one go
110
+ # TODO: Serve from cloudflare
111
+ url = f"https://micro.mamba.pm/api/micromamba/{architecture}/2.0.4"
112
+
113
+ # Prepare directory once
114
+ os.makedirs(os.path.dirname(micromamba_path), exist_ok=True)
115
+
116
+ # Stream and process directly to file
117
+ with requests.get(url, stream=True, timeout=30) as response:
118
+ if response.status_code != 200:
119
+ raise Exception(
120
+ f"Failed to download micromamba: HTTP {response.status_code}"
121
+ )
122
+
123
+ decompressor = bz2.BZ2Decompressor()
124
+
125
+ # Process in memory without temporary files
126
+ tar_content = decompressor.decompress(response.raw.read())
127
+
128
+ with tarfile.open(fileobj=io.BytesIO(tar_content), mode="r:") as tar:
129
+ member = tar.getmember("bin/micromamba")
130
+ # Extract directly to final location
131
+ with open(micromamba_path, "wb") as f:
132
+ f.write(tar.extractfile(member).read())
133
+
134
+ # Set executable permission
135
+ os.chmod(micromamba_path, 0o755)
136
+
137
+ # Update PATH only once at the end
138
+ os.environ["PATH"] += os.pathsep + os.path.dirname(micromamba_path)
139
+ return micromamba_path
140
+
141
+ @timer
142
+ def download_conda_packages(storage, packages, dest_dir):
143
+
144
+ def process_conda_package(args):
75
145
  # Ensure that conda packages go into architecture specific folders.
76
146
  # The path looks like REPO/CHANNEL/CONDA_SUBDIR/PACKAGE. We trick
77
147
  # Micromamba into believing that all packages are coming from a local
78
148
  # channel - the only hurdle is ensuring that packages are organised
79
149
  # properly.
80
-
81
- # TODO: consider RAM disk
82
- dest = os.path.join(conda_pkgs_dir, "/".join(key.split("/")[-2:]))
150
+ key, tmpfile, dest_dir = args
151
+ dest = os.path.join(dest_dir, "/".join(key.split("/")[-2:]))
83
152
  os.makedirs(os.path.dirname(dest), exist_ok=True)
84
153
  shutil.move(tmpfile, dest)
85
154
 
86
- # Create Conda environment.
87
- cmds = [
88
- # TODO: check if mamba or conda are already available on the image
89
- # TODO: micromamba installation can be pawned off to micromamba.py
90
- f"""set -e;
91
- if ! command -v micromamba >/dev/null 2>&1; then
92
- mkdir -p micromamba;
93
- python -c "import requests, bz2, sys; data = requests.get('https://micro.mamba.pm/api/micromamba/{architecture}/1.5.7').content; sys.stdout.buffer.write(bz2.decompress(data))" | tar -xv -C $(pwd)/micromamba bin/micromamba --strip-components 1;
155
+ os.makedirs(dest_dir, exist_ok=True)
156
+ with storage.load_bytes([package["path"] for package in packages]) as results:
157
+ with concurrent.futures.ThreadPoolExecutor() as executor:
158
+ executor.map(
159
+ process_conda_package,
160
+ [(key, tmpfile, dest_dir) for key, tmpfile, _ in results],
161
+ )
162
+ # for key, tmpfile, _ in results:
163
+
164
+ # # TODO: consider RAM disk
165
+ # dest = os.path.join(dest_dir, "/".join(key.split("/")[-2:]))
166
+ # os.makedirs(os.path.dirname(dest), exist_ok=True)
167
+ # shutil.move(tmpfile, dest)
168
+ return dest_dir
169
+
170
+ @timer
171
+ def download_pypi_packages(storage, packages, dest_dir):
172
+
173
+ def process_pypi_package(args):
174
+ key, tmpfile, dest_dir = args
175
+ dest = os.path.join(dest_dir, os.path.basename(key))
176
+ shutil.move(tmpfile, dest)
177
+
178
+ os.makedirs(dest_dir, exist_ok=True)
179
+ with storage.load_bytes([package["path"] for package in packages]) as results:
180
+ with concurrent.futures.ThreadPoolExecutor() as executor:
181
+ executor.map(
182
+ process_pypi_package,
183
+ [(key, tmpfile, dest_dir) for key, tmpfile, _ in results],
184
+ )
185
+ # for key, tmpfile, _ in results:
186
+ # dest = os.path.join(dest_dir, os.path.basename(key))
187
+ # shutil.move(tmpfile, dest)
188
+ return dest_dir
189
+
190
+ @timer
191
+ def create_conda_environment(prefix, conda_pkgs_dir):
192
+ cmd = f'''set -e;
193
+ tmpfile=$(mktemp);
194
+ echo "@EXPLICIT" > "$tmpfile";
195
+ ls -d {conda_pkgs_dir}/*/* >> "$tmpfile";
94
196
  export PATH=$PATH:$(pwd)/micromamba;
95
- if ! command -v micromamba >/dev/null 2>&1; then
96
- echo "Failed to install Micromamba!";
97
- exit 1;
98
- fi;
99
- fi""",
100
- # Create a conda environment through Micromamba.
101
- f'''set -e;
102
- tmpfile=$(mktemp);
103
- echo "@EXPLICIT" > "$tmpfile";
104
- ls -d {conda_pkgs_dir}/*/* >> "$tmpfile";
105
- export PATH=$PATH:$(pwd)/micromamba;
106
- export CONDA_PKGS_DIRS=$(pwd)/micromamba/pkgs;
107
- micromamba create --yes --offline --no-deps --safety-checks=disabled --no-extra-safety-checks --prefix {prefix} --file "$tmpfile";
108
- rm "$tmpfile"''',
109
- ]
110
-
111
- # Download PyPI packages.
112
- if "pypi" in env:
113
- pypi_pkgs_dir = os.path.join(pkgs_dir, "pypi")
114
- with storage.load_bytes(
115
- [package["path"] for package in env["pypi"]]
116
- ) as results:
117
- for key, tmpfile, _ in results:
118
- dest = os.path.join(pypi_pkgs_dir, os.path.basename(key))
119
- os.makedirs(os.path.dirname(dest), exist_ok=True)
120
- shutil.move(tmpfile, dest)
121
-
122
- # Install PyPI packages.
123
- cmds.extend(
124
- [
125
- f"""set -e;
126
- export PATH=$PATH:$(pwd)/micromamba;
127
- export CONDA_PKGS_DIRS=$(pwd)/micromamba/pkgs;
128
- micromamba run --prefix {prefix} python -m pip --disable-pip-version-check install --root-user-action=ignore --no-compile {pypi_pkgs_dir}/*.whl --no-user"""
129
- ]
130
- )
197
+ export CONDA_PKGS_DIRS=$(pwd)/micromamba/pkgs;
198
+ export MAMBA_NO_LOW_SPEED_LIMIT=1;
199
+ export MAMBA_USE_INDEX_CACHE=1;
200
+ export MAMBA_NO_PROGRESS_BARS=1;
201
+ export CONDA_FETCH_THREADS=1;
202
+ micromamba create --yes --offline --no-deps \
203
+ --safety-checks=disabled --no-extra-safety-checks \
204
+ --prefix {prefix} --file "$tmpfile" \
205
+ --no-pyc --no-rc --always-copy;
206
+ rm "$tmpfile"'''
207
+ run_cmd(cmd)
131
208
 
132
- for cmd in cmds:
133
- result = subprocess.run(
134
- cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE
135
- )
136
- if result.returncode != 0:
137
- print(f"Bootstrap failed while executing: {cmd}")
138
- print("Stdout:", result.stdout.decode())
139
- print("Stderr:", result.stderr.decode())
140
- sys.exit(1)
209
+ @timer
210
+ def install_pypi_packages(prefix, pypi_pkgs_dir):
211
+
212
+ cmd = f"""set -e;
213
+ export PATH=$PATH:$(pwd)/micromamba;
214
+ export CONDA_PKGS_DIRS=$(pwd)/micromamba/pkgs;
215
+ micromamba run --prefix {prefix} python -m pip --disable-pip-version-check \
216
+ install --root-user-action=ignore --no-compile --no-index \
217
+ --no-cache-dir --no-deps --prefer-binary \
218
+ --find-links={pypi_pkgs_dir} --no-user \
219
+ --no-warn-script-location --no-input \
220
+ {pypi_pkgs_dir}/*.whl
221
+ """
222
+ run_cmd(cmd)
223
+
224
+ @timer
225
+ def setup_environment(
226
+ architecture, storage, env, prefix, conda_pkgs_dir, pypi_pkgs_dir
227
+ ):
228
+ with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
229
+ # install micromamba, download conda and pypi packages in parallel
230
+ futures = {
231
+ "micromamba": executor.submit(install_micromamba, architecture),
232
+ "conda_pkgs": executor.submit(
233
+ download_conda_packages, storage, env["conda"], conda_pkgs_dir
234
+ ),
235
+ }
236
+ if "pypi" in env:
237
+ futures["pypi_pkgs"] = executor.submit(
238
+ download_pypi_packages, storage, env["pypi"], pypi_pkgs_dir
239
+ )
240
+
241
+ # create conda environment after micromamba is installed and conda packages are downloaded
242
+ done, _ = concurrent.futures.wait(
243
+ [futures["micromamba"], futures["conda_pkgs"]],
244
+ return_when=concurrent.futures.ALL_COMPLETED,
245
+ )
246
+
247
+ for future in done:
248
+ future.result()
249
+
250
+ # start conda environment creation
251
+ futures["conda_env"] = executor.submit(
252
+ create_conda_environment, prefix, conda_pkgs_dir
253
+ )
254
+
255
+ if "pypi" in env:
256
+ # install pypi packages after conda environment is created and pypi packages are downloaded
257
+ done, _ = concurrent.futures.wait(
258
+ [futures["conda_env"], futures["pypi_pkgs"]],
259
+ return_when=concurrent.futures.ALL_COMPLETED,
260
+ )
261
+
262
+ for future in done:
263
+ future.result()
264
+
265
+ # install pypi packages
266
+ futures["pypi_install"] = executor.submit(
267
+ install_pypi_packages, prefix, pypi_pkgs_dir
268
+ )
269
+ # wait for pypi packages to be installed
270
+ futures["pypi_install"].result()
271
+ else:
272
+ # wait for conda environment to be created
273
+ futures["conda_env"].result()
274
+
275
+ setup_environment(architecture, storage, env, prefix, conda_pkgs_dir, pypi_pkgs_dir)
@@ -50,31 +50,26 @@ class CondaStepDecorator(StepDecorator):
50
50
  # conda channels, users can specify channel::package as the package name.
51
51
 
52
52
  def __init__(self, attributes=None, statically_defined=False):
53
- self._user_defined_attributes = (
54
- attributes.copy() if attributes is not None else {}
53
+ self._attributes_with_user_values = (
54
+ set(attributes.keys()) if attributes is not None else set()
55
55
  )
56
+
56
57
  super(CondaStepDecorator, self).__init__(attributes, statically_defined)
57
58
 
58
59
  def init(self):
59
60
  super(CondaStepDecorator, self).init()
60
61
 
61
- # We have to go back and fixup _user_defined_attributes for potential
62
- # config resolution
63
- self._user_defined_attributes = {
64
- k: v
65
- for k, v in self.attributes.items()
66
- if k in self._user_defined_attributes
67
- }
68
-
69
62
  # Support legacy 'libraries=' attribute for the decorator.
70
63
  self.attributes["packages"] = {
71
64
  **self.attributes["libraries"],
72
65
  **self.attributes["packages"],
73
66
  }
74
67
  del self.attributes["libraries"]
68
+ if self.attributes["packages"]:
69
+ self._attributes_with_user_values.add("packages")
75
70
 
76
71
  def is_attribute_user_defined(self, name):
77
- return name in self._user_defined_attributes
72
+ return name in self._attributes_with_user_values
78
73
 
79
74
  def step_init(self, flow, graph, step, decos, environment, flow_datastore, logger):
80
75
  # The init_environment hook for Environment creates the relevant virtual
@@ -94,10 +89,10 @@ class CondaStepDecorator(StepDecorator):
94
89
  **super_attributes["packages"],
95
90
  **self.attributes["packages"],
96
91
  }
97
- self._user_defined_attributes = {
98
- **self._user_defined_attributes,
99
- **conda_base._user_defined_attributes,
100
- }
92
+ self._attributes_with_user_values.update(
93
+ conda_base._attributes_with_user_values
94
+ )
95
+
101
96
  self.attributes["python"] = (
102
97
  self.attributes["python"] or super_attributes["python"]
103
98
  )
@@ -344,22 +339,15 @@ class CondaFlowDecorator(FlowDecorator):
344
339
  }
345
340
 
346
341
  def __init__(self, attributes=None, statically_defined=False):
347
- self._user_defined_attributes = (
348
- attributes.copy() if attributes is not None else {}
342
+ self._attributes_with_user_values = (
343
+ set(attributes.keys()) if attributes is not None else set()
349
344
  )
345
+
350
346
  super(CondaFlowDecorator, self).__init__(attributes, statically_defined)
351
347
 
352
348
  def init(self):
353
349
  super(CondaFlowDecorator, self).init()
354
350
 
355
- # We have to go back and fixup _user_defined_attributes for potential
356
- # config resolution
357
- self._user_defined_attributes = {
358
- k: v
359
- for k, v in self.attributes.items()
360
- if k in self._user_defined_attributes
361
- }
362
-
363
351
  # Support legacy 'libraries=' attribute for the decorator.
364
352
  self.attributes["packages"] = {
365
353
  **self.attributes["libraries"],
@@ -370,7 +358,7 @@ class CondaFlowDecorator(FlowDecorator):
370
358
  self.attributes["python"] = str(self.attributes["python"])
371
359
 
372
360
  def is_attribute_user_defined(self, name):
373
- return name in self._user_defined_attributes
361
+ return name in self._attributes_with_user_values
374
362
 
375
363
  def flow_init(
376
364
  self, flow, graph, environment, flow_datastore, metadata, logger, echo, options
@@ -5,10 +5,11 @@ import functools
5
5
  import io
6
6
  import json
7
7
  import os
8
- import sys
9
8
  import tarfile
9
+ import threading
10
10
  import time
11
- from concurrent.futures import ThreadPoolExecutor
11
+ from concurrent.futures import ThreadPoolExecutor, as_completed
12
+ from functools import wraps
12
13
  from hashlib import sha256
13
14
  from io import BufferedIOBase, BytesIO
14
15
  from itertools import chain
@@ -50,7 +51,6 @@ class CondaEnvironment(MetaflowEnvironment):
50
51
 
51
52
  def validate_environment(self, logger, datastore_type):
52
53
  self.datastore_type = datastore_type
53
- self.logger = logger
54
54
 
55
55
  # Avoiding circular imports.
56
56
  from metaflow.plugins import DATASTORES
@@ -62,8 +62,21 @@ class CondaEnvironment(MetaflowEnvironment):
62
62
  from .micromamba import Micromamba
63
63
  from .pip import Pip
64
64
 
65
- micromamba = Micromamba()
66
- self.solvers = {"conda": micromamba, "pypi": Pip(micromamba)}
65
+ print_lock = threading.Lock()
66
+
67
+ def make_thread_safe(func):
68
+ @wraps(func)
69
+ def wrapper(*args, **kwargs):
70
+ with print_lock:
71
+ return func(*args, **kwargs)
72
+
73
+ return wrapper
74
+
75
+ self.logger = make_thread_safe(logger)
76
+
77
+ # TODO: Wire up logging
78
+ micromamba = Micromamba(self.logger)
79
+ self.solvers = {"conda": micromamba, "pypi": Pip(micromamba, self.logger)}
67
80
 
68
81
  def init_environment(self, echo, only_steps=None):
69
82
  # The implementation optimizes for latency to ensure as many operations can
@@ -150,6 +163,9 @@ class CondaEnvironment(MetaflowEnvironment):
150
163
  (
151
164
  package["path"],
152
165
  # Lazily fetch package from the interweb if needed.
166
+ # TODO: Depending on the len_hint, the package might be downloaded from
167
+ # the interweb prematurely. save_bytes needs to be adjusted to handle
168
+ # this scenario.
153
169
  LazyOpen(
154
170
  package["local_path"],
155
171
  "rb",
@@ -166,22 +182,60 @@ class CondaEnvironment(MetaflowEnvironment):
166
182
  if id_ in dirty:
167
183
  self.write_to_environment_manifest([id_, platform, type_], packages)
168
184
 
169
- # First resolve environments through Conda, before PyPI.
185
+ storage = None
186
+ if self.datastore_type not in ["local"]:
187
+ # Initialize storage for caching if using a remote datastore
188
+ storage = self.datastore(_datastore_packageroot(self.datastore, echo))
189
+
170
190
  self.logger("Bootstrapping virtual environment(s) ...")
171
- for solver in ["conda", "pypi"]:
172
- with ThreadPoolExecutor() as executor:
173
- results = list(
174
- executor.map(lambda x: solve(*x, solver), environments(solver))
175
- )
176
- _ = list(map(lambda x: self.solvers[solver].download(*x), results))
177
- with ThreadPoolExecutor() as executor:
178
- _ = list(
179
- executor.map(lambda x: self.solvers[solver].create(*x), results)
180
- )
181
- if self.datastore_type not in ["local"]:
182
- # Cache packages only when a remote datastore is in play.
183
- storage = self.datastore(_datastore_packageroot(self.datastore, echo))
184
- cache(storage, results, solver)
191
+ # Sequence of operations:
192
+ # 1. Start all conda solves in parallel
193
+ # 2. Download conda packages sequentially
194
+ # 3. Create and cache conda environments in parallel
195
+ # 4. Start PyPI solves in parallel after each conda environment is created
196
+ # 5. Download PyPI packages sequentially
197
+ # 6. Create and cache PyPI environments in parallel
198
+
199
+ with ThreadPoolExecutor() as executor:
200
+ # Start all conda solves in parallel
201
+ conda_futures = [
202
+ executor.submit(lambda x: solve(*x, "conda"), env)
203
+ for env in environments("conda")
204
+ ]
205
+
206
+ pypi_envs = {env[0]: env for env in environments("pypi")}
207
+ pypi_futures = []
208
+
209
+ # Process conda results sequentially for downloads
210
+ for future in as_completed(conda_futures):
211
+ result = future.result()
212
+ # Sequential conda download
213
+ self.solvers["conda"].download(*result)
214
+ # Parallel conda create and cache
215
+ create_future = executor.submit(self.solvers["conda"].create, *result)
216
+ if storage:
217
+ executor.submit(cache, storage, [result], "conda")
218
+
219
+ # Queue PyPI solve to start after conda create
220
+ if result[0] in pypi_envs:
221
+
222
+ def pypi_solve(env):
223
+ create_future.result() # Wait for conda create
224
+ return solve(*env, "pypi")
225
+
226
+ pypi_futures.append(
227
+ executor.submit(pypi_solve, pypi_envs[result[0]])
228
+ )
229
+
230
+ # Process PyPI results sequentially for downloads
231
+ for solve_future in pypi_futures:
232
+ result = solve_future.result()
233
+ # Sequential PyPI download
234
+ self.solvers["pypi"].download(*result)
235
+ # Parallel PyPI create and cache
236
+ executor.submit(self.solvers["pypi"].create, *result)
237
+ if storage:
238
+ executor.submit(cache, storage, [result], "pypi")
185
239
  self.logger("Virtual environment(s) bootstrapped!")
186
240
 
187
241
  def executable(self, step_name, default=None):
@@ -382,7 +436,8 @@ class CondaEnvironment(MetaflowEnvironment):
382
436
  'DISABLE_TRACING=True python -m metaflow.plugins.pypi.bootstrap "%s" %s "%s" linux-64'
383
437
  % (self.flow.name, id_, self.datastore_type),
384
438
  "echo 'Environment bootstrapped.'",
385
- "export PATH=$PATH:$(pwd)/micromamba",
439
+ # To avoid having to install micromamba in the PATH in micromamba.py, we add it to the PATH here.
440
+ "export PATH=$PATH:$(pwd)/micromamba/bin",
386
441
  ]
387
442
  else:
388
443
  # for @conda/@pypi(disabled=True).
@@ -1,7 +1,9 @@
1
+ import functools
1
2
  import json
2
3
  import os
3
4
  import subprocess
4
5
  import tempfile
6
+ import time
5
7
 
6
8
  from metaflow.exception import MetaflowException
7
9
  from metaflow.util import which
@@ -19,8 +21,11 @@ class MicromambaException(MetaflowException):
19
21
  super(MicromambaException, self).__init__(msg)
20
22
 
21
23
 
24
+ GLIBC_VERSION = os.environ.get("CONDA_OVERRIDE_GLIBC", "2.38")
25
+
26
+
22
27
  class Micromamba(object):
23
- def __init__(self):
28
+ def __init__(self, logger=None):
24
29
  # micromamba is a tiny version of the mamba package manager and comes with
25
30
  # metaflow specific performance enhancements.
26
31
 
@@ -33,6 +38,12 @@ class Micromamba(object):
33
38
  os.path.expanduser(_home),
34
39
  "micromamba",
35
40
  )
41
+
42
+ if logger:
43
+ self.logger = logger
44
+ else:
45
+ self.logger = lambda *args, **kwargs: None # No-op logger if not provided
46
+
36
47
  self.bin = (
37
48
  which(os.environ.get("METAFLOW_PATH_TO_MICROMAMBA") or "micromamba")
38
49
  or which("./micromamba") # to support remote execution
@@ -70,6 +81,9 @@ class Micromamba(object):
70
81
  "MAMBA_ADD_PIP_AS_PYTHON_DEPENDENCY": "true",
71
82
  "CONDA_SUBDIR": platform,
72
83
  # "CONDA_UNSATISFIABLE_HINTS_CHECK_DEPTH": "0" # https://github.com/conda/conda/issues/9862
84
+ # Add a default glibc version for linux-64 environments (ignored for other platforms)
85
+ # TODO: Make the version configurable
86
+ "CONDA_OVERRIDE_GLIBC": GLIBC_VERSION,
73
87
  }
74
88
  cmd = [
75
89
  "create",
@@ -78,6 +92,7 @@ class Micromamba(object):
78
92
  "--dry-run",
79
93
  "--no-extra-safety-checks",
80
94
  "--repodata-ttl=86400",
95
+ "--safety-checks=disabled",
81
96
  "--retry-clean-cache",
82
97
  "--prefix=%s/prefix" % tmp_dir,
83
98
  ]
@@ -91,10 +106,11 @@ class Micromamba(object):
91
106
  cmd.append("python==%s" % python)
92
107
  # TODO: Ensure a human readable message is returned when the environment
93
108
  # can't be resolved for any and all reasons.
94
- return [
109
+ solved_packages = [
95
110
  {k: v for k, v in item.items() if k in ["url"]}
96
111
  for item in self._call(cmd, env)["actions"]["LINK"]
97
112
  ]
113
+ return solved_packages
98
114
 
99
115
  def download(self, id_, packages, python, platform):
100
116
  # Unfortunately all the packages need to be catalogued in package cache
@@ -103,8 +119,6 @@ class Micromamba(object):
103
119
  # Micromamba is painfully slow in determining if many packages are in fact
104
120
  # already cached. As a perf heuristic, we check if the environment already
105
121
  # exists to short circuit package downloads.
106
- if self.path_to_environment(id_, platform):
107
- return
108
122
 
109
123
  prefix = "{env_dirs}/{keyword}/{platform}/{id}".format(
110
124
  env_dirs=self.info()["envs_dirs"][0],
@@ -113,13 +127,18 @@ class Micromamba(object):
113
127
  id=id_,
114
128
  )
115
129
 
116
- # Another forced perf heuristic to skip cross-platform downloads.
130
+ # cheap check
117
131
  if os.path.exists(f"{prefix}/fake.done"):
118
132
  return
119
133
 
134
+ # somewhat expensive check
135
+ if self.path_to_environment(id_, platform):
136
+ return
137
+
120
138
  with tempfile.TemporaryDirectory() as tmp_dir:
121
139
  env = {
122
140
  "CONDA_SUBDIR": platform,
141
+ "CONDA_OVERRIDE_GLIBC": GLIBC_VERSION,
123
142
  }
124
143
  cmd = [
125
144
  "create",
@@ -159,6 +178,7 @@ class Micromamba(object):
159
178
  # use hardlinks when possible, otherwise copy files
160
179
  # disabled for now since it adds to environment creation latencies
161
180
  "CONDA_ALLOW_SOFTLINKS": "0",
181
+ "CONDA_OVERRIDE_GLIBC": GLIBC_VERSION,
162
182
  }
163
183
  cmd = [
164
184
  "create",
@@ -174,6 +194,7 @@ class Micromamba(object):
174
194
  cmd.append("{url}".format(**package))
175
195
  self._call(cmd, env)
176
196
 
197
+ @functools.lru_cache(maxsize=None)
177
198
  def info(self):
178
199
  return self._call(["config", "list", "-a"])
179
200
 
@@ -198,18 +219,24 @@ class Micromamba(object):
198
219
  }
199
220
  directories = self.info()["pkgs_dirs"]
200
221
  # search all package caches for packages
201
- metadata = {
202
- url: os.path.join(d, file)
222
+
223
+ file_to_path = {}
224
+ for d in directories:
225
+ if os.path.isdir(d):
226
+ try:
227
+ with os.scandir(d) as entries:
228
+ for entry in entries:
229
+ if entry.is_file():
230
+ # Prefer the first occurrence if the file exists in multiple directories
231
+ file_to_path.setdefault(entry.name, entry.path)
232
+ except OSError:
233
+ continue
234
+ ret = {
235
+ # set package tarball local paths to None if package tarballs are missing
236
+ url: file_to_path.get(file)
203
237
  for url, file in packages_to_filenames.items()
204
- for d in directories
205
- if os.path.isdir(d)
206
- and file in os.listdir(d)
207
- and os.path.isfile(os.path.join(d, file))
208
238
  }
209
- # set package tarball local paths to None if package tarballs are missing
210
- for url in packages_to_filenames:
211
- metadata.setdefault(url, None)
212
- return metadata
239
+ return ret
213
240
 
214
241
  def interpreter(self, id_):
215
242
  return os.path.join(self.path_to_environment(id_), "bin/python")