metaflow 2.12.39__py2.py3-none-any.whl → 2.13.1__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. metaflow/__init__.py +1 -1
  2. metaflow/cli.py +111 -36
  3. metaflow/cli_args.py +2 -2
  4. metaflow/cli_components/run_cmds.py +3 -1
  5. metaflow/datastore/flow_datastore.py +2 -2
  6. metaflow/exception.py +8 -2
  7. metaflow/flowspec.py +48 -36
  8. metaflow/graph.py +28 -27
  9. metaflow/includefile.py +2 -2
  10. metaflow/lint.py +35 -20
  11. metaflow/metadata_provider/heartbeat.py +23 -8
  12. metaflow/metaflow_config.py +7 -0
  13. metaflow/parameters.py +11 -4
  14. metaflow/plugins/argo/argo_client.py +0 -2
  15. metaflow/plugins/argo/argo_workflows.py +86 -104
  16. metaflow/plugins/argo/argo_workflows_cli.py +0 -1
  17. metaflow/plugins/argo/argo_workflows_decorator.py +2 -4
  18. metaflow/plugins/argo/argo_workflows_deployer_objects.py +42 -0
  19. metaflow/plugins/argo/jobset_input_paths.py +0 -1
  20. metaflow/plugins/aws/aws_utils.py +6 -1
  21. metaflow/plugins/aws/batch/batch_client.py +1 -3
  22. metaflow/plugins/aws/batch/batch_decorator.py +11 -11
  23. metaflow/plugins/aws/secrets_manager/aws_secrets_manager_secrets_provider.py +13 -10
  24. metaflow/plugins/aws/step_functions/dynamo_db_client.py +0 -3
  25. metaflow/plugins/aws/step_functions/production_token.py +1 -1
  26. metaflow/plugins/aws/step_functions/step_functions.py +1 -1
  27. metaflow/plugins/aws/step_functions/step_functions_cli.py +0 -1
  28. metaflow/plugins/aws/step_functions/step_functions_decorator.py +0 -1
  29. metaflow/plugins/aws/step_functions/step_functions_deployer_objects.py +0 -1
  30. metaflow/plugins/cards/card_creator.py +1 -0
  31. metaflow/plugins/cards/card_decorator.py +46 -8
  32. metaflow/plugins/kubernetes/kube_utils.py +55 -1
  33. metaflow/plugins/kubernetes/kubernetes.py +33 -80
  34. metaflow/plugins/kubernetes/kubernetes_cli.py +22 -5
  35. metaflow/plugins/kubernetes/kubernetes_decorator.py +49 -2
  36. metaflow/plugins/kubernetes/kubernetes_job.py +3 -6
  37. metaflow/plugins/kubernetes/kubernetes_jobsets.py +22 -5
  38. metaflow/plugins/pypi/bootstrap.py +249 -81
  39. metaflow/plugins/pypi/conda_environment.py +83 -27
  40. metaflow/plugins/pypi/micromamba.py +77 -36
  41. metaflow/plugins/pypi/pip.py +9 -6
  42. metaflow/plugins/pypi/utils.py +4 -2
  43. metaflow/runner/click_api.py +175 -39
  44. metaflow/runner/deployer_impl.py +6 -1
  45. metaflow/runner/metaflow_runner.py +6 -1
  46. metaflow/runner/utils.py +5 -0
  47. metaflow/user_configs/config_options.py +87 -34
  48. metaflow/user_configs/config_parameters.py +44 -25
  49. metaflow/util.py +2 -2
  50. metaflow/version.py +1 -1
  51. {metaflow-2.12.39.dist-info → metaflow-2.13.1.dist-info}/METADATA +2 -2
  52. {metaflow-2.12.39.dist-info → metaflow-2.13.1.dist-info}/RECORD +56 -56
  53. {metaflow-2.12.39.dist-info → metaflow-2.13.1.dist-info}/WHEEL +1 -1
  54. {metaflow-2.12.39.dist-info → metaflow-2.13.1.dist-info}/LICENSE +0 -0
  55. {metaflow-2.12.39.dist-info → metaflow-2.13.1.dist-info}/entry_points.txt +0 -0
  56. {metaflow-2.12.39.dist-info → metaflow-2.13.1.dist-info}/top_level.txt +0 -0
metaflow/plugins/kubernetes/kubernetes_cli.py

@@ -3,14 +3,17 @@ import sys
 import time
 import traceback
 
-from metaflow.plugins.kubernetes.kube_utils import parse_cli_options
+from metaflow.plugins.kubernetes.kube_utils import (
+    parse_cli_options,
+    parse_kube_keyvalue_list,
+)
 from metaflow.plugins.kubernetes.kubernetes_client import KubernetesClient
 import metaflow.tracing as tracing
 from metaflow import JSONTypeClass, util
 from metaflow._vendor import click
 from metaflow.exception import METAFLOW_EXIT_DISALLOW_RETRY, MetaflowException
 from metaflow.metadata_provider.util import sync_local_metadata_from_datastore
-from metaflow.metaflow_config import DATASTORE_LOCAL_DIR, KUBERNETES_LABELS
+from metaflow.metaflow_config import DATASTORE_LOCAL_DIR
 from metaflow.mflog import TASK_LOG_SOURCE
 from metaflow.unbounded_foreach import UBF_CONTROL, UBF_TASK
 
@@ -18,9 +21,7 @@ from .kubernetes import (
     Kubernetes,
     KubernetesException,
     KubernetesKilledException,
-    parse_kube_keyvalue_list,
 )
-from .kubernetes_decorator import KubernetesDecorator
 
 
 @click.group()
@@ -132,6 +133,18 @@ def kubernetes():
     type=str,
     help="Quality of Service class for the Kubernetes pod",
 )
+@click.option(
+    "--labels",
+    default=None,
+    type=JSONTypeClass(),
+    multiple=False,
+)
+@click.option(
+    "--annotations",
+    default=None,
+    type=JSONTypeClass(),
+    multiple=False,
+)
 @click.pass_context
 def step(
     ctx,
@@ -161,6 +174,8 @@ def step(
     port=None,
     num_parallel=None,
     qos=None,
+    labels=None,
+    annotations=None,
     **kwargs
 ):
     def echo(msg, stream="stderr", job_id=None, **kwargs):
@@ -302,8 +317,10 @@ def step(
             port=port,
             num_parallel=num_parallel,
             qos=qos,
+            labels=labels,
+            annotations=annotations,
         )
-    except Exception as e:
+    except Exception:
         traceback.print_exc(chain=False)
         _sync_metadata()
         sys.exit(METAFLOW_EXIT_DISALLOW_RETRY)
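The new --labels and --annotations options above rely on JSONTypeClass to turn a JSON string on the command line into a dict. A minimal sketch of that parsing behaviour, using plain click and a stand-in JSONType rather than Metaflow's internal class:

import json

import click


class JSONType(click.ParamType):
    # Stand-in for Metaflow's JSONTypeClass: parse a JSON string into a Python object.
    name = "json"

    def convert(self, value, param, ctx):
        try:
            return json.loads(value)
        except json.JSONDecodeError as e:
            self.fail(f"not valid JSON: {e}", param, ctx)


@click.command()
@click.option("--labels", type=JSONType(), default=None)
def step(labels):
    # e.g. --labels '{"team": "ml"}' arrives here as {'team': 'ml'}
    click.echo(repr(labels))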
metaflow/plugins/kubernetes/kubernetes_decorator.py

@@ -19,6 +19,8 @@ from metaflow.metaflow_config import (
     KUBERNETES_GPU_VENDOR,
     KUBERNETES_IMAGE_PULL_POLICY,
     KUBERNETES_MEMORY,
+    KUBERNETES_LABELS,
+    KUBERNETES_ANNOTATIONS,
     KUBERNETES_NAMESPACE,
     KUBERNETES_NODE_SELECTOR,
     KUBERNETES_PERSISTENT_VOLUME_CLAIMS,
@@ -34,7 +36,8 @@ from metaflow.sidecar import Sidecar
 from metaflow.unbounded_foreach import UBF_CONTROL
 
 from ..aws.aws_utils import get_docker_registry, get_ec2_instance_metadata
-from .kubernetes import KubernetesException, parse_kube_keyvalue_list
+from .kubernetes import KubernetesException
+from .kube_utils import validate_kube_labels, parse_kube_keyvalue_list
 
 try:
     unicode
@@ -89,6 +92,10 @@ class KubernetesDecorator(StepDecorator):
     tolerations : List[str], default []
         The default is extracted from METAFLOW_KUBERNETES_TOLERATIONS.
         Kubernetes tolerations to use when launching pod in Kubernetes.
+    labels: Dict[str, str], default: METAFLOW_KUBERNETES_LABELS
+        Kubernetes labels to use when launching pod in Kubernetes.
+    annotations: Dict[str, str], default: METAFLOW_KUBERNETES_ANNOTATIONS
+        Kubernetes annotations to use when launching pod in Kubernetes.
     use_tmpfs : bool, default False
         This enables an explicit tmpfs mount for this step.
     tmpfs_tempdir : bool, default True
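For context, a hypothetical flow exercising the newly documented attributes (the label and annotation values here are illustrative, not defaults):

from metaflow import FlowSpec, kubernetes, step


class LabeledFlow(FlowSpec):
    @kubernetes(
        labels={"team": "ml", "cost-center": "1234"},
        annotations={"owner": "data-platform"},
    )
    @step
    def start(self):
        self.next(self.end)

    @step
    def end(self):
        pass


if __name__ == "__main__":
    LabeledFlow()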
@@ -131,6 +138,8 @@ class KubernetesDecorator(StepDecorator):
         "gpu_vendor": None,
         "tolerations": None,  # e.g., [{"key": "arch", "operator": "Equal", "value": "amd"},
         #                               {"key": "foo", "operator": "Equal", "value": "bar"}]
+        "labels": None,  # e.g. {"test-label": "value", "another-label":"value2"}
+        "annotations": None,  # e.g. {"note": "value", "another-note": "value2"}
         "use_tmpfs": None,
         "tmpfs_tempdir": True,
         "tmpfs_size": None,
@@ -217,6 +226,36 @@ class KubernetesDecorator(StepDecorator):
             self.attributes["memory"] = KUBERNETES_MEMORY
         if self.attributes["disk"] == self.defaults["disk"] and KUBERNETES_DISK:
             self.attributes["disk"] = KUBERNETES_DISK
+        # Label source precedence (decreasing):
+        # - System labels (set outside of decorator)
+        # - Decorator labels: @kubernetes(labels={})
+        # - Environment variable labels: METAFLOW_KUBERNETES_LABELS=
+        deco_labels = {}
+        if self.attributes["labels"] is not None:
+            deco_labels = self.attributes["labels"]
+
+        env_labels = {}
+        if KUBERNETES_LABELS:
+            env_labels = parse_kube_keyvalue_list(KUBERNETES_LABELS.split(","), False)
+
+        self.attributes["labels"] = {**env_labels, **deco_labels}
+
+        # Annotations
+        # annotation precedence (decreasing):
+        # - System annotations (set outside of decorator)
+        # - Decorator annotations: @kubernetes(annotations={})
+        # - Environment annotations: METAFLOW_KUBERNETES_ANNOTATIONS=
+        deco_annotations = {}
+        if self.attributes["annotations"] is not None:
+            deco_annotations = self.attributes["annotations"]
+
+        env_annotations = {}
+        if KUBERNETES_ANNOTATIONS:
+            env_annotations = parse_kube_keyvalue_list(
+                KUBERNETES_ANNOTATIONS.split(","), False
+            )
+
+        self.attributes["annotations"] = {**env_annotations, **deco_annotations}
 
         # If no docker image is explicitly specified, impute a default image.
         if not self.attributes["image"]:
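The merges above rely on dict-unpacking order: keys from the right-hand dict win, so decorator-supplied values override environment-derived ones. A quick illustration:

env_labels = {"team": "infra", "region": "us-east-1"}  # from METAFLOW_KUBERNETES_LABELS
deco_labels = {"team": "ml"}                           # from @kubernetes(labels=...)

merged = {**env_labels, **deco_labels}
assert merged == {"team": "ml", "region": "us-east-1"}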
@@ -371,6 +410,9 @@ class KubernetesDecorator(StepDecorator):
                 )
             )
 
+        validate_kube_labels(self.attributes["labels"])
+        # TODO: add validation to annotations as well?
+
     def package_init(self, flow, step_name, environment):
         try:
             # Kubernetes is a soft dependency.
@@ -426,7 +468,12 @@ class KubernetesDecorator(StepDecorator):
                     "=".join([key, str(val)]) if val else key
                     for key, val in v.items()
                 ]
-            elif k in ["tolerations", "persistent_volume_claims"]:
+            elif k in [
+                "tolerations",
+                "persistent_volume_claims",
+                "labels",
+                "annotations",
+            ]:
                 cli_args.command_options[k] = json.dumps(v)
             else:
                 cli_args.command_options[k] = v
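Serializing the dict-valued attributes with json.dumps here pairs with the JSON-typed CLI options added in kubernetes_cli.py: the decorator writes JSON onto the generated step command line, and the step command parses it back into a dict. Roughly:

import json

labels = {"team": "ml", "env": "prod"}
wire = json.dumps(labels)          # what the decorator puts on the command line
assert json.loads(wire) == labels  # what JSONTypeClass recovers in the step command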
metaflow/plugins/kubernetes/kubernetes_job.py

@@ -1,22 +1,19 @@
-import copy
 import json
 import math
 import random
-import sys
 import time
 
 from metaflow.exception import MetaflowException
 from metaflow.metaflow_config import KUBERNETES_SECRETS
 from metaflow.tracing import inject_tracing_vars
-from metaflow.unbounded_foreach import UBF_CONTROL, UBF_TASK
 
 CLIENT_REFRESH_INTERVAL_SECONDS = 300
+
+from .kube_utils import qos_requests_and_limits
 from .kubernetes_jobsets import (
     KubernetesJobSet,
 )  # We need this import for Kubernetes Client.
 
-from .kube_utils import qos_requests_and_limits
-
 
 class KubernetesJobException(MetaflowException):
     headline = "Kubernetes job error"
@@ -430,7 +427,7 @@ class RunningJob(object):
         def best_effort_kill():
             try:
                 self.kill()
-            except Exception as ex:
+            except Exception:
                 pass
 
         atexit.register(best_effort_kill)
@@ -1,4 +1,3 @@
1
- import copy
2
1
  import json
3
2
  import math
4
3
  import random
@@ -7,7 +6,6 @@ from collections import namedtuple
 from metaflow.exception import MetaflowException
 from metaflow.metaflow_config import KUBERNETES_JOBSET_GROUP, KUBERNETES_JOBSET_VERSION
 from metaflow.tracing import inject_tracing_vars
-from metaflow.metaflow_config import KUBERNETES_SECRETS
 
 from .kube_utils import qos_requests_and_limits
 
@@ -257,7 +255,7 @@ class RunningJobSet(object):
         def best_effort_kill():
             try:
                 self.kill()
-            except Exception as ex:
+            except Exception:
                 pass
 
         atexit.register(best_effort_kill)
@@ -342,7 +340,7 @@ class RunningJobSet(object):
                 stdout=True,
                 tty=False,
             )
-        except Exception as e:
+        except Exception:
             with client.ApiClient() as api_client:
                 # If we are unable to kill the control pod then
                 # Delete the jobset to kill the subsequent pods.
@@ -862,6 +860,16 @@ class KubernetesJobSet(object):
         self._annotations = dict(self._annotations, **{name: value})
         return self
 
+    def labels(self, labels):
+        for k, v in labels.items():
+            self.label(k, v)
+        return self
+
+    def annotations(self, annotations):
+        for k, v in annotations.items():
+            self.annotation(k, v)
+        return self
+
     def secret(self, name):
         self.worker.secret(name)
         self.control.secret(name)
@@ -987,15 +995,24 @@ class KubernetesArgoJobSet(object):
         self._labels = dict(self._labels, **{name: value})
         return self
 
+    def labels(self, labels):
+        for k, v in labels.items():
+            self.label(k, v)
+        return self
+
     def annotation(self, name, value):
         self.worker.annotation(name, value)
         self.control.annotation(name, value)
         self._annotations = dict(self._annotations, **{name: value})
         return self
 
+    def annotations(self, annotations):
+        for k, v in annotations.items():
+            self.annotation(k, v)
+        return self
+
     def dump(self):
         client = self._kubernetes_sdk
-        import json
 
         data = json.dumps(
             client.ApiClient().sanitize_for_serialization(
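Both jobset classes gain plural helpers that fan a dict out over the existing singular setters and return self, keeping the builder API chainable. A self-contained sketch of the pattern (the class below is illustrative, not Metaflow's):

class PodMetadata:
    def __init__(self):
        self._labels = {}

    def label(self, name, value):
        # singular setter: merge one key and stay chainable
        self._labels = dict(self._labels, **{name: value})
        return self

    def labels(self, labels):
        # plural helper: apply a whole dict via the singular setter
        for k, v in labels.items():
            self.label(k, v)
        return self


meta = PodMetadata().labels({"team": "ml"}).label("env", "prod")
assert meta._labels == {"team": "ml", "env": "prod"}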
metaflow/plugins/pypi/bootstrap.py

@@ -1,4 +1,5 @@
 import bz2
+import concurrent.futures
 import io
 import json
 import os
@@ -6,21 +7,33 @@ import shutil
 import subprocess
 import sys
 import tarfile
-
+import time
+from urllib.error import URLError
+from urllib.request import urlopen
 from metaflow.metaflow_config import DATASTORE_LOCAL_DIR
 from metaflow.plugins import DATASTORES
+from metaflow.plugins.pypi.utils import MICROMAMBA_MIRROR_URL, MICROMAMBA_URL
 from metaflow.util import which
+from urllib.request import Request
+import warnings
 
 from . import MAGIC_FILE, _datastore_packageroot
 
 # Bootstraps a valid conda virtual environment composed of conda and pypi packages
 
-if __name__ == "__main__":
-    if len(sys.argv) != 5:
-        print("Usage: bootstrap.py <flow_name> <id> <datastore_type> <architecture>")
-        sys.exit(1)
-    _, flow_name, id_, datastore_type, architecture = sys.argv
 
+def timer(func):
+    def wrapper(*args, **kwargs):
+        start_time = time.time()
+        result = func(*args, **kwargs)
+        duration = time.time() - start_time
+        # print(f"Time taken for {func.__name__}: {duration:.2f} seconds")
+        return result
+
+    return wrapper
+
+
+if __name__ == "__main__":
     # TODO: Detect architecture on the fly when dealing with arm architectures.
     # ARCH=$(uname -m)
     # OS=$(uname)
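The timer decorator added above wraps each bootstrap phase; its report line is commented out in the released code, so it is a latent instrumentation hook rather than active logging. A standalone version with the print enabled, for illustration:

import time


def timer(func):
    # same shape as the bootstrap helper: measure wall-clock duration of a call
    def wrapper(*args, **kwargs):
        start_time = time.time()
        result = func(*args, **kwargs)
        duration = time.time() - start_time
        print(f"Time taken for {func.__name__}: {duration:.2f} seconds")
        return result

    return wrapper


@timer
def simulated_phase():
    time.sleep(0.1)


simulated_phase()  # prints roughly: Time taken for simulated_phase: 0.10 seconds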
@@ -45,96 +58,251 @@ if __name__ == "__main__":
     #     fi
     # fi
 
-    prefix = os.path.join(os.getcwd(), architecture, id_)
-    pkgs_dir = os.path.join(os.getcwd(), ".pkgs")
-    manifest_dir = os.path.join(os.getcwd(), DATASTORE_LOCAL_DIR, flow_name)
+    def run_cmd(cmd):
+        result = subprocess.run(
+            cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
+        )
+        if result.returncode != 0:
+            print(f"Bootstrap failed while executing: {cmd}")
+            print("Stdout:", result.stdout)
+            print("Stderr:", result.stderr)
+            sys.exit(1)
 
-    datastores = [d for d in DATASTORES if d.TYPE == datastore_type]
-    if not datastores:
-        print(f"No datastore found for type: {datastore_type}")
-        sys.exit(1)
+    @timer
+    def install_micromamba(architecture):
+        micromamba_dir = os.path.join(os.getcwd(), "micromamba")
+        micromamba_path = os.path.join(micromamba_dir, "bin", "micromamba")
+
+        if which("micromamba"):
+            return which("micromamba")
+        if os.path.exists(micromamba_path):
+            os.environ["PATH"] += os.pathsep + os.path.dirname(micromamba_path)
+            return micromamba_path
+
+        # Download and extract in one go
+        url = MICROMAMBA_URL.format(platform=architecture, version="2.0.4")
+        mirror_url = MICROMAMBA_MIRROR_URL.format(
+            platform=architecture, version="2.0.4"
+        )
+
+        # Prepare directory once
+        os.makedirs(os.path.dirname(micromamba_path), exist_ok=True)
+
+        # Download and decompress in one go
+        def _download_and_extract(url):
+            headers = {
+                "Accept-Encoding": "gzip, deflate, br",
+                "Connection": "keep-alive",
+                "User-Agent": "python-urllib",
+            }
+
+            max_retries = 3
+            for attempt in range(max_retries):
+                try:
+                    req = Request(url, headers=headers)
 
-    storage = datastores[0](
-        _datastore_packageroot(datastores[0], lambda *args, **kwargs: None)
-    )
+                    with urlopen(req) as response:
+                        decompressor = bz2.BZ2Decompressor()
+                        with warnings.catch_warnings():
+                            warnings.filterwarnings(
+                                "ignore", category=DeprecationWarning
+                            )
+                            with tarfile.open(
+                                fileobj=io.BytesIO(
+                                    decompressor.decompress(response.read())
+                                ),
+                                mode="r:",
+                            ) as tar:
+                                member = tar.getmember("bin/micromamba")
+                                tar.extract(member, micromamba_dir)
+                    break
+                except (URLError, IOError) as e:
+                    if attempt == max_retries - 1:
+                        raise Exception(
+                            f"Failed to download micromamba after {max_retries} attempts: {e}"
+                        )
+                    time.sleep(2**attempt)
 
-    # Move MAGIC_FILE inside local datastore.
-    os.makedirs(manifest_dir, exist_ok=True)
-    shutil.move(
-        os.path.join(os.getcwd(), MAGIC_FILE),
-        os.path.join(manifest_dir, MAGIC_FILE),
-    )
+        try:
+            # first try from mirror
+            _download_and_extract(mirror_url)
+        except Exception:
+            # download from mirror failed, try official source before failing.
+            _download_and_extract(url)
 
-    with open(os.path.join(manifest_dir, MAGIC_FILE)) as f:
-        env = json.load(f)[id_][architecture]
+        # Set executable permission
+        os.chmod(micromamba_path, 0o755)
 
-    # Download Conda packages.
-    conda_pkgs_dir = os.path.join(pkgs_dir, "conda")
-    with storage.load_bytes([package["path"] for package in env["conda"]]) as results:
-        for key, tmpfile, _ in results:
+        # Update PATH only once at the end
+        os.environ["PATH"] += os.pathsep + os.path.dirname(micromamba_path)
+        return micromamba_path
+
+    @timer
+    def download_conda_packages(storage, packages, dest_dir):
+        def process_conda_package(args):
             # Ensure that conda packages go into architecture specific folders.
             # The path looks like REPO/CHANNEL/CONDA_SUBDIR/PACKAGE. We trick
             # Micromamba into believing that all packages are coming from a local
             # channel - the only hurdle is ensuring that packages are organised
             # properly.
-
-            # TODO: consider RAM disk
-            dest = os.path.join(conda_pkgs_dir, "/".join(key.split("/")[-2:]))
+            key, tmpfile, dest_dir = args
+            dest = os.path.join(dest_dir, "/".join(key.split("/")[-2:]))
             os.makedirs(os.path.dirname(dest), exist_ok=True)
             shutil.move(tmpfile, dest)
 
-    # Create Conda environment.
-    cmds = [
-        # TODO: check if mamba or conda are already available on the image
-        # TODO: micromamba installation can be pawned off to micromamba.py
-        f"""set -e;
-            if ! command -v micromamba >/dev/null 2>&1; then
-                mkdir -p micromamba;
-                python -c "import requests, bz2, sys; data = requests.get('https://micro.mamba.pm/api/micromamba/{architecture}/1.5.7').content; sys.stdout.buffer.write(bz2.decompress(data))" | tar -xv -C $(pwd)/micromamba bin/micromamba --strip-components 1;
+        os.makedirs(dest_dir, exist_ok=True)
+        with storage.load_bytes([package["path"] for package in packages]) as results:
+            with concurrent.futures.ThreadPoolExecutor() as executor:
+                executor.map(
+                    process_conda_package,
+                    [(key, tmpfile, dest_dir) for key, tmpfile, _ in results],
+                )
+        # for key, tmpfile, _ in results:
+
+        #     # TODO: consider RAM disk
+        #     dest = os.path.join(dest_dir, "/".join(key.split("/")[-2:]))
+        #     os.makedirs(os.path.dirname(dest), exist_ok=True)
+        #     shutil.move(tmpfile, dest)
+        return dest_dir
+
+    @timer
+    def download_pypi_packages(storage, packages, dest_dir):
+        def process_pypi_package(args):
+            key, tmpfile, dest_dir = args
+            dest = os.path.join(dest_dir, os.path.basename(key))
+            shutil.move(tmpfile, dest)
+
+        os.makedirs(dest_dir, exist_ok=True)
+        with storage.load_bytes([package["path"] for package in packages]) as results:
+            with concurrent.futures.ThreadPoolExecutor() as executor:
+                executor.map(
+                    process_pypi_package,
+                    [(key, tmpfile, dest_dir) for key, tmpfile, _ in results],
+                )
+        # for key, tmpfile, _ in results:
+        #     dest = os.path.join(dest_dir, os.path.basename(key))
+        #     shutil.move(tmpfile, dest)
+        return dest_dir
+
+    @timer
+    def create_conda_environment(prefix, conda_pkgs_dir):
+        cmd = f'''set -e;
+            tmpfile=$(mktemp);
+            echo "@EXPLICIT" > "$tmpfile";
+            ls -d {conda_pkgs_dir}/*/* >> "$tmpfile";
+            export PATH=$PATH:$(pwd)/micromamba;
+            export CONDA_PKGS_DIRS=$(pwd)/micromamba/pkgs;
+            export MAMBA_NO_LOW_SPEED_LIMIT=1;
+            export MAMBA_USE_INDEX_CACHE=1;
+            export MAMBA_NO_PROGRESS_BARS=1;
+            export CONDA_FETCH_THREADS=1;
+            micromamba create --yes --offline --no-deps \
+                --safety-checks=disabled --no-extra-safety-checks \
+                --prefix {prefix} --file "$tmpfile" \
+                --no-pyc --no-rc --always-copy;
+            rm "$tmpfile"'''
+        run_cmd(cmd)
+
+    @timer
+    def install_pypi_packages(prefix, pypi_pkgs_dir):
+        cmd = f"""set -e;
             export PATH=$PATH:$(pwd)/micromamba;
-            if ! command -v micromamba >/dev/null 2>&1; then
-                echo "Failed to install Micromamba!";
-                exit 1;
-            fi;
-        fi""",
-        # Create a conda environment through Micromamba.
-        f'''set -e;
-            tmpfile=$(mktemp);
-            echo "@EXPLICIT" > "$tmpfile";
-            ls -d {conda_pkgs_dir}/*/* >> "$tmpfile";
-            export PATH=$PATH:$(pwd)/micromamba;
-            export CONDA_PKGS_DIRS=$(pwd)/micromamba/pkgs;
-            micromamba create --yes --offline --no-deps --safety-checks=disabled --no-extra-safety-checks --prefix {prefix} --file "$tmpfile";
-            rm "$tmpfile"''',
-    ]
-
-    # Download PyPI packages.
-    if "pypi" in env:
+            export CONDA_PKGS_DIRS=$(pwd)/micromamba/pkgs;
+            micromamba run --prefix {prefix} python -m pip --disable-pip-version-check \
+                install --root-user-action=ignore --no-compile --no-index \
+                --no-cache-dir --no-deps --prefer-binary \
+                --find-links={pypi_pkgs_dir} --no-user \
+                --no-warn-script-location --no-input \
+                {pypi_pkgs_dir}/*.whl
+        """
+        run_cmd(cmd)
+
+    @timer
+    def setup_environment(
+        architecture, storage, env, prefix, conda_pkgs_dir, pypi_pkgs_dir
+    ):
+        with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
+            # install micromamba, download conda and pypi packages in parallel
+            futures = {
+                "micromamba": executor.submit(install_micromamba, architecture),
+                "conda_pkgs": executor.submit(
+                    download_conda_packages, storage, env["conda"], conda_pkgs_dir
+                ),
+            }
+            if "pypi" in env:
+                futures["pypi_pkgs"] = executor.submit(
+                    download_pypi_packages, storage, env["pypi"], pypi_pkgs_dir
+                )
+
+            # create conda environment after micromamba is installed and conda packages are downloaded
+            done, _ = concurrent.futures.wait(
+                [futures["micromamba"], futures["conda_pkgs"]],
+                return_when=concurrent.futures.ALL_COMPLETED,
+            )
+
+            for future in done:
+                future.result()
+
+            # start conda environment creation
+            futures["conda_env"] = executor.submit(
+                create_conda_environment, prefix, conda_pkgs_dir
+            )
+
+            if "pypi" in env:
+                # install pypi packages after conda environment is created and pypi packages are downloaded
+                done, _ = concurrent.futures.wait(
+                    [futures["conda_env"], futures["pypi_pkgs"]],
+                    return_when=concurrent.futures.ALL_COMPLETED,
+                )
+
+                for future in done:
+                    future.result()
+
+                # install pypi packages
+                futures["pypi_install"] = executor.submit(
+                    install_pypi_packages, prefix, pypi_pkgs_dir
+                )
+                # wait for pypi packages to be installed
+                futures["pypi_install"].result()
+            else:
+                # wait for conda environment to be created
+                futures["conda_env"].result()
+
+    if len(sys.argv) != 5:
+        print("Usage: bootstrap.py <flow_name> <id> <datastore_type> <architecture>")
+        sys.exit(1)
+
+    try:
+        _, flow_name, id_, datastore_type, architecture = sys.argv
+
+        prefix = os.path.join(os.getcwd(), architecture, id_)
+        pkgs_dir = os.path.join(os.getcwd(), ".pkgs")
+        conda_pkgs_dir = os.path.join(pkgs_dir, "conda")
         pypi_pkgs_dir = os.path.join(pkgs_dir, "pypi")
-        with storage.load_bytes(
-            [package["path"] for package in env["pypi"]]
-        ) as results:
-            for key, tmpfile, _ in results:
-                dest = os.path.join(pypi_pkgs_dir, os.path.basename(key))
-                os.makedirs(os.path.dirname(dest), exist_ok=True)
-                shutil.move(tmpfile, dest)
-
-        # Install PyPI packages.
-        cmds.extend(
-            [
-                f"""set -e;
-            export PATH=$PATH:$(pwd)/micromamba;
-            export CONDA_PKGS_DIRS=$(pwd)/micromamba/pkgs;
-            micromamba run --prefix {prefix} python -m pip --disable-pip-version-check install --root-user-action=ignore --no-compile {pypi_pkgs_dir}/*.whl --no-user"""
-            ]
+        manifest_dir = os.path.join(os.getcwd(), DATASTORE_LOCAL_DIR, flow_name)
+
+        datastores = [d for d in DATASTORES if d.TYPE == datastore_type]
+        if not datastores:
+            print(f"No datastore found for type: {datastore_type}")
+            sys.exit(1)
+
+        storage = datastores[0](
+            _datastore_packageroot(datastores[0], lambda *args, **kwargs: None)
         )
 
-    for cmd in cmds:
-        result = subprocess.run(
-            cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE
+        # Move MAGIC_FILE inside local datastore.
+        os.makedirs(manifest_dir, exist_ok=True)
+        shutil.move(
+            os.path.join(os.getcwd(), MAGIC_FILE),
+            os.path.join(manifest_dir, MAGIC_FILE),
         )
-        if result.returncode != 0:
-            print(f"Bootstrap failed while executing: {cmd}")
-            print("Stdout:", result.stdout.decode())
-            print("Stderr:", result.stderr.decode())
-            sys.exit(1)
+        with open(os.path.join(manifest_dir, MAGIC_FILE)) as f:
+            env = json.load(f)[id_][architecture]
+
+        setup_environment(
+            architecture, storage, env, prefix, conda_pkgs_dir, pypi_pkgs_dir
+        )
+
+    except Exception as e:
+        print(f"Error: {str(e)}", file=sys.stderr)
+        sys.exit(1)
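The refactored setup_environment turns the old sequential bootstrap into a small dependency graph: micromamba installation and package downloads start in parallel, environment creation waits on the first two, and the pip install waits on the environment plus the PyPI downloads. A minimal sketch of that ordering with stand-in tasks (the functions below are placeholders, not the real helpers):

import concurrent.futures


def install_micromamba():
    return "micromamba ready"


def download_conda_packages():
    return "conda packages ready"


def download_pypi_packages():
    return "pypi packages ready"


def create_conda_environment():
    return "environment created"


def install_pypi_packages():
    return "pypi packages installed"


with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
    # phase 1: fully parallel
    f_mamba = executor.submit(install_micromamba)
    f_conda = executor.submit(download_conda_packages)
    f_pypi = executor.submit(download_pypi_packages)

    # phase 2: environment creation needs micromamba and the conda packages
    concurrent.futures.wait([f_mamba, f_conda])
    f_env = executor.submit(create_conda_environment)

    # phase 3: pip install needs the environment and the pypi packages
    concurrent.futures.wait([f_env, f_pypi])
    print(executor.submit(install_pypi_packages).result())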