ob-metaflow 2.12.36.3__py2.py3-none-any.whl → 2.13.0.1__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of ob-metaflow has been flagged as possibly problematic.

Files changed (65)
  1. metaflow/__init__.py +3 -0
  2. metaflow/cli.py +180 -718
  3. metaflow/cli_args.py +17 -0
  4. metaflow/cli_components/__init__.py +0 -0
  5. metaflow/cli_components/dump_cmd.py +96 -0
  6. metaflow/cli_components/init_cmd.py +51 -0
  7. metaflow/cli_components/run_cmds.py +360 -0
  8. metaflow/cli_components/step_cmd.py +189 -0
  9. metaflow/cli_components/utils.py +140 -0
  10. metaflow/cmd/develop/stub_generator.py +9 -2
  11. metaflow/datastore/flow_datastore.py +2 -2
  12. metaflow/decorators.py +63 -2
  13. metaflow/exception.py +8 -2
  14. metaflow/extension_support/plugins.py +41 -27
  15. metaflow/flowspec.py +175 -23
  16. metaflow/graph.py +28 -27
  17. metaflow/includefile.py +50 -22
  18. metaflow/lint.py +35 -20
  19. metaflow/metaflow_config.py +6 -1
  20. metaflow/package.py +17 -3
  21. metaflow/parameters.py +87 -23
  22. metaflow/plugins/__init__.py +4 -0
  23. metaflow/plugins/airflow/airflow_cli.py +1 -0
  24. metaflow/plugins/argo/argo_workflows.py +41 -1
  25. metaflow/plugins/argo/argo_workflows_cli.py +1 -0
  26. metaflow/plugins/argo/argo_workflows_deployer_objects.py +47 -1
  27. metaflow/plugins/aws/batch/batch_decorator.py +2 -2
  28. metaflow/plugins/aws/secrets_manager/aws_secrets_manager_secrets_provider.py +13 -10
  29. metaflow/plugins/aws/step_functions/step_functions.py +32 -0
  30. metaflow/plugins/aws/step_functions/step_functions_cli.py +1 -0
  31. metaflow/plugins/aws/step_functions/step_functions_deployer_objects.py +3 -0
  32. metaflow/plugins/cards/card_creator.py +1 -0
  33. metaflow/plugins/cards/card_decorator.py +46 -8
  34. metaflow/plugins/datatools/s3/s3op.py +3 -3
  35. metaflow/plugins/kubernetes/kubernetes_cli.py +1 -1
  36. metaflow/plugins/kubernetes/kubernetes_decorator.py +2 -2
  37. metaflow/plugins/pypi/bootstrap.py +196 -61
  38. metaflow/plugins/pypi/conda_decorator.py +20 -10
  39. metaflow/plugins/pypi/conda_environment.py +76 -21
  40. metaflow/plugins/pypi/micromamba.py +42 -15
  41. metaflow/plugins/pypi/pip.py +8 -3
  42. metaflow/plugins/pypi/pypi_decorator.py +11 -9
  43. metaflow/plugins/timeout_decorator.py +2 -2
  44. metaflow/runner/click_api.py +240 -50
  45. metaflow/runner/deployer.py +1 -1
  46. metaflow/runner/deployer_impl.py +8 -3
  47. metaflow/runner/metaflow_runner.py +10 -2
  48. metaflow/runner/nbdeploy.py +2 -0
  49. metaflow/runner/nbrun.py +1 -1
  50. metaflow/runner/subprocess_manager.py +3 -1
  51. metaflow/runner/utils.py +41 -19
  52. metaflow/runtime.py +111 -73
  53. metaflow/sidecar/sidecar_worker.py +1 -1
  54. metaflow/user_configs/__init__.py +0 -0
  55. metaflow/user_configs/config_decorators.py +563 -0
  56. metaflow/user_configs/config_options.py +548 -0
  57. metaflow/user_configs/config_parameters.py +405 -0
  58. metaflow/util.py +17 -0
  59. metaflow/version.py +1 -1
  60. {ob_metaflow-2.12.36.3.dist-info → ob_metaflow-2.13.0.1.dist-info}/METADATA +3 -2
  61. {ob_metaflow-2.12.36.3.dist-info → ob_metaflow-2.13.0.1.dist-info}/RECORD +65 -55
  62. {ob_metaflow-2.12.36.3.dist-info → ob_metaflow-2.13.0.1.dist-info}/LICENSE +0 -0
  63. {ob_metaflow-2.12.36.3.dist-info → ob_metaflow-2.13.0.1.dist-info}/WHEEL +0 -0
  64. {ob_metaflow-2.12.36.3.dist-info → ob_metaflow-2.13.0.1.dist-info}/entry_points.txt +0 -0
  65. {ob_metaflow-2.12.36.3.dist-info → ob_metaflow-2.13.0.1.dist-info}/top_level.txt +0 -0
metaflow/plugins/aws/secrets_manager/aws_secrets_manager_secrets_provider.py

@@ -50,24 +50,27 @@ class AwsSecretsManagerSecretsProvider(SecretsProvider):
         The secret payload from AWS is EITHER a string OR a binary blob.

         If the secret contains a string payload ("SecretString"):
-        - if the `parse_secret_string_as_json` option is True (default):
+        - if the `json` option is True (default):
         {SecretString} will be parsed as a JSON. If successfully parsed, AND the JSON contains a
         top-level object, each entry K/V in the object will also be converted to an entry in the result. V will
         always be casted to a string (if not already a string).
-        - If `parse_secret_string_as_json` option is False:
-        {SecretString} will be returned as a single entry in the result, with the key being the secret_id.
+        - If `json` option is False:
+        {SecretString} will be returned as a single entry in the result, where the key is either:
+            - the `secret_id`, OR
+            - the value set by `options={"env_var_name": custom_env_var_name}`.

-        Otherwise, the secret contains a binary blob payload ("SecretBinary"). In this case
-        - The result dic contains '{SecretName}': '{SecretBinary}', where {SecretBinary} is a base64-encoded string
+        Otherwise, if the secret contains a binary blob payload ("SecretBinary"):
+        - The result dict contains '{SecretName}': '{SecretBinary}', where {SecretBinary} is a base64-encoded string.

-        All keys in the result are sanitized to be more valid environment variable names. This is done on a best effort
+        All keys in the result are sanitized to be more valid environment variable names. This is done on a best-effort
         basis. Further validation is expected to be done by the invoking @secrets decorator itself.

-        :param secret_id: ARN or friendly name of the secret
-        :param options: unused
-        :param role: AWS IAM Role ARN to assume before reading the secret
-        :return: dict of environment variables. All keys and values are strings.
+        :param secret_id: ARN or friendly name of the secret.
+        :param options: Dictionary of additional options. E.g., `options={"env_var_name": custom_env_var_name}`.
+        :param role: AWS IAM Role ARN to assume before reading the secret.
+        :return: Dictionary of environment variables. All keys and values are strings.
         """
+
         import botocore
         from metaflow.plugins.aws.aws_client import get_aws_client

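Note: as a usage illustration of the renamed `json` option and the new `env_var_name` option documented above, here is a minimal sketch. The flow, secret ARN, and variable name are hypothetical, and the dict form of `sources` (with type/id/options keys) is assumed from Metaflow's `@secrets` decorator:

    import os

    from metaflow import FlowSpec, secrets, step


    class SecretsSketchFlow(FlowSpec):
        # Hypothetical secret id; with json=False the whole SecretString lands in
        # a single env var named by env_var_name instead of by the secret id.
        @secrets(
            sources=[
                {
                    "type": "aws-secrets-manager",
                    "id": "arn:aws:secretsmanager:us-west-2:001122334455:secret:my-token",
                    "options": {"json": False, "env_var_name": "MY_TOKEN"},
                }
            ]
        )
        @step
        def start(self):
            print("token length:", len(os.environ["MY_TOKEN"]))
            self.next(self.end)

        @step
        def end(self):
            pass


    if __name__ == "__main__":
        SecretsSketchFlow()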
metaflow/plugins/aws/step_functions/step_functions.py

@@ -18,6 +18,7 @@ from metaflow.metaflow_config import (
     SFN_S3_DISTRIBUTED_MAP_OUTPUT_PATH,
 )
 from metaflow.parameters import deploy_time_eval
+from metaflow.user_configs.config_options import ConfigInput
 from metaflow.util import dict_to_cli_options, to_pascalcase

 from ..batch.batch import Batch
@@ -71,6 +72,7 @@ class StepFunctions(object):
         self.username = username
         self.max_workers = max_workers
         self.workflow_timeout = workflow_timeout
+        self.config_parameters = self._process_config_parameters()

         # https://aws.amazon.com/blogs/aws/step-functions-distributed-map-a-serverless-solution-for-large-scale-parallel-data-processing/
         self.use_distributed_map = use_distributed_map
@@ -485,6 +487,10 @@ class StepFunctions(object):
                     "case-insensitive." % param.name
                 )
             seen.add(norm)
+            # NOTE: We skip config parameters as these do not have dynamic values,
+            # and need to be treated differently.
+            if param.IS_CONFIG_PARAMETER:
+                continue

             is_required = param.kwargs.get("required", False)
             # Throw an exception if a schedule is set for a flow with required
@@ -501,6 +507,27 @@ class StepFunctions(object):
             parameters.append(dict(name=param.name, value=value))
         return parameters

+    def _process_config_parameters(self):
+        parameters = []
+        seen = set()
+        for var, param in self.flow._get_parameters():
+            if not param.IS_CONFIG_PARAMETER:
+                continue
+            # Throw an exception if the parameter is specified twice.
+            norm = param.name.lower()
+            if norm in seen:
+                raise MetaflowException(
+                    "Parameter *%s* is specified twice. "
+                    "Note that parameter names are "
+                    "case-insensitive." % param.name
+                )
+            seen.add(norm)
+
+            parameters.append(
+                dict(name=param.name, kv_name=ConfigInput.make_key_name(param.name))
+            )
+        return parameters
+
     def _batch(self, node):
         attrs = {
             # metaflow.user is only used for setting the AWS Job Name.
@@ -747,6 +774,11 @@ class StepFunctions(object):
             metaflow_version["production_token"] = self.production_token
         env["METAFLOW_VERSION"] = json.dumps(metaflow_version)

+        # map config values
+        cfg_env = {param["name"]: param["kv_name"] for param in self.config_parameters}
+        if cfg_env:
+            env["METAFLOW_FLOW_CONFIG_VALUE"] = json.dumps(cfg_env)
+
         # Set AWS DynamoDb Table Name for state tracking for for-eaches.
         # There are three instances when metaflow runtime directly interacts
         # with AWS DynamoDB.
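Note: a small sketch of the shape this mapping gives `METAFLOW_FLOW_CONFIG_VALUE`. The parameter name and kv key below are placeholders, not the real `ConfigInput.make_key_name` output:

    import json

    # One config parameter, named "settings" here for illustration; kv_name is
    # whatever ConfigInput.make_key_name("settings") returns (placeholder below).
    config_parameters = [{"name": "settings", "kv_name": "kv-key-for-settings"}]

    cfg_env = {param["name"]: param["kv_name"] for param in config_parameters}
    if cfg_env:
        # The value each Step Functions task sees in its environment:
        print("METAFLOW_FLOW_CONFIG_VALUE =", json.dumps(cfg_env))
        # METAFLOW_FLOW_CONFIG_VALUE = {"settings": "kv-key-for-settings"}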
metaflow/plugins/aws/step_functions/step_functions_cli.py

@@ -326,6 +326,7 @@ def make_flow(

     # Attach AWS Batch decorator to the flow
     decorators._attach_decorators(obj.flow, [BatchDecorator.name])
+    decorators._init(obj.flow)
     decorators._init_step_decorators(
         obj.flow, obj.graph, obj.environment, obj.flow_datastore, obj.logger
     )
metaflow/plugins/aws/step_functions/step_functions_deployer_objects.py

@@ -46,6 +46,7 @@ class StepFunctionsTriggeredRun(TriggeredRun):
         )

         command_obj = self.deployer.spm.get(pid)
+        command_obj.sync_wait()
         return command_obj.process.returncode == 0


@@ -174,6 +175,7 @@ class StepFunctionsDeployedFlow(DeployedFlow):
         )

         command_obj = self.deployer.spm.get(pid)
+        command_obj.sync_wait()
         return command_obj.process.returncode == 0

     def trigger(self, **kwargs) -> StepFunctionsTriggeredRun:
@@ -217,6 +219,7 @@ class StepFunctionsDeployedFlow(DeployedFlow):
             attribute_file_fd, command_obj, self.deployer.file_read_timeout
         )

+        command_obj.sync_wait()
         if command_obj.process.returncode == 0:
             return StepFunctionsTriggeredRun(
                 deployer=self.deployer, content=content
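Note: all three `sync_wait()` additions close the same race: the deployer subprocess must be waited on before `process.returncode` is read, since the return code is `None` while the process is still running. A caller-side sketch, assuming Metaflow's documented `Deployer` chain (flow file name invented):

    from metaflow import Deployer

    # Deploy to Step Functions, then trigger. terminate(), delete(), and trigger()
    # now call sync_wait() internally before reading the subprocess return code.
    deployed_flow = Deployer(flow_file="example_flow.py").step_functions().create()
    triggered_run = deployed_flow.trigger()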
metaflow/plugins/cards/card_creator.py

@@ -122,6 +122,7 @@ class CardCreator:
             executable,
             sys.argv[0],
         ]
+
         cmd += self._top_level_options + [
             "card",
             "create",
metaflow/plugins/cards/card_decorator.py

@@ -1,13 +1,16 @@
+import json
+import os
+import re
+import tempfile
+
 from metaflow.decorators import StepDecorator
 from metaflow.metaflow_current import current
+from metaflow.user_configs.config_options import ConfigInput
+from metaflow.user_configs.config_parameters import dump_config_values
 from metaflow.util import to_unicode
+
 from .component_serializer import CardComponentCollector, get_card_class
 from .card_creator import CardCreator
-
-
-# from metaflow import get_metadata
-import re
-
 from .exception import CARD_ID_PATTERN, TYPE_CHECK_REGEX

 ASYNC_TIMEOUT = 30
@@ -111,6 +114,14 @@ class CardDecorator(StepDecorator):
         self._logger = logger
         self.card_options = None

+        # We check for configuration options. We do this here before they are
+        # converted to properties.
+        self._config_values = [
+            (config.name, ConfigInput.make_key_name(config.name))
+            for _, config in flow._get_parameters()
+            if config.IS_CONFIG_PARAMETER
+        ]
+
         self.card_options = self.attributes["options"]

         evt_name = "step-init"
@@ -146,6 +157,18 @@ class CardDecorator(StepDecorator):
         self._task_datastore = task_datastore
         self._metadata = metadata

+        # If we have configs, we need to dump them to a file so we can re-use them
+        # when calling the card creation subprocess.
+        if self._config_values:
+            with tempfile.NamedTemporaryFile(
+                mode="w", encoding="utf-8", delete=False
+            ) as config_file:
+                config_value = dump_config_values(flow)
+                json.dump(config_value, config_file)
+                self._config_file_name = config_file.name
+        else:
+            self._config_file_name = None
+
         card_type = self.attributes["type"]
         card_class = get_card_class(card_type)

@@ -179,7 +202,7 @@ class CardDecorator(StepDecorator):
         # we need to ensure that `current.card` has `CardComponentCollector` instantiated only once.
         if not self._is_event_registered("pre-step"):
             self._register_event("pre-step")
-            self._set_card_creator(CardCreator(self._create_top_level_args()))
+            self._set_card_creator(CardCreator(self._create_top_level_args(flow)))

         current._update_env(
             {"card": CardComponentCollector(self._logger, self.card_creator)}
@@ -223,6 +246,13 @@ class CardDecorator(StepDecorator):
         self.card_creator.create(mode="render", final=True, **create_options)
         self.card_creator.create(mode="refresh", final=True, **create_options)

+        # Unlink the config file if it exists
+        if self._config_file_name:
+            try:
+                os.unlink(self._config_file_name)
+            except Exception as e:
+                pass
+
     @staticmethod
     def _options(mapping):
         for k, v in mapping.items():
@@ -232,9 +262,13 @@ class CardDecorator(StepDecorator):
             for value in v:
                 yield "--%s" % k
                 if not isinstance(value, bool):
-                    yield to_unicode(value)
+                    if isinstance(value, tuple):
+                        for val in value:
+                            yield to_unicode(val)
+                    else:
+                        yield to_unicode(value)

-    def _create_top_level_args(self):
+    def _create_top_level_args(self, flow):
         top_level_options = {
             "quiet": True,
             "metadata": self._metadata.TYPE,
@@ -247,4 +281,8 @@ class CardDecorator(StepDecorator):
             # We don't provide --with as all execution is taking place in
             # the context of the main process
         }
+        if self._config_values:
+            top_level_options["config-value"] = self._config_values
+            top_level_options["local-config-file"] = self._config_file_name
+
         return list(self._options(top_level_options))
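Note: tying the card hunks together, `_config_values` holds `(name, kv key)` tuples, and the tuple-aware `_options` expands each into a repeated `--config-value` pair. A runnable sketch of just that expansion, with placeholder values, `str` standing in for `to_unicode`, and the `if v:` / `k.replace` lines assumed from the surrounding method body the hunk does not show:

    def _options(mapping):
        # Simplified re-implementation of the tuple expansion shown above.
        for k, v in mapping.items():
            if v:
                k = k.replace("_", "-")
                v = v if isinstance(v, list) else [v]
                for value in v:
                    yield "--%s" % k
                    if not isinstance(value, bool):
                        if isinstance(value, tuple):
                            for val in value:
                                yield str(val)
                        else:
                            yield str(value)


    print(list(_options({
        "config-value": [("settings", "kv-key-for-settings")],  # (name, kv key) placeholders
        "local-config-file": "/tmp/tmp_example.json",  # invented temp file path
    })))
    # ['--config-value', 'settings', 'kv-key-for-settings',
    #  '--local-config-file', '/tmp/tmp_example.json']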
metaflow/plugins/datatools/s3/s3op.py

@@ -722,8 +722,8 @@ def cli():
     pass


-@tracing.cli_entrypoint("s3op/list")
 @cli.command("list", help="List S3 objects")
+@tracing.cli_entrypoint("s3op/list")
 @click.option(
     "--recursive/--no-recursive",
     default=False,
@@ -782,8 +782,8 @@ def lst(
         print(format_result_line(idx, url.prefix, url.url, str(size)))


-@tracing.cli_entrypoint("s3op/put")
 @cli.command(help="Upload files to S3")
+@tracing.cli_entrypoint("s3op/put")
 @click.option(
     "--file",
     "files",
@@ -977,8 +977,8 @@ def _populate_prefixes(prefixes, inputs):
     return prefixes, is_transient_retry


-@tracing.cli_entrypoint("s3op/get")
 @cli.command(help="Download files from S3")
+@tracing.cli_entrypoint("s3op/get")
 @click.option(
     "--recursive/--no-recursive",
     default=False,
metaflow/plugins/kubernetes/kubernetes_cli.py

@@ -33,12 +33,12 @@ def kubernetes():
     pass


-@tracing.cli_entrypoint("kubernetes/step")
 @kubernetes.command(
     help="Execute a single task on Kubernetes. This command calls the top-level step "
     "command inside a Kubernetes pod with the given options. Typically you do not call "
     "this command directly; it is used internally by Metaflow."
 )
+@tracing.cli_entrypoint("kubernetes/step")
 @click.argument("step-name")
 @click.argument("code-package-sha")
 @click.argument("code-package-url")
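Note: this reordering and the three s3op ones above are the same fix. Python applies stacked decorators bottom-up, so placing `@tracing.cli_entrypoint` below the command decorator lets it wrap the plain callback instead of a click `Command` object. A generic illustration with stand-in decorators (not Metaflow's actual tracing internals):

    import functools


    def traced(label):
        # Stand-in for @tracing.cli_entrypoint: expects and wraps a plain function.
        def decorator(func):
            @functools.wraps(func)
            def wrapper(*args, **kwargs):
                print(f"trace[{label}]: entering {func.__name__}")
                return func(*args, **kwargs)
            return wrapper
        return decorator


    class Command:
        # Stand-in for click's Command object.
        def __init__(self, callback):
            self.callback = callback


    def command(func):
        return Command(func)


    @command              # applied second: receives the already-traced function
    @traced("s3op/list")  # applied first: sees a plain function, as intended
    def lst():
        print("listing")


    lst.callback()  # trace[s3op/list]: entering lst, then: listing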
metaflow/plugins/kubernetes/kubernetes_decorator.py

@@ -153,8 +153,8 @@ class KubernetesDecorator(StepDecorator):
     supports_conda_environment = True
     target_platform = "linux-64"

-    def __init__(self, attributes=None, statically_defined=False):
-        super(KubernetesDecorator, self).__init__(attributes, statically_defined)
+    def init(self):
+        super(KubernetesDecorator, self).init()

         if not self.attributes["namespace"]:
             self.attributes["namespace"] = KUBERNETES_NAMESPACE
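Note: the `__init__` to `init()` move pairs with this release's broader decorator changes (metaflow/decorators.py, +63 -2) and the `decorators._init(obj.flow)` call added in step_functions_cli.py above. A sketch of the lifecycle this implies, using an invented decorator and assuming the base class defers attribute resolution to `init()`:

    from metaflow.decorators import StepDecorator


    class MyPlatformDecorator(StepDecorator):
        # Hypothetical decorator following the same pattern as KubernetesDecorator.
        name = "my_platform"
        defaults = {"namespace": None}

        def init(self):
            # Invoked via decorators._init(flow) after construction, once attribute
            # values (including config-derived ones) are available to resolve.
            super(MyPlatformDecorator, self).init()
            if not self.attributes["namespace"]:
                self.attributes["namespace"] = "team-default"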
metaflow/plugins/pypi/bootstrap.py

@@ -1,4 +1,5 @@
 import bz2
+import concurrent.futures
 import io
 import json
 import os
@@ -6,6 +7,9 @@ import shutil
 import subprocess
 import sys
 import tarfile
+import time
+
+import requests

 from metaflow.metaflow_config import DATASTORE_LOCAL_DIR
 from metaflow.plugins import DATASTORES
@@ -15,6 +19,18 @@ from . import MAGIC_FILE, _datastore_packageroot

 # Bootstraps a valid conda virtual environment composed of conda and pypi packages

+
+def timer(func):
+    def wrapper(*args, **kwargs):
+        start_time = time.time()
+        result = func(*args, **kwargs)
+        duration = time.time() - start_time
+        # print(f"Time taken for {func.__name__}: {duration:.2f} seconds")
+        return result
+
+    return wrapper
+
+
 if __name__ == "__main__":
     if len(sys.argv) != 5:
         print("Usage: bootstrap.py <flow_name> <id> <datastore_type> <architecture>")
@@ -47,6 +63,8 @@ if __name__ == "__main__":

     prefix = os.path.join(os.getcwd(), architecture, id_)
     pkgs_dir = os.path.join(os.getcwd(), ".pkgs")
+    conda_pkgs_dir = os.path.join(pkgs_dir, "conda")
+    pypi_pkgs_dir = os.path.join(pkgs_dir, "pypi")
     manifest_dir = os.path.join(os.getcwd(), DATASTORE_LOCAL_DIR, flow_name)

     datastores = [d for d in DATASTORES if d.TYPE == datastore_type]
@@ -64,77 +82,194 @@ if __name__ == "__main__":
         os.path.join(os.getcwd(), MAGIC_FILE),
         os.path.join(manifest_dir, MAGIC_FILE),
     )
-
     with open(os.path.join(manifest_dir, MAGIC_FILE)) as f:
         env = json.load(f)[id_][architecture]

-    # Download Conda packages.
-    conda_pkgs_dir = os.path.join(pkgs_dir, "conda")
-    with storage.load_bytes([package["path"] for package in env["conda"]]) as results:
-        for key, tmpfile, _ in results:
+    def run_cmd(cmd):
+        result = subprocess.run(
+            cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
+        )
+        if result.returncode != 0:
+            print(f"Bootstrap failed while executing: {cmd}")
+            print("Stdout:", result.stdout)
+            print("Stderr:", result.stderr)
+            sys.exit(1)
+
+    @timer
+    def install_micromamba(architecture):
+        micromamba_dir = os.path.join(os.getcwd(), "micromamba")
+        micromamba_path = os.path.join(micromamba_dir, "bin", "micromamba")
+
+        if which("micromamba"):
+            return which("micromamba")
+        if os.path.exists(micromamba_path):
+            os.environ["PATH"] += os.pathsep + os.path.dirname(micromamba_path)
+            return micromamba_path
+
+        # Download and extract in one go
+        # TODO: Serve from cloudflare
+        url = f"https://micro.mamba.pm/api/micromamba/{architecture}/2.0.4"
+
+        # Prepare directory once
+        os.makedirs(os.path.dirname(micromamba_path), exist_ok=True)
+
+        # Stream and process directly to file
+        with requests.get(url, stream=True, timeout=30) as response:
+            if response.status_code != 200:
+                raise Exception(
+                    f"Failed to download micromamba: HTTP {response.status_code}"
+                )
+
+            decompressor = bz2.BZ2Decompressor()
+
+            # Process in memory without temporary files
+            tar_content = decompressor.decompress(response.raw.read())
+
+            with tarfile.open(fileobj=io.BytesIO(tar_content), mode="r:") as tar:
+                member = tar.getmember("bin/micromamba")
+                # Extract directly to final location
+                with open(micromamba_path, "wb") as f:
+                    f.write(tar.extractfile(member).read())
+
+        # Set executable permission
+        os.chmod(micromamba_path, 0o755)
+
+        # Update PATH only once at the end
+        os.environ["PATH"] += os.pathsep + os.path.dirname(micromamba_path)
+        return micromamba_path
+
+    @timer
+    def download_conda_packages(storage, packages, dest_dir):
+
+        def process_conda_package(args):
             # Ensure that conda packages go into architecture specific folders.
             # The path looks like REPO/CHANNEL/CONDA_SUBDIR/PACKAGE. We trick
             # Micromamba into believing that all packages are coming from a local
             # channel - the only hurdle is ensuring that packages are organised
             # properly.
-
-            # TODO: consider RAM disk
-            dest = os.path.join(conda_pkgs_dir, "/".join(key.split("/")[-2:]))
+            key, tmpfile, dest_dir = args
+            dest = os.path.join(dest_dir, "/".join(key.split("/")[-2:]))
             os.makedirs(os.path.dirname(dest), exist_ok=True)
             shutil.move(tmpfile, dest)

-    # Create Conda environment.
-    cmds = [
-        # TODO: check if mamba or conda are already available on the image
-        # TODO: micromamba installation can be pawned off to micromamba.py
-        f"""set -e;
-            if ! command -v micromamba >/dev/null 2>&1; then
-                mkdir -p micromamba;
-                python -c "import requests, bz2, sys; data = requests.get('https://micro.mamba.pm/api/micromamba/{architecture}/1.5.7').content; sys.stdout.buffer.write(bz2.decompress(data))" | tar -xv -C $(pwd)/micromamba bin/micromamba --strip-components 1;
+        os.makedirs(dest_dir, exist_ok=True)
+        with storage.load_bytes([package["path"] for package in packages]) as results:
+            with concurrent.futures.ThreadPoolExecutor() as executor:
+                executor.map(
+                    process_conda_package,
+                    [(key, tmpfile, dest_dir) for key, tmpfile, _ in results],
+                )
+        # for key, tmpfile, _ in results:
+
+        #     # TODO: consider RAM disk
+        #     dest = os.path.join(dest_dir, "/".join(key.split("/")[-2:]))
+        #     os.makedirs(os.path.dirname(dest), exist_ok=True)
+        #     shutil.move(tmpfile, dest)
+        return dest_dir
+
+    @timer
+    def download_pypi_packages(storage, packages, dest_dir):
+
+        def process_pypi_package(args):
+            key, tmpfile, dest_dir = args
+            dest = os.path.join(dest_dir, os.path.basename(key))
+            shutil.move(tmpfile, dest)
+
+        os.makedirs(dest_dir, exist_ok=True)
+        with storage.load_bytes([package["path"] for package in packages]) as results:
+            with concurrent.futures.ThreadPoolExecutor() as executor:
+                executor.map(
+                    process_pypi_package,
+                    [(key, tmpfile, dest_dir) for key, tmpfile, _ in results],
+                )
+        # for key, tmpfile, _ in results:
+        #     dest = os.path.join(dest_dir, os.path.basename(key))
+        #     shutil.move(tmpfile, dest)
+        return dest_dir
+
+    @timer
+    def create_conda_environment(prefix, conda_pkgs_dir):
+        cmd = f'''set -e;
+            tmpfile=$(mktemp);
+            echo "@EXPLICIT" > "$tmpfile";
+            ls -d {conda_pkgs_dir}/*/* >> "$tmpfile";
             export PATH=$PATH:$(pwd)/micromamba;
-                if ! command -v micromamba >/dev/null 2>&1; then
-                    echo "Failed to install Micromamba!";
-                    exit 1;
-                fi;
-            fi""",
-        # Create a conda environment through Micromamba.
-        f'''set -e;
-            tmpfile=$(mktemp);
-            echo "@EXPLICIT" > "$tmpfile";
-            ls -d {conda_pkgs_dir}/*/* >> "$tmpfile";
-            export PATH=$PATH:$(pwd)/micromamba;
-            export CONDA_PKGS_DIRS=$(pwd)/micromamba/pkgs;
-            micromamba create --yes --offline --no-deps --safety-checks=disabled --no-extra-safety-checks --prefix {prefix} --file "$tmpfile";
-            rm "$tmpfile"''',
-    ]
-
-    # Download PyPI packages.
-    if "pypi" in env:
-        pypi_pkgs_dir = os.path.join(pkgs_dir, "pypi")
-        with storage.load_bytes(
-            [package["path"] for package in env["pypi"]]
-        ) as results:
-            for key, tmpfile, _ in results:
-                dest = os.path.join(pypi_pkgs_dir, os.path.basename(key))
-                os.makedirs(os.path.dirname(dest), exist_ok=True)
-                shutil.move(tmpfile, dest)
-
-        # Install PyPI packages.
-        cmds.extend(
-            [
-                f"""set -e;
-                    export PATH=$PATH:$(pwd)/micromamba;
-                    export CONDA_PKGS_DIRS=$(pwd)/micromamba/pkgs;
-                    micromamba run --prefix {prefix} python -m pip --disable-pip-version-check install --root-user-action=ignore --no-compile {pypi_pkgs_dir}/*.whl --no-user"""
-            ]
-        )
+            export CONDA_PKGS_DIRS=$(pwd)/micromamba/pkgs;
+            export MAMBA_NO_LOW_SPEED_LIMIT=1;
+            export MAMBA_USE_INDEX_CACHE=1;
+            export MAMBA_NO_PROGRESS_BARS=1;
+            export CONDA_FETCH_THREADS=1;
+            micromamba create --yes --offline --no-deps \
+                --safety-checks=disabled --no-extra-safety-checks \
+                --prefix {prefix} --file "$tmpfile" \
+                --no-pyc --no-rc --always-copy;
+            rm "$tmpfile"'''
+        run_cmd(cmd)

-    for cmd in cmds:
-        result = subprocess.run(
-            cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE
-        )
-        if result.returncode != 0:
-            print(f"Bootstrap failed while executing: {cmd}")
-            print("Stdout:", result.stdout.decode())
-            print("Stderr:", result.stderr.decode())
-            sys.exit(1)
+    @timer
+    def install_pypi_packages(prefix, pypi_pkgs_dir):
+
+        cmd = f"""set -e;
+            export PATH=$PATH:$(pwd)/micromamba;
+            export CONDA_PKGS_DIRS=$(pwd)/micromamba/pkgs;
+            micromamba run --prefix {prefix} python -m pip --disable-pip-version-check \
+                install --root-user-action=ignore --no-compile --no-index \
+                --no-cache-dir --no-deps --prefer-binary \
+                --find-links={pypi_pkgs_dir} --no-user \
+                --no-warn-script-location --no-input \
+                {pypi_pkgs_dir}/*.whl
+        """
+        run_cmd(cmd)
+
+    @timer
+    def setup_environment(
+        architecture, storage, env, prefix, conda_pkgs_dir, pypi_pkgs_dir
+    ):
+        with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
+            # install micromamba, download conda and pypi packages in parallel
+            futures = {
+                "micromamba": executor.submit(install_micromamba, architecture),
+                "conda_pkgs": executor.submit(
+                    download_conda_packages, storage, env["conda"], conda_pkgs_dir
+                ),
+            }
+            if "pypi" in env:
+                futures["pypi_pkgs"] = executor.submit(
+                    download_pypi_packages, storage, env["pypi"], pypi_pkgs_dir
+                )
+
+            # create conda environment after micromamba is installed and conda packages are downloaded
+            done, _ = concurrent.futures.wait(
+                [futures["micromamba"], futures["conda_pkgs"]],
+                return_when=concurrent.futures.ALL_COMPLETED,
+            )
+
+            for future in done:
+                future.result()
+
+            # start conda environment creation
+            futures["conda_env"] = executor.submit(
+                create_conda_environment, prefix, conda_pkgs_dir
+            )
+
+            if "pypi" in env:
+                # install pypi packages after conda environment is created and pypi packages are downloaded
+                done, _ = concurrent.futures.wait(
+                    [futures["conda_env"], futures["pypi_pkgs"]],
+                    return_when=concurrent.futures.ALL_COMPLETED,
+                )
+
+                for future in done:
+                    future.result()
+
+                # install pypi packages
+                futures["pypi_install"] = executor.submit(
+                    install_pypi_packages, prefix, pypi_pkgs_dir
+                )
+                # wait for pypi packages to be installed
+                futures["pypi_install"].result()
+            else:
+                # wait for conda environment to be created
+                futures["conda_env"].result()
+
+    setup_environment(architecture, storage, env, prefix, conda_pkgs_dir, pypi_pkgs_dir)
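Note: the `setup_environment` orchestration above reduces to a standard `concurrent.futures` pattern: submit independent tasks, block on the subset a later step needs, surface worker exceptions via `result()`, then submit the dependent step. A self-contained sketch with toy tasks:

    import concurrent.futures
    import time


    def task(name, seconds):
        # Toy stand-in for install_micromamba / download_conda_packages / etc.
        time.sleep(seconds)
        return name


    with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
        futures = {
            "micromamba": executor.submit(task, "micromamba", 0.1),
            "conda_pkgs": executor.submit(task, "conda_pkgs", 0.2),
        }

        # Wait for both prerequisites; result() re-raises any worker exception.
        done, _ = concurrent.futures.wait(
            [futures["micromamba"], futures["conda_pkgs"]],
            return_when=concurrent.futures.ALL_COMPLETED,
        )
        for future in done:
            future.result()

        # Only now start the step that depends on both prerequisites.
        futures["conda_env"] = executor.submit(task, "conda_env", 0.1)
        print(futures["conda_env"].result())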