ob-metaflow 2.12.36.3__py2.py3-none-any.whl → 2.13.0.1__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of ob-metaflow might be problematic.
- metaflow/__init__.py +3 -0
- metaflow/cli.py +180 -718
- metaflow/cli_args.py +17 -0
- metaflow/cli_components/__init__.py +0 -0
- metaflow/cli_components/dump_cmd.py +96 -0
- metaflow/cli_components/init_cmd.py +51 -0
- metaflow/cli_components/run_cmds.py +360 -0
- metaflow/cli_components/step_cmd.py +189 -0
- metaflow/cli_components/utils.py +140 -0
- metaflow/cmd/develop/stub_generator.py +9 -2
- metaflow/datastore/flow_datastore.py +2 -2
- metaflow/decorators.py +63 -2
- metaflow/exception.py +8 -2
- metaflow/extension_support/plugins.py +41 -27
- metaflow/flowspec.py +175 -23
- metaflow/graph.py +28 -27
- metaflow/includefile.py +50 -22
- metaflow/lint.py +35 -20
- metaflow/metaflow_config.py +6 -1
- metaflow/package.py +17 -3
- metaflow/parameters.py +87 -23
- metaflow/plugins/__init__.py +4 -0
- metaflow/plugins/airflow/airflow_cli.py +1 -0
- metaflow/plugins/argo/argo_workflows.py +41 -1
- metaflow/plugins/argo/argo_workflows_cli.py +1 -0
- metaflow/plugins/argo/argo_workflows_deployer_objects.py +47 -1
- metaflow/plugins/aws/batch/batch_decorator.py +2 -2
- metaflow/plugins/aws/secrets_manager/aws_secrets_manager_secrets_provider.py +13 -10
- metaflow/plugins/aws/step_functions/step_functions.py +32 -0
- metaflow/plugins/aws/step_functions/step_functions_cli.py +1 -0
- metaflow/plugins/aws/step_functions/step_functions_deployer_objects.py +3 -0
- metaflow/plugins/cards/card_creator.py +1 -0
- metaflow/plugins/cards/card_decorator.py +46 -8
- metaflow/plugins/datatools/s3/s3op.py +3 -3
- metaflow/plugins/kubernetes/kubernetes_cli.py +1 -1
- metaflow/plugins/kubernetes/kubernetes_decorator.py +2 -2
- metaflow/plugins/pypi/bootstrap.py +196 -61
- metaflow/plugins/pypi/conda_decorator.py +20 -10
- metaflow/plugins/pypi/conda_environment.py +76 -21
- metaflow/plugins/pypi/micromamba.py +42 -15
- metaflow/plugins/pypi/pip.py +8 -3
- metaflow/plugins/pypi/pypi_decorator.py +11 -9
- metaflow/plugins/timeout_decorator.py +2 -2
- metaflow/runner/click_api.py +240 -50
- metaflow/runner/deployer.py +1 -1
- metaflow/runner/deployer_impl.py +8 -3
- metaflow/runner/metaflow_runner.py +10 -2
- metaflow/runner/nbdeploy.py +2 -0
- metaflow/runner/nbrun.py +1 -1
- metaflow/runner/subprocess_manager.py +3 -1
- metaflow/runner/utils.py +41 -19
- metaflow/runtime.py +111 -73
- metaflow/sidecar/sidecar_worker.py +1 -1
- metaflow/user_configs/__init__.py +0 -0
- metaflow/user_configs/config_decorators.py +563 -0
- metaflow/user_configs/config_options.py +548 -0
- metaflow/user_configs/config_parameters.py +405 -0
- metaflow/util.py +17 -0
- metaflow/version.py +1 -1
- {ob_metaflow-2.12.36.3.dist-info → ob_metaflow-2.13.0.1.dist-info}/METADATA +3 -2
- {ob_metaflow-2.12.36.3.dist-info → ob_metaflow-2.13.0.1.dist-info}/RECORD +65 -55
- {ob_metaflow-2.12.36.3.dist-info → ob_metaflow-2.13.0.1.dist-info}/LICENSE +0 -0
- {ob_metaflow-2.12.36.3.dist-info → ob_metaflow-2.13.0.1.dist-info}/WHEEL +0 -0
- {ob_metaflow-2.12.36.3.dist-info → ob_metaflow-2.13.0.1.dist-info}/entry_points.txt +0 -0
- {ob_metaflow-2.12.36.3.dist-info → ob_metaflow-2.13.0.1.dist-info}/top_level.txt +0 -0
metaflow/plugins/aws/secrets_manager/aws_secrets_manager_secrets_provider.py

@@ -50,24 +50,27 @@ class AwsSecretsManagerSecretsProvider(SecretsProvider):
         The secret payload from AWS is EITHER a string OR a binary blob.

         If the secret contains a string payload ("SecretString"):
-        - if the `
+        - if the `json` option is True (default):
           {SecretString} will be parsed as a JSON. If successfully parsed, AND the JSON contains a
           top-level object, each entry K/V in the object will also be converted to an entry in the result. V will
           always be casted to a string (if not already a string).
-        - If `
-          {SecretString} will be returned as a single entry in the result,
+        - If `json` option is False:
+          {SecretString} will be returned as a single entry in the result, where the key is either:
+          - the `secret_id`, OR
+          - the value set by `options={"env_var_name": custom_env_var_name}`.

-        Otherwise, the secret contains a binary blob payload ("SecretBinary")
-        - The result
+        Otherwise, if the secret contains a binary blob payload ("SecretBinary"):
+        - The result dict contains '{SecretName}': '{SecretBinary}', where {SecretBinary} is a base64-encoded string.

-        All keys in the result are sanitized to be more valid environment variable names. This is done on a best
+        All keys in the result are sanitized to be more valid environment variable names. This is done on a best-effort
         basis. Further validation is expected to be done by the invoking @secrets decorator itself.

-        :param secret_id: ARN or friendly name of the secret
-        :param options:
-        :param role: AWS IAM Role ARN to assume before reading the secret
-        :return:
+        :param secret_id: ARN or friendly name of the secret.
+        :param options: Dictionary of additional options. E.g., `options={"env_var_name": custom_env_var_name}`.
+        :param role: AWS IAM Role ARN to assume before reading the secret.
+        :return: Dictionary of environment variables. All keys and values are strings.
         """
+
         import botocore
         from metaflow.plugins.aws.aws_client import get_aws_client

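To make the documented behavior concrete, a minimal sketch of a flow consuming such a secret; the `@secrets` decorator is Metaflow's public API, while the secret ARN and its JSON payload below are purely hypothetical.

from metaflow import FlowSpec, secrets, step


class SecretsExampleFlow(FlowSpec):
    # Hypothetical secret whose SecretString is '{"user": "alice", "port": 5432}'.
    # With the default json=True behavior documented above, each top-level key
    # becomes an environment variable and every value is cast to a string.
    @secrets(sources=["arn:aws:secretsmanager:us-east-1:123456789012:secret:db-creds"])
    @step
    def start(self):
        import os

        print(os.environ.get("user"), os.environ.get("port"))  # -> alice 5432
        self.next(self.end)

    @step
    def end(self):
        pass


if __name__ == "__main__":
    SecretsExampleFlow()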
metaflow/plugins/aws/step_functions/step_functions.py

@@ -18,6 +18,7 @@ from metaflow.metaflow_config import (
     SFN_S3_DISTRIBUTED_MAP_OUTPUT_PATH,
 )
 from metaflow.parameters import deploy_time_eval
+from metaflow.user_configs.config_options import ConfigInput
 from metaflow.util import dict_to_cli_options, to_pascalcase

 from ..batch.batch import Batch
@@ -71,6 +72,7 @@ class StepFunctions(object):
         self.username = username
         self.max_workers = max_workers
         self.workflow_timeout = workflow_timeout
+        self.config_parameters = self._process_config_parameters()

         # https://aws.amazon.com/blogs/aws/step-functions-distributed-map-a-serverless-solution-for-large-scale-parallel-data-processing/
         self.use_distributed_map = use_distributed_map
@@ -485,6 +487,10 @@ class StepFunctions(object):
                     "case-insensitive." % param.name
                 )
             seen.add(norm)
+            # NOTE: We skip config parameters as these do not have dynamic values,
+            # and need to be treated differently.
+            if param.IS_CONFIG_PARAMETER:
+                continue

             is_required = param.kwargs.get("required", False)
             # Throw an exception if a schedule is set for a flow with required
@@ -501,6 +507,27 @@ class StepFunctions(object):
             parameters.append(dict(name=param.name, value=value))
         return parameters

+    def _process_config_parameters(self):
+        parameters = []
+        seen = set()
+        for var, param in self.flow._get_parameters():
+            if not param.IS_CONFIG_PARAMETER:
+                continue
+            # Throw an exception if the parameter is specified twice.
+            norm = param.name.lower()
+            if norm in seen:
+                raise MetaflowException(
+                    "Parameter *%s* is specified twice. "
+                    "Note that parameter names are "
+                    "case-insensitive." % param.name
+                )
+            seen.add(norm)
+
+            parameters.append(
+                dict(name=param.name, kv_name=ConfigInput.make_key_name(param.name))
+            )
+        return parameters
+
     def _batch(self, node):
         attrs = {
             # metaflow.user is only used for setting the AWS Job Name.
@@ -747,6 +774,11 @@ class StepFunctions(object):
         metaflow_version["production_token"] = self.production_token
         env["METAFLOW_VERSION"] = json.dumps(metaflow_version)

+        # map config values
+        cfg_env = {param["name"]: param["kv_name"] for param in self.config_parameters}
+        if cfg_env:
+            env["METAFLOW_FLOW_CONFIG_VALUE"] = json.dumps(cfg_env)
+
         # Set AWS DynamoDb Table Name for state tracking for for-eaches.
         # There are three instances when metaflow runtime directly interacts
         # with AWS DynamoDB.

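A small sketch of what this mapping amounts to at deploy time; the parameter names are invented, and the exact key format produced by `ConfigInput.make_key_name` is not shown in this diff, so the `kv.` prefix below is only an assumption.

import json

# Hypothetical flow with two config parameters, "config" and "training".
# self.config_parameters, as built by _process_config_parameters above,
# would then look roughly like this (kv_name format assumed):
config_parameters = [
    {"name": "config", "kv_name": "kv.config"},
    {"name": "training", "kv_name": "kv.training"},
]

# The deployed state machine carries the whole mapping as one JSON blob in the
# task environment, presumably so the task-side CLI can resolve each config
# name to its stored value.
env = {}
cfg_env = {param["name"]: param["kv_name"] for param in config_parameters}
if cfg_env:
    env["METAFLOW_FLOW_CONFIG_VALUE"] = json.dumps(cfg_env)

print(env["METAFLOW_FLOW_CONFIG_VALUE"])
# {"config": "kv.config", "training": "kv.training"}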
metaflow/plugins/aws/step_functions/step_functions_cli.py

@@ -326,6 +326,7 @@ def make_flow(

     # Attach AWS Batch decorator to the flow
     decorators._attach_decorators(obj.flow, [BatchDecorator.name])
+    decorators._init(obj.flow)
     decorators._init_step_decorators(
         obj.flow, obj.graph, obj.environment, obj.flow_datastore, obj.logger
     )

metaflow/plugins/aws/step_functions/step_functions_deployer_objects.py

@@ -46,6 +46,7 @@ class StepFunctionsTriggeredRun(TriggeredRun):
         )

         command_obj = self.deployer.spm.get(pid)
+        command_obj.sync_wait()
         return command_obj.process.returncode == 0


@@ -174,6 +175,7 @@ class StepFunctionsDeployedFlow(DeployedFlow):
         )

         command_obj = self.deployer.spm.get(pid)
+        command_obj.sync_wait()
         return command_obj.process.returncode == 0

     def trigger(self, **kwargs) -> StepFunctionsTriggeredRun:
@@ -217,6 +219,7 @@ class StepFunctionsDeployedFlow(DeployedFlow):
             attribute_file_fd, command_obj, self.deployer.file_read_timeout
         )

+        command_obj.sync_wait()
        if command_obj.process.returncode == 0:
             return StepFunctionsTriggeredRun(
                 deployer=self.deployer, content=content

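For orientation, a hedged sketch of the user-facing path these objects sit behind; Deployer is Metaflow's programmatic deployment API, though the flow file name and parameter values here are invented.

from metaflow import Deployer

# Deploy a hypothetical flow to AWS Step Functions and trigger it.
# StepFunctionsDeployedFlow.trigger() (patched above) now calls
# command_obj.sync_wait() before inspecting the subprocess return code.
deployed = Deployer("hello_flow.py").step_functions().create()
triggered = deployed.trigger(alpha=0.5)

run = triggered.run  # metaflow.Run handle once the execution is registered
print(run)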
metaflow/plugins/cards/card_decorator.py

@@ -1,13 +1,16 @@
+import json
+import os
+import re
+import tempfile
+
 from metaflow.decorators import StepDecorator
 from metaflow.metaflow_current import current
+from metaflow.user_configs.config_options import ConfigInput
+from metaflow.user_configs.config_parameters import dump_config_values
 from metaflow.util import to_unicode
+
 from .component_serializer import CardComponentCollector, get_card_class
 from .card_creator import CardCreator
-
-
-# from metaflow import get_metadata
-import re
-
 from .exception import CARD_ID_PATTERN, TYPE_CHECK_REGEX

 ASYNC_TIMEOUT = 30
@@ -111,6 +114,14 @@ class CardDecorator(StepDecorator):
         self._logger = logger
         self.card_options = None

+        # We check for configuration options. We do this here before they are
+        # converted to properties.
+        self._config_values = [
+            (config.name, ConfigInput.make_key_name(config.name))
+            for _, config in flow._get_parameters()
+            if config.IS_CONFIG_PARAMETER
+        ]
+
         self.card_options = self.attributes["options"]

         evt_name = "step-init"
@@ -146,6 +157,18 @@ class CardDecorator(StepDecorator):
         self._task_datastore = task_datastore
         self._metadata = metadata

+        # If we have configs, we need to dump them to a file so we can re-use them
+        # when calling the card creation subprocess.
+        if self._config_values:
+            with tempfile.NamedTemporaryFile(
+                mode="w", encoding="utf-8", delete=False
+            ) as config_file:
+                config_value = dump_config_values(flow)
+                json.dump(config_value, config_file)
+                self._config_file_name = config_file.name
+        else:
+            self._config_file_name = None
+
         card_type = self.attributes["type"]
         card_class = get_card_class(card_type)

@@ -179,7 +202,7 @@ class CardDecorator(StepDecorator):
         # we need to ensure that `current.card` has `CardComponentCollector` instantiated only once.
         if not self._is_event_registered("pre-step"):
             self._register_event("pre-step")
-            self._set_card_creator(CardCreator(self._create_top_level_args()))
+            self._set_card_creator(CardCreator(self._create_top_level_args(flow)))

         current._update_env(
             {"card": CardComponentCollector(self._logger, self.card_creator)}
@@ -223,6 +246,13 @@ class CardDecorator(StepDecorator):
         self.card_creator.create(mode="render", final=True, **create_options)
         self.card_creator.create(mode="refresh", final=True, **create_options)

+        # Unlink the config file if it exists
+        if self._config_file_name:
+            try:
+                os.unlink(self._config_file_name)
+            except Exception as e:
+                pass
+
     @staticmethod
     def _options(mapping):
         for k, v in mapping.items():
@@ -232,9 +262,13 @@ class CardDecorator(StepDecorator):
                 for value in v:
                     yield "--%s" % k
                     if not isinstance(value, bool):
-
+                        if isinstance(value, tuple):
+                            for val in value:
+                                yield to_unicode(val)
+                        else:
+                            yield to_unicode(value)

-    def _create_top_level_args(self):
+    def _create_top_level_args(self, flow):
         top_level_options = {
             "quiet": True,
             "metadata": self._metadata.TYPE,
@@ -247,4 +281,8 @@ class CardDecorator(StepDecorator):
             # We don't provide --with as all execution is taking place in
             # the context of the main process
         }
+        if self._config_values:
+            top_level_options["config-value"] = self._config_values
+            top_level_options["local-config-file"] = self._config_file_name
+
         return list(self._options(top_level_options))

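As a rough illustration of the options the card subprocess now receives when config parameters exist; the expansion helper below mimics, but is not, CardDecorator._options, and the config name, kv name, and temp-file path are invented.

# A minimal sketch, not the actual CardDecorator._options implementation.
def expand(mapping):
    for k, v in mapping.items():
        values = v if isinstance(v, list) else [v]
        for value in values:
            yield "--%s" % k
            if not isinstance(value, bool):
                if isinstance(value, tuple):
                    for val in value:
                        yield str(val)
                else:
                    yield str(value)


top_level_options = {
    "quiet": True,
    "config-value": [("config", "kv.config")],   # (name, kv-name) pairs; kv-name format assumed
    "local-config-file": "/tmp/tmpab12cd.json",   # hypothetical NamedTemporaryFile path
}
print(list(expand(top_level_options)))
# ['--quiet', '--config-value', 'config', 'kv.config',
#  '--local-config-file', '/tmp/tmpab12cd.json']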
metaflow/plugins/datatools/s3/s3op.py

@@ -722,8 +722,8 @@ def cli():
     pass


-@tracing.cli_entrypoint("s3op/list")
 @cli.command("list", help="List S3 objects")
+@tracing.cli_entrypoint("s3op/list")
 @click.option(
     "--recursive/--no-recursive",
     default=False,
@@ -782,8 +782,8 @@ def lst(
         print(format_result_line(idx, url.prefix, url.url, str(size)))


-@tracing.cli_entrypoint("s3op/put")
 @cli.command(help="Upload files to S3")
+@tracing.cli_entrypoint("s3op/put")
 @click.option(
     "--file",
     "files",
@@ -977,8 +977,8 @@ def _populate_prefixes(prefixes, inputs):
     return prefixes, is_transient_retry


-@tracing.cli_entrypoint("s3op/get")
 @cli.command(help="Download files from S3")
+@tracing.cli_entrypoint("s3op/get")
 @click.option(
     "--recursive/--no-recursive",
     default=False,

metaflow/plugins/kubernetes/kubernetes_cli.py

@@ -33,12 +33,12 @@ def kubernetes():
     pass


-@tracing.cli_entrypoint("kubernetes/step")
 @kubernetes.command(
     help="Execute a single task on Kubernetes. This command calls the top-level step "
     "command inside a Kubernetes pod with the given options. Typically you do not call "
     "this command directly; it is used internally by Metaflow."
 )
+@tracing.cli_entrypoint("kubernetes/step")
 @click.argument("step-name")
 @click.argument("code-package-sha")
 @click.argument("code-package-url")

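A side note on why swapping the decorator order matters, sketched with stand-in decorators rather than the real tracing and click modules; names and behavior below are illustrative only.

import functools


def entrypoint(name):
    """Stand-in for tracing.cli_entrypoint: wraps the callable it receives."""
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            print(f"trace:{name}")
            return func(*args, **kwargs)
        return wrapper
    return decorator


def command(func):
    """Stand-in for click's command registration, which wraps func in a Command."""
    return {"callback": func}


# Decorators apply bottom-up, so with @command on top and @entrypoint below,
# the traced wrapper is what gets registered as the command callback.
@command
@entrypoint("s3op/list")
def lst():
    return "listing"


print(lst["callback"]())  # prints trace:s3op/list, then "listing"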
metaflow/plugins/kubernetes/kubernetes_decorator.py

@@ -153,8 +153,8 @@ class KubernetesDecorator(StepDecorator):
     supports_conda_environment = True
     target_platform = "linux-64"

-    def
-        super(KubernetesDecorator, self).
+    def init(self):
+        super(KubernetesDecorator, self).init()

         if not self.attributes["namespace"]:
             self.attributes["namespace"] = KUBERNETES_NAMESPACE

metaflow/plugins/pypi/bootstrap.py

@@ -1,4 +1,5 @@
 import bz2
+import concurrent.futures
 import io
 import json
 import os
@@ -6,6 +7,9 @@ import shutil
 import subprocess
 import sys
 import tarfile
+import time
+
+import requests

 from metaflow.metaflow_config import DATASTORE_LOCAL_DIR
 from metaflow.plugins import DATASTORES
@@ -15,6 +19,18 @@ from . import MAGIC_FILE, _datastore_packageroot

 # Bootstraps a valid conda virtual environment composed of conda and pypi packages

+
+def timer(func):
+    def wrapper(*args, **kwargs):
+        start_time = time.time()
+        result = func(*args, **kwargs)
+        duration = time.time() - start_time
+        # print(f"Time taken for {func.__name__}: {duration:.2f} seconds")
+        return result
+
+    return wrapper
+
+
 if __name__ == "__main__":
     if len(sys.argv) != 5:
         print("Usage: bootstrap.py <flow_name> <id> <datastore_type> <architecture>")
@@ -47,6 +63,8 @@ if __name__ == "__main__":

     prefix = os.path.join(os.getcwd(), architecture, id_)
     pkgs_dir = os.path.join(os.getcwd(), ".pkgs")
+    conda_pkgs_dir = os.path.join(pkgs_dir, "conda")
+    pypi_pkgs_dir = os.path.join(pkgs_dir, "pypi")
     manifest_dir = os.path.join(os.getcwd(), DATASTORE_LOCAL_DIR, flow_name)

     datastores = [d for d in DATASTORES if d.TYPE == datastore_type]
@@ -64,77 +82,194 @@ if __name__ == "__main__":
         os.path.join(os.getcwd(), MAGIC_FILE),
         os.path.join(manifest_dir, MAGIC_FILE),
     )
-
     with open(os.path.join(manifest_dir, MAGIC_FILE)) as f:
         env = json.load(f)[id_][architecture]

-
-
-
-
+    def run_cmd(cmd):
+        result = subprocess.run(
+            cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
+        )
+        if result.returncode != 0:
+            print(f"Bootstrap failed while executing: {cmd}")
+            print("Stdout:", result.stdout)
+            print("Stderr:", result.stderr)
+            sys.exit(1)
+
+    @timer
+    def install_micromamba(architecture):
+        micromamba_dir = os.path.join(os.getcwd(), "micromamba")
+        micromamba_path = os.path.join(micromamba_dir, "bin", "micromamba")
+
+        if which("micromamba"):
+            return which("micromamba")
+        if os.path.exists(micromamba_path):
+            os.environ["PATH"] += os.pathsep + os.path.dirname(micromamba_path)
+            return micromamba_path
+
+        # Download and extract in one go
+        # TODO: Serve from cloudflare
+        url = f"https://micro.mamba.pm/api/micromamba/{architecture}/2.0.4"
+
+        # Prepare directory once
+        os.makedirs(os.path.dirname(micromamba_path), exist_ok=True)
+
+        # Stream and process directly to file
+        with requests.get(url, stream=True, timeout=30) as response:
+            if response.status_code != 200:
+                raise Exception(
+                    f"Failed to download micromamba: HTTP {response.status_code}"
+                )
+
+            decompressor = bz2.BZ2Decompressor()
+
+            # Process in memory without temporary files
+            tar_content = decompressor.decompress(response.raw.read())
+
+            with tarfile.open(fileobj=io.BytesIO(tar_content), mode="r:") as tar:
+                member = tar.getmember("bin/micromamba")
+                # Extract directly to final location
+                with open(micromamba_path, "wb") as f:
+                    f.write(tar.extractfile(member).read())
+
+        # Set executable permission
+        os.chmod(micromamba_path, 0o755)
+
+        # Update PATH only once at the end
+        os.environ["PATH"] += os.pathsep + os.path.dirname(micromamba_path)
+        return micromamba_path
+
+    @timer
+    def download_conda_packages(storage, packages, dest_dir):
+
+        def process_conda_package(args):
             # Ensure that conda packages go into architecture specific folders.
             # The path looks like REPO/CHANNEL/CONDA_SUBDIR/PACKAGE. We trick
             # Micromamba into believing that all packages are coming from a local
             # channel - the only hurdle is ensuring that packages are organised
             # properly.
-
-
-            dest = os.path.join(conda_pkgs_dir, "/".join(key.split("/")[-2:]))
+            key, tmpfile, dest_dir = args
+            dest = os.path.join(dest_dir, "/".join(key.split("/")[-2:]))
             os.makedirs(os.path.dirname(dest), exist_ok=True)
             shutil.move(tmpfile, dest)

-
-
-
-
-
-
-
-
+        os.makedirs(dest_dir, exist_ok=True)
+        with storage.load_bytes([package["path"] for package in packages]) as results:
+            with concurrent.futures.ThreadPoolExecutor() as executor:
+                executor.map(
+                    process_conda_package,
+                    [(key, tmpfile, dest_dir) for key, tmpfile, _ in results],
+                )
+        # for key, tmpfile, _ in results:
+
+        #     # TODO: consider RAM disk
+        #     dest = os.path.join(dest_dir, "/".join(key.split("/")[-2:]))
+        #     os.makedirs(os.path.dirname(dest), exist_ok=True)
+        #     shutil.move(tmpfile, dest)
+        return dest_dir
+
+    @timer
+    def download_pypi_packages(storage, packages, dest_dir):
+
+        def process_pypi_package(args):
+            key, tmpfile, dest_dir = args
+            dest = os.path.join(dest_dir, os.path.basename(key))
+            shutil.move(tmpfile, dest)
+
+        os.makedirs(dest_dir, exist_ok=True)
+        with storage.load_bytes([package["path"] for package in packages]) as results:
+            with concurrent.futures.ThreadPoolExecutor() as executor:
+                executor.map(
+                    process_pypi_package,
+                    [(key, tmpfile, dest_dir) for key, tmpfile, _ in results],
+                )
+        # for key, tmpfile, _ in results:
+        #     dest = os.path.join(dest_dir, os.path.basename(key))
+        #     shutil.move(tmpfile, dest)
+        return dest_dir
+
+    @timer
+    def create_conda_environment(prefix, conda_pkgs_dir):
+        cmd = f'''set -e;
+            tmpfile=$(mktemp);
+            echo "@EXPLICIT" > "$tmpfile";
+            ls -d {conda_pkgs_dir}/*/* >> "$tmpfile";
             export PATH=$PATH:$(pwd)/micromamba;
-
-
-
-
-
-
-
-
-
-
-
-            export CONDA_PKGS_DIRS=$(pwd)/micromamba/pkgs;
-            micromamba create --yes --offline --no-deps --safety-checks=disabled --no-extra-safety-checks --prefix {prefix} --file "$tmpfile";
-            rm "$tmpfile"''',
-        ]
-
-        # Download PyPI packages.
-        if "pypi" in env:
-            pypi_pkgs_dir = os.path.join(pkgs_dir, "pypi")
-            with storage.load_bytes(
-                [package["path"] for package in env["pypi"]]
-            ) as results:
-                for key, tmpfile, _ in results:
-                    dest = os.path.join(pypi_pkgs_dir, os.path.basename(key))
-                    os.makedirs(os.path.dirname(dest), exist_ok=True)
-                    shutil.move(tmpfile, dest)
-
-        # Install PyPI packages.
-        cmds.extend(
-            [
-                f"""set -e;
-            export PATH=$PATH:$(pwd)/micromamba;
-            export CONDA_PKGS_DIRS=$(pwd)/micromamba/pkgs;
-            micromamba run --prefix {prefix} python -m pip --disable-pip-version-check install --root-user-action=ignore --no-compile {pypi_pkgs_dir}/*.whl --no-user"""
-            ]
-        )
+            export CONDA_PKGS_DIRS=$(pwd)/micromamba/pkgs;
+            export MAMBA_NO_LOW_SPEED_LIMIT=1;
+            export MAMBA_USE_INDEX_CACHE=1;
+            export MAMBA_NO_PROGRESS_BARS=1;
+            export CONDA_FETCH_THREADS=1;
+            micromamba create --yes --offline --no-deps \
+                --safety-checks=disabled --no-extra-safety-checks \
+                --prefix {prefix} --file "$tmpfile" \
+                --no-pyc --no-rc --always-copy;
+            rm "$tmpfile"'''
+        run_cmd(cmd)

-
-
-
-
-
-
-
-
+    @timer
+    def install_pypi_packages(prefix, pypi_pkgs_dir):
+
+        cmd = f"""set -e;
+            export PATH=$PATH:$(pwd)/micromamba;
+            export CONDA_PKGS_DIRS=$(pwd)/micromamba/pkgs;
+            micromamba run --prefix {prefix} python -m pip --disable-pip-version-check \
+                install --root-user-action=ignore --no-compile --no-index \
+                --no-cache-dir --no-deps --prefer-binary \
+                --find-links={pypi_pkgs_dir} --no-user \
+                --no-warn-script-location --no-input \
+                {pypi_pkgs_dir}/*.whl
+        """
+        run_cmd(cmd)
+
+    @timer
+    def setup_environment(
+        architecture, storage, env, prefix, conda_pkgs_dir, pypi_pkgs_dir
+    ):
+        with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
+            # install micromamba, download conda and pypi packages in parallel
+            futures = {
+                "micromamba": executor.submit(install_micromamba, architecture),
+                "conda_pkgs": executor.submit(
+                    download_conda_packages, storage, env["conda"], conda_pkgs_dir
+                ),
+            }
+            if "pypi" in env:
+                futures["pypi_pkgs"] = executor.submit(
+                    download_pypi_packages, storage, env["pypi"], pypi_pkgs_dir
+                )
+
+            # create conda environment after micromamba is installed and conda packages are downloaded
+            done, _ = concurrent.futures.wait(
+                [futures["micromamba"], futures["conda_pkgs"]],
+                return_when=concurrent.futures.ALL_COMPLETED,
+            )
+
+            for future in done:
+                future.result()
+
+            # start conda environment creation
+            futures["conda_env"] = executor.submit(
+                create_conda_environment, prefix, conda_pkgs_dir
+            )
+
+            if "pypi" in env:
+                # install pypi packages after conda environment is created and pypi packages are downloaded
+                done, _ = concurrent.futures.wait(
+                    [futures["conda_env"], futures["pypi_pkgs"]],
+                    return_when=concurrent.futures.ALL_COMPLETED,
+                )
+
+                for future in done:
+                    future.result()
+
+                # install pypi packages
+                futures["pypi_install"] = executor.submit(
+                    install_pypi_packages, prefix, pypi_pkgs_dir
+                )
+                # wait for pypi packages to be installed
+                futures["pypi_install"].result()
+            else:
+                # wait for conda environment to be created
+                futures["conda_env"].result()
+
+    setup_environment(architecture, storage, env, prefix, conda_pkgs_dir, pypi_pkgs_dir)
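For context, a hedged sketch of the MAGIC_FILE manifest this script reads; the overall shape follows the code above (per-id, per-architecture entries with a "conda" list and an optional "pypi" list, each package carrying a datastore "path"), while the id, architecture, and file names are invented and real entries may include additional fields.

# Hypothetical contents of the MAGIC_FILE manifest consumed by bootstrap.py.
# env = json.load(f)[id_][architecture] yields the inner dict used above.
manifest = {
    "env-0123456789abcdef": {                      # id_ (invented)
        "linux-64": {                               # architecture
            "conda": [
                {"path": "conda/linux-64/python-3.11.5-hab00c5b_0.conda"},
            ],
            "pypi": [
                {"path": "pypi/requests-2.32.3-py3-none-any.whl"},
            ],
        }
    }
}

env = manifest["env-0123456789abcdef"]["linux-64"]
print([pkg["path"] for pkg in env["conda"]])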