ob-metaflow 2.12.39.1__py2.py3-none-any.whl → 2.13.1.1__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ob-metaflow might be problematic.
- metaflow/__init__.py +1 -1
- metaflow/cli.py +111 -36
- metaflow/cli_args.py +2 -2
- metaflow/cli_components/run_cmds.py +3 -1
- metaflow/datastore/flow_datastore.py +2 -2
- metaflow/exception.py +8 -2
- metaflow/flowspec.py +48 -36
- metaflow/graph.py +28 -27
- metaflow/includefile.py +2 -2
- metaflow/lint.py +35 -20
- metaflow/metadata_provider/heartbeat.py +23 -8
- metaflow/metaflow_config.py +7 -0
- metaflow/parameters.py +11 -4
- metaflow/plugins/argo/argo_client.py +0 -2
- metaflow/plugins/argo/argo_workflows.py +86 -104
- metaflow/plugins/argo/argo_workflows_cli.py +0 -1
- metaflow/plugins/argo/argo_workflows_decorator.py +2 -4
- metaflow/plugins/argo/jobset_input_paths.py +0 -1
- metaflow/plugins/aws/aws_utils.py +6 -1
- metaflow/plugins/aws/batch/batch_client.py +1 -3
- metaflow/plugins/aws/batch/batch_decorator.py +11 -11
- metaflow/plugins/aws/secrets_manager/aws_secrets_manager_secrets_provider.py +13 -10
- metaflow/plugins/aws/step_functions/dynamo_db_client.py +0 -3
- metaflow/plugins/aws/step_functions/production_token.py +1 -1
- metaflow/plugins/aws/step_functions/step_functions.py +1 -1
- metaflow/plugins/aws/step_functions/step_functions_cli.py +0 -1
- metaflow/plugins/aws/step_functions/step_functions_decorator.py +0 -1
- metaflow/plugins/aws/step_functions/step_functions_deployer_objects.py +0 -1
- metaflow/plugins/cards/card_creator.py +1 -0
- metaflow/plugins/cards/card_decorator.py +46 -8
- metaflow/plugins/kubernetes/kube_utils.py +55 -1
- metaflow/plugins/kubernetes/kubernetes.py +33 -80
- metaflow/plugins/kubernetes/kubernetes_cli.py +22 -5
- metaflow/plugins/kubernetes/kubernetes_decorator.py +49 -2
- metaflow/plugins/kubernetes/kubernetes_job.py +3 -6
- metaflow/plugins/kubernetes/kubernetes_jobsets.py +22 -5
- metaflow/plugins/pypi/bootstrap.py +87 -54
- metaflow/plugins/pypi/conda_environment.py +7 -6
- metaflow/plugins/pypi/micromamba.py +35 -21
- metaflow/plugins/pypi/pip.py +2 -4
- metaflow/plugins/pypi/utils.py +4 -2
- metaflow/runner/click_api.py +175 -39
- metaflow/runner/deployer_impl.py +6 -1
- metaflow/runner/metaflow_runner.py +6 -1
- metaflow/user_configs/config_options.py +87 -34
- metaflow/user_configs/config_parameters.py +44 -25
- metaflow/util.py +2 -2
- metaflow/version.py +1 -1
- {ob_metaflow-2.12.39.1.dist-info → ob_metaflow-2.13.1.1.dist-info}/METADATA +2 -2
- {ob_metaflow-2.12.39.1.dist-info → ob_metaflow-2.13.1.1.dist-info}/RECORD +54 -54
- {ob_metaflow-2.12.39.1.dist-info → ob_metaflow-2.13.1.1.dist-info}/WHEEL +1 -1
- {ob_metaflow-2.12.39.1.dist-info → ob_metaflow-2.13.1.1.dist-info}/LICENSE +0 -0
- {ob_metaflow-2.12.39.1.dist-info → ob_metaflow-2.13.1.1.dist-info}/entry_points.txt +0 -0
- {ob_metaflow-2.12.39.1.dist-info → ob_metaflow-2.13.1.1.dist-info}/top_level.txt +0 -0
metaflow/plugins/kubernetes/kubernetes_decorator.py
CHANGED

@@ -19,6 +19,8 @@ from metaflow.metaflow_config import (
     KUBERNETES_GPU_VENDOR,
     KUBERNETES_IMAGE_PULL_POLICY,
     KUBERNETES_MEMORY,
+    KUBERNETES_LABELS,
+    KUBERNETES_ANNOTATIONS,
     KUBERNETES_NAMESPACE,
     KUBERNETES_NODE_SELECTOR,
     KUBERNETES_PERSISTENT_VOLUME_CLAIMS,
@@ -34,7 +36,8 @@ from metaflow.sidecar import Sidecar
 from metaflow.unbounded_foreach import UBF_CONTROL

 from ..aws.aws_utils import get_docker_registry, get_ec2_instance_metadata
-from .kubernetes import KubernetesException
+from .kubernetes import KubernetesException
+from .kube_utils import validate_kube_labels, parse_kube_keyvalue_list

 from metaflow.metaflow_config import MAX_MEMORY_PER_TASK, MAX_CPU_PER_TASK

@@ -91,6 +94,10 @@ class KubernetesDecorator(StepDecorator):
     tolerations : List[str], default []
         The default is extracted from METAFLOW_KUBERNETES_TOLERATIONS.
         Kubernetes tolerations to use when launching pod in Kubernetes.
+    labels: Dict[str, str], default: METAFLOW_KUBERNETES_LABELS
+        Kubernetes labels to use when launching pod in Kubernetes.
+    annotations: Dict[str, str], default: METAFLOW_KUBERNETES_ANNOTATIONS
+        Kubernetes annotations to use when launching pod in Kubernetes.
     use_tmpfs : bool, default False
         This enables an explicit tmpfs mount for this step.
     tmpfs_tempdir : bool, default True
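A minimal sketch of how the newly documented attributes would be used at a step; the flow name and all label/annotation values here are illustrative, not from the package:

from metaflow import FlowSpec, kubernetes, step

class TrainFlow(FlowSpec):
    @kubernetes(
        labels={"team": "ml", "env": "dev"},         # illustrative values
        annotations={"owner": "data-platform"},      # illustrative values
    )
    @step
    def start(self):
        self.next(self.end)

    @step
    def end(self):
        pass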
@@ -133,6 +140,8 @@ class KubernetesDecorator(StepDecorator):
         "gpu_vendor": None,
         "tolerations": None,  # e.g., [{"key": "arch", "operator": "Equal", "value": "amd"},
         #                              {"key": "foo", "operator": "Equal", "value": "bar"}]
+        "labels": None,  # e.g. {"test-label": "value", "another-label":"value2"}
+        "annotations": None,  # e.g. {"note": "value", "another-note": "value2"}
         "use_tmpfs": None,
         "tmpfs_tempdir": True,
         "tmpfs_size": None,
@@ -219,6 +228,36 @@ class KubernetesDecorator(StepDecorator):
             self.attributes["memory"] = KUBERNETES_MEMORY
         if self.attributes["disk"] == self.defaults["disk"] and KUBERNETES_DISK:
             self.attributes["disk"] = KUBERNETES_DISK
+        # Label source precedence (decreasing):
+        # - System labels (set outside of decorator)
+        # - Decorator labels: @kubernetes(labels={})
+        # - Environment variable labels: METAFLOW_KUBERNETES_LABELS=
+        deco_labels = {}
+        if self.attributes["labels"] is not None:
+            deco_labels = self.attributes["labels"]
+
+        env_labels = {}
+        if KUBERNETES_LABELS:
+            env_labels = parse_kube_keyvalue_list(KUBERNETES_LABELS.split(","), False)
+
+        self.attributes["labels"] = {**env_labels, **deco_labels}
+
+        # Annotations
+        # annotation precedence (decreasing):
+        # - System annotations (set outside of decorator)
+        # - Decorator annotations: @kubernetes(annotations={})
+        # - Environment annotations: METAFLOW_KUBERNETES_ANNOTATIONS=
+        deco_annotations = {}
+        if self.attributes["annotations"] is not None:
+            deco_annotations = self.attributes["annotations"]
+
+        env_annotations = {}
+        if KUBERNETES_ANNOTATIONS:
+            env_annotations = parse_kube_keyvalue_list(
+                KUBERNETES_ANNOTATIONS.split(","), False
+            )
+
+        self.attributes["annotations"] = {**env_annotations, **deco_annotations}

         # If no docker image is explicitly specified, impute a default image.
         if not self.attributes["image"]:
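A quick sketch of the resulting merge order, assuming the comma-separated key=value format that parse_kube_keyvalue_list appears to consume (values illustrative):

# Assume METAFLOW_KUBERNETES_LABELS="team=ml,env=prod" in the environment
# and @kubernetes(labels={"env": "dev"}) on the step.
env_labels = {"team": "ml", "env": "prod"}   # parsed from the environment variable
deco_labels = {"env": "dev"}                 # passed to the decorator
merged = {**env_labels, **deco_labels}       # decorator keys win on conflict
assert merged == {"team": "ml", "env": "dev"}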
@@ -386,6 +425,9 @@ class KubernetesDecorator(StepDecorator):
                 )
             )

+        validate_kube_labels(self.attributes["labels"])
+        # TODO: add validation to annotations as well?
+
     def package_init(self, flow, step_name, environment):
         try:
             # Kubernetes is a soft dependency.
@@ -441,7 +483,12 @@ class KubernetesDecorator(StepDecorator):
                         "=".join([key, str(val)]) if val else key
                         for key, val in v.items()
                     ]
-                elif k in ["tolerations", "persistent_volume_claims"]:
+                elif k in [
+                    "tolerations",
+                    "persistent_volume_claims",
+                    "labels",
+                    "annotations",
+                ]:
                     cli_args.command_options[k] = json.dumps(v)
                 else:
                     cli_args.command_options[k] = v
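Dict-valued attributes now round-trip through JSON when the decorator forwards them as CLI options; a simplified sketch of the idea, not the actual call path:

import json

attributes = {
    "labels": {"team": "ml"},
    "annotations": {"note": "nightly"},
    "cpu": "2",
}
command_options = {
    # structured values are JSON-encoded so they survive the CLI boundary
    k: json.dumps(v)
    if k in ("tolerations", "persistent_volume_claims", "labels", "annotations")
    else v
    for k, v in attributes.items()
}
print(command_options["labels"])  # '{"team": "ml"}'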
|
@@ -1,23 +1,20 @@
|
|
|
1
|
-
import copy
|
|
2
1
|
import json
|
|
3
2
|
import math
|
|
4
3
|
import random
|
|
5
|
-
import sys
|
|
6
4
|
import time
|
|
7
5
|
|
|
8
6
|
from metaflow.exception import MetaflowException
|
|
9
7
|
from metaflow.metaflow_config import KUBERNETES_SECRETS
|
|
10
8
|
from metaflow.tracing import inject_tracing_vars
|
|
11
|
-
from metaflow.unbounded_foreach import UBF_CONTROL, UBF_TASK
|
|
12
9
|
from metaflow.metaflow_config_funcs import init_config
|
|
13
10
|
|
|
14
11
|
CLIENT_REFRESH_INTERVAL_SECONDS = 300
|
|
12
|
+
|
|
13
|
+
from .kube_utils import qos_requests_and_limits
|
|
15
14
|
from .kubernetes_jobsets import (
|
|
16
15
|
KubernetesJobSet,
|
|
17
16
|
) # We need this import for Kubernetes Client.
|
|
18
17
|
|
|
19
|
-
from .kube_utils import qos_requests_and_limits
|
|
20
|
-
|
|
21
18
|
|
|
22
19
|
class KubernetesJobException(MetaflowException):
|
|
23
20
|
headline = "Kubernetes job error"
|
|
@@ -451,7 +448,7 @@ class RunningJob(object):
|
|
|
451
448
|
def best_effort_kill():
|
|
452
449
|
try:
|
|
453
450
|
self.kill()
|
|
454
|
-
except Exception
|
|
451
|
+
except Exception:
|
|
455
452
|
pass
|
|
456
453
|
|
|
457
454
|
atexit.register(best_effort_kill)
|
|
metaflow/plugins/kubernetes/kubernetes_jobsets.py
CHANGED

@@ -1,4 +1,3 @@
-import copy
 import json
 import math
 import random
@@ -7,7 +6,6 @@ from collections import namedtuple
 from metaflow.exception import MetaflowException
 from metaflow.metaflow_config import KUBERNETES_JOBSET_GROUP, KUBERNETES_JOBSET_VERSION
 from metaflow.tracing import inject_tracing_vars
-from metaflow.metaflow_config import KUBERNETES_SECRETS

 from .kube_utils import qos_requests_and_limits

@@ -257,7 +255,7 @@ class RunningJobSet(object):
         def best_effort_kill():
             try:
                 self.kill()
-            except Exception
+            except Exception:
                 pass

         atexit.register(best_effort_kill)
@@ -342,7 +340,7 @@ class RunningJobSet(object):
                 stdout=True,
                 tty=False,
             )
-        except Exception
+        except Exception:
             with client.ApiClient() as api_client:
                 # If we are unable to kill the control pod then
                 # Delete the jobset to kill the subsequent pods.
@@ -862,6 +860,16 @@ class KubernetesJobSet(object):
         self._annotations = dict(self._annotations, **{name: value})
         return self

+    def labels(self, labels):
+        for k, v in labels.items():
+            self.label(k, v)
+        return self
+
+    def annotations(self, annotations):
+        for k, v in annotations.items():
+            self.annotation(k, v)
+        return self
+
     def secret(self, name):
         self.worker.secret(name)
         self.control.secret(name)
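The new plural helpers fan out to the existing singular setters and return self for chaining; a self-contained stand-in mirroring that design (names and values illustrative):

class Builder:
    # Minimal stand-in for the pattern above: labels() delegates to label()
    # per key and returns self so calls can be chained.
    def __init__(self):
        self._labels = {}

    def label(self, name, value):
        self._labels[name] = value
        return self

    def labels(self, labels):
        for k, v in labels.items():
            self.label(k, v)
        return self

b = Builder().labels({"team": "ml", "env": "prod"})
print(b._labels)  # {'team': 'ml', 'env': 'prod'}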
@@ -987,15 +995,24 @@ class KubernetesArgoJobSet(object):
         self._labels = dict(self._labels, **{name: value})
         return self

+    def labels(self, labels):
+        for k, v in labels.items():
+            self.label(k, v)
+        return self
+
     def annotation(self, name, value):
         self.worker.annotation(name, value)
         self.control.annotation(name, value)
         self._annotations = dict(self._annotations, **{name: value})
         return self

+    def annotations(self, annotations):
+        for k, v in annotations.items():
+            self.annotation(k, v)
+        return self
+
     def dump(self):
         client = self._kubernetes_sdk
-        import json

         data = json.dumps(
             client.ApiClient().sanitize_for_serialization(
metaflow/plugins/pypi/bootstrap.py
CHANGED

@@ -8,12 +8,14 @@ import subprocess
 import sys
 import tarfile
 import time
-
-import requests
-
+from urllib.error import URLError
+from urllib.request import urlopen
 from metaflow.metaflow_config import DATASTORE_LOCAL_DIR
 from metaflow.plugins import DATASTORES
+from metaflow.plugins.pypi.utils import MICROMAMBA_MIRROR_URL, MICROMAMBA_URL
 from metaflow.util import which
+from urllib.request import Request
+import warnings

 from . import MAGIC_FILE, _datastore_packageroot
@@ -32,11 +34,6 @@ def timer(func):


 if __name__ == "__main__":
-    if len(sys.argv) != 5:
-        print("Usage: bootstrap.py <flow_name> <id> <datastore_type> <architecture>")
-        sys.exit(1)
-    _, flow_name, id_, datastore_type, architecture = sys.argv
-
     # TODO: Detect architecture on the fly when dealing with arm architectures.
     # ARCH=$(uname -m)
     # OS=$(uname)
@@ -61,30 +58,6 @@ if __name__ == "__main__":
     # fi
     # fi

-    prefix = os.path.join(os.getcwd(), architecture, id_)
-    pkgs_dir = os.path.join(os.getcwd(), ".pkgs")
-    conda_pkgs_dir = os.path.join(pkgs_dir, "conda")
-    pypi_pkgs_dir = os.path.join(pkgs_dir, "pypi")
-    manifest_dir = os.path.join(os.getcwd(), DATASTORE_LOCAL_DIR, flow_name)
-
-    datastores = [d for d in DATASTORES if d.TYPE == datastore_type]
-    if not datastores:
-        print(f"No datastore found for type: {datastore_type}")
-        sys.exit(1)
-
-    storage = datastores[0](
-        _datastore_packageroot(datastores[0], lambda *args, **kwargs: None)
-    )
-
-    # Move MAGIC_FILE inside local datastore.
-    os.makedirs(manifest_dir, exist_ok=True)
-    shutil.move(
-        os.path.join(os.getcwd(), MAGIC_FILE),
-        os.path.join(manifest_dir, MAGIC_FILE),
-    )
-    with open(os.path.join(manifest_dir, MAGIC_FILE)) as f:
-        env = json.load(f)[id_][architecture]
-
     def run_cmd(cmd):
         result = subprocess.run(
             cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
@@ -107,29 +80,55 @@ if __name__ == "__main__":
         return micromamba_path

     # Download and extract in one go
+    url = MICROMAMBA_URL.format(platform=architecture, version="2.0.4")
+    mirror_url = MICROMAMBA_MIRROR_URL.format(
+        platform=architecture, version="2.0.4"
+    )

     # Prepare directory once
     os.makedirs(os.path.dirname(micromamba_path), exist_ok=True)

-    decompressor = bz2.BZ2Decompressor()
-
-    # Process in memory without temporary files
-    tar_content = decompressor.decompress(response.raw.read())
+    # Download and decompress in one go
+    def _download_and_extract(url):
+        headers = {
+            "Accept-Encoding": "gzip, deflate, br",
+            "Connection": "keep-alive",
+            "User-Agent": "python-urllib",
+        }

+        max_retries = 3
+        for attempt in range(max_retries):
+            try:
+                req = Request(url, headers=headers)
+
+                with urlopen(req) as response:
+                    decompressor = bz2.BZ2Decompressor()
+                    with warnings.catch_warnings():
+                        warnings.filterwarnings(
+                            "ignore", category=DeprecationWarning
+                        )
+                        with tarfile.open(
+                            fileobj=io.BytesIO(
+                                decompressor.decompress(response.read())
+                            ),
+                            mode="r:",
+                        ) as tar:
+                            member = tar.getmember("bin/micromamba")
+                            tar.extract(member, micromamba_dir)
+                break
+            except (URLError, IOError) as e:
+                if attempt == max_retries - 1:
+                    raise Exception(
+                        f"Failed to download micromamba after {max_retries} attempts: {e}"
+                    )
+                time.sleep(2**attempt)
+
+    try:
+        # first try from mirror
+        _download_and_extract(mirror_url)
+    except Exception:
+        # download from mirror failed, try official source before failing.
+        _download_and_extract(url)

     # Set executable permission
     os.chmod(micromamba_path, 0o755)
@@ -140,7 +139,6 @@ if __name__ == "__main__":

     @timer
     def download_conda_packages(storage, packages, dest_dir):
-
         def process_conda_package(args):
             # Ensure that conda packages go into architecture specific folders.
             # The path looks like REPO/CHANNEL/CONDA_SUBDIR/PACKAGE. We trick
@@ -169,7 +167,6 @@ if __name__ == "__main__":

     @timer
     def download_pypi_packages(storage, packages, dest_dir):
-
         def process_pypi_package(args):
             key, tmpfile, dest_dir = args
             dest = os.path.join(dest_dir, os.path.basename(key))
@@ -208,7 +205,6 @@ if __name__ == "__main__":

     @timer
     def install_pypi_packages(prefix, pypi_pkgs_dir):
-
         cmd = f"""set -e;
             export PATH=$PATH:$(pwd)/micromamba;
             export CONDA_PKGS_DIRS=$(pwd)/micromamba/pkgs;
@@ -272,4 +268,41 @@ if __name__ == "__main__":
         # wait for conda environment to be created
         futures["conda_env"].result()

-
+    if len(sys.argv) != 5:
+        print("Usage: bootstrap.py <flow_name> <id> <datastore_type> <architecture>")
+        sys.exit(1)
+
+    try:
+        _, flow_name, id_, datastore_type, architecture = sys.argv
+
+        prefix = os.path.join(os.getcwd(), architecture, id_)
+        pkgs_dir = os.path.join(os.getcwd(), ".pkgs")
+        conda_pkgs_dir = os.path.join(pkgs_dir, "conda")
+        pypi_pkgs_dir = os.path.join(pkgs_dir, "pypi")
+        manifest_dir = os.path.join(os.getcwd(), DATASTORE_LOCAL_DIR, flow_name)
+
+        datastores = [d for d in DATASTORES if d.TYPE == datastore_type]
+        if not datastores:
+            print(f"No datastore found for type: {datastore_type}")
+            sys.exit(1)
+
+        storage = datastores[0](
+            _datastore_packageroot(datastores[0], lambda *args, **kwargs: None)
+        )
+
+        # Move MAGIC_FILE inside local datastore.
+        os.makedirs(manifest_dir, exist_ok=True)
+        shutil.move(
+            os.path.join(os.getcwd(), MAGIC_FILE),
+            os.path.join(manifest_dir, MAGIC_FILE),
+        )
+        with open(os.path.join(manifest_dir, MAGIC_FILE)) as f:
+            env = json.load(f)[id_][architecture]
+
+        setup_environment(
+            architecture, storage, env, prefix, conda_pkgs_dir, pypi_pkgs_dir
+        )
+
+    except Exception as e:
+        print(f"Error: {str(e)}", file=sys.stderr)
+        sys.exit(1)
metaflow/plugins/pypi/conda_environment.py
CHANGED

@@ -7,20 +7,15 @@ import json
 import os
 import tarfile
 import threading
-import time
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from functools import wraps
 from hashlib import sha256
 from io import BufferedIOBase, BytesIO
-from itertools import chain
 from urllib.parse import unquote, urlparse

-import requests
-
 from metaflow.exception import MetaflowException
 from metaflow.metaflow_config import get_pinned_conda_libs
 from metaflow.metaflow_environment import MetaflowEnvironment
-from metaflow.metaflow_profile import profile

 from . import MAGIC_FILE, _datastore_packageroot
 from .utils import conda_platform
@@ -501,6 +496,7 @@ class LazyOpen(BufferedIOBase):
         self._file = None
         self._buffer = None
         self._position = 0
+        self.requests = None

     def _ensure_file(self):
         if not self._file:
@@ -517,8 +513,13 @@ class LazyOpen(BufferedIOBase):
             raise ValueError("Both filename and url are missing")

     def _download_to_buffer(self):
+        if self.requests is None:
+            # TODO: Remove dependency on requests
+            import requests
+
+            self.requests = requests
         # TODO: Stream it in chunks?
-        response = requests.get(self.url, stream=True)
+        response = self.requests.get(self.url, stream=True)
         response.raise_for_status()
         return response.content
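The change defers the requests import until a download actually happens, so importing the module stays cheap when requests is unused. The pattern in isolation, as a minimal sketch rather than the package's code:

class LazyDownloader:
    def __init__(self, url):
        self.url = url
        self.requests = None  # heavy dependency not imported yet

    def fetch(self):
        if self.requests is None:
            import requests  # deferred: only paid for on first use

            self.requests = requests
        response = self.requests.get(self.url, stream=True)
        response.raise_for_status()
        return response.content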
metaflow/plugins/pypi/micromamba.py
CHANGED

@@ -8,7 +8,7 @@ import time
 from metaflow.exception import MetaflowException
 from metaflow.util import which

-from .utils import conda_platform
+from .utils import MICROMAMBA_MIRROR_URL, MICROMAMBA_URL, conda_platform


 class MicromambaException(MetaflowException):
@@ -323,7 +323,7 @@ class Micromamba(object):
                     stderr="\n".join(err),
                 )
             )
-        except (TypeError, ValueError)
+        except (TypeError, ValueError):
             pass
         raise MicromambaException(
             msg.format(
@@ -339,23 +339,37 @@ def _install_micromamba(installation_location):
     # Unfortunately no 32bit binaries are available for micromamba, which ideally
     # shouldn't be much of a problem in today's world.
     platform = conda_platform()
-    # requires bzip2
-    result = subprocess.Popen(
-        f"curl -Ls https://micro.mamba.pm/api/micromamba/{platform}/1.5.7 | tar -xvj -C {installation_location} bin/micromamba",
-        shell=True,
-        stderr=subprocess.PIPE,
-        stdout=subprocess.PIPE,
-    )
-    _, err = result.communicate()
-    if result.returncode != 0:
-        raise MicromambaException(
-            f"Micromamba installation '{result.args}' failed:\n{err.decode()}"
-        )
+    url = MICROMAMBA_URL.format(platform=platform, version="1.5.7")
+    mirror_url = MICROMAMBA_MIRROR_URL.format(platform=platform, version="1.5.7")
+    os.makedirs(installation_location, exist_ok=True)

+    def _download_and_extract(url):
+        max_retries = 3
+        for attempt in range(max_retries):
+            try:
+                # https://mamba.readthedocs.io/en/latest/micromamba-installation.html#manual-installation
+                # requires bzip2
+                result = subprocess.Popen(
+                    f"curl -Ls {url} | tar -xvj -C {installation_location} bin/micromamba",
+                    shell=True,
+                    stderr=subprocess.PIPE,
+                    stdout=subprocess.PIPE,
+                )
+                _, err = result.communicate()
+                if result.returncode != 0:
+                    raise MicromambaException(
+                        f"Micromamba installation '{result.args}' failed:\n{err.decode()}"
+                    )
+            except subprocess.CalledProcessError as e:
+                if attempt == max_retries - 1:
+                    raise MicromambaException(
+                        "Micromamba installation failed:\n{}".format(e.stderr.decode())
+                    )
+                time.sleep(2**attempt)
+
+    try:
+        # prioritize downloading from mirror
+        _download_and_extract(mirror_url)
+    except Exception:
+        # download from official source as a fallback
+        _download_and_extract(url)
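Both installers now share the same retry shape: up to three attempts with exponential backoff (1s, then 2s) before the failure propagates, and the mirror is tried before the official endpoint. A generic sketch of that shape, illustrative rather than the package's code:

import time

def with_retries(fn, max_retries=3):
    # Try fn up to max_retries times, sleeping 2**attempt seconds
    # (1s, 2s, ...) between failures; re-raise the last error.
    for attempt in range(max_retries):
        try:
            return fn()
        except Exception:
            if attempt == max_retries - 1:
                raise
            time.sleep(2**attempt)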
metaflow/plugins/pypi/pip.py
CHANGED

@@ -4,7 +4,6 @@ import re
 import shutil
 import subprocess
 import tempfile
-import time
 from concurrent.futures import ThreadPoolExecutor
 from itertools import chain, product
 from urllib.parse import unquote
@@ -107,9 +106,8 @@ class Pip(object):
         except PipPackageNotFound as ex:
             # pretty print package errors
             raise PipException(
-                "Note that ***@pypi*** does not currently support source distributions"
+                "Unable to find a binary distribution compatible with %s for %s.\n\n"
+                "Note: ***@pypi*** does not currently support source distributions"
                 % (ex.package_spec, platform)
             )
metaflow/plugins/pypi/utils.py
CHANGED

@@ -1,4 +1,3 @@
-import os
 import platform
 import sys

@@ -17,10 +16,13 @@ else:
     from metaflow._vendor.packaging import tags
     from metaflow._vendor.packaging.utils import parse_wheel_filename

-    from urllib.parse import unquote
+from urllib.parse import unquote

 from metaflow.exception import MetaflowException

+MICROMAMBA_URL = "https://micro.mamba.pm/api/micromamba/{platform}/{version}"
+MICROMAMBA_MIRROR_URL = "https://micromamba.outerbounds.sh/{platform}/{version}.tar.bz2"
+

 def conda_platform():
     # Returns the conda platform for the Python interpreter