metaflow 2.14.3__py2.py3-none-any.whl → 2.15.1__py2.py3-none-any.whl
This diff compares the contents of two publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
- metaflow/__init__.py +7 -0
- metaflow/cmd/code/__init__.py +230 -0
- metaflow/cmd/develop/stub_generator.py +5 -2
- metaflow/cmd/main_cli.py +1 -0
- metaflow/cmd/make_wrapper.py +16 -0
- metaflow/metaflow_config.py +2 -0
- metaflow/metaflow_environment.py +3 -1
- metaflow/mflog/__init__.py +4 -3
- metaflow/plugins/aws/batch/batch_cli.py +4 -4
- metaflow/plugins/aws/batch/batch_decorator.py +8 -0
- metaflow/plugins/kubernetes/kubernetes_decorator.py +2 -1
- metaflow/plugins/kubernetes/kubernetes_jobsets.py +2 -0
- metaflow/plugins/pypi/bootstrap.py +18 -27
- metaflow/plugins/pypi/conda_environment.py +8 -8
- metaflow/plugins/pypi/parsers.py +268 -0
- metaflow/plugins/pypi/utils.py +18 -0
- metaflow/runner/subprocess_manager.py +12 -6
- metaflow/version.py +1 -1
- metaflow-2.15.1.data/data/share/metaflow/devtools/Makefile +322 -0
- metaflow-2.15.1.data/data/share/metaflow/devtools/Tiltfile +620 -0
- metaflow-2.15.1.data/data/share/metaflow/devtools/pick_services.sh +104 -0
- {metaflow-2.14.3.dist-info → metaflow-2.15.1.dist-info}/METADATA +3 -3
- {metaflow-2.14.3.dist-info → metaflow-2.15.1.dist-info}/RECORD +27 -21
- {metaflow-2.14.3.dist-info → metaflow-2.15.1.dist-info}/WHEEL +1 -1
- {metaflow-2.14.3.dist-info → metaflow-2.15.1.dist-info}/entry_points.txt +1 -0
- {metaflow-2.14.3.dist-info → metaflow-2.15.1.dist-info}/LICENSE +0 -0
- {metaflow-2.14.3.dist-info → metaflow-2.15.1.dist-info}/top_level.txt +0 -0
metaflow/__init__.py
CHANGED
@@ -119,6 +119,13 @@ from .includefile import IncludeFile
 # Decorators
 from .decorators import step, _import_plugin_decorators
 
+# Config parsers
+from .plugins.pypi.parsers import (
+    requirements_txt_parser,
+    pyproject_toml_parser,
+    conda_environment_yml_parser,
+)
+
 # this auto-generates decorator functions from Decorator objects
 # in the top-level metaflow namespace
 _import_plugin_decorators(globals())
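
These parsers are re-exported at the top level, so they can be imported directly from `metaflow`. A minimal sketch of the intended use, assuming (as the names suggest) they plug into Config as parser= callables; the flow name and file name here are hypothetical:

    from metaflow import FlowSpec, Config, step, requirements_txt_parser

    class TrainFlow(FlowSpec):
        # Hypothetical: parse requirements.txt into a structured config value.
        deps = Config("deps", default="requirements.txt", parser=requirements_txt_parser)

        @step
        def start(self):
            print(self.deps)
            self.next(self.end)

        @step
        def end(self):
            pass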

metaflow/cmd/code/__init__.py
ADDED
@@ -0,0 +1,230 @@
+import os
+import shutil
+import sys
+from subprocess import PIPE, CompletedProcess, run
+from tempfile import TemporaryDirectory
+from typing import Any, Callable, List, Mapping, Optional, cast
+
+from metaflow import Run
+from metaflow._vendor import click
+from metaflow.cli import echo_always
+
+
+@click.group()
+def cli():
+    pass
+
+
+@cli.group(help="Access, compare, and manage code associated with Metaflow runs.")
+def code():
+    pass
+
+
+def echo(line: str) -> None:
+    echo_always(line, err=True, fg="magenta")
+
+
+def extract_code_package(runspec: str) -> TemporaryDirectory:
+    try:
+        mf_run = Run(runspec, _namespace_check=False)
+        echo(f"✅ Run *{runspec}* found, downloading code..")
+    except Exception as e:
+        echo(f"❌ Run **{runspec}** not found")
+        raise e
+
+    if mf_run.code is None:
+        echo(
+            f"❌ Run **{runspec}** doesn't have a code package. Maybe it's a local run?"
+        )
+        raise RuntimeError("no code package found")
+
+    return mf_run.code.extract()
+
+
+def perform_diff(
+    source_dir: str,
+    target_dir: Optional[str] = None,
+    output: bool = False,
+    **kwargs: Mapping[str, Any],
+) -> Optional[List[str]]:
+    if target_dir is None:
+        target_dir = os.getcwd()
+
+    diffs = []
+    for dirpath, dirnames, filenames in os.walk(source_dir, followlinks=True):
+        for fname in filenames:
+            # NOTE: the paths below need to be set up carefully
+            # for the `patch` command to work. Better not to touch
+            # the directories below. If you must, test that patches
+            # work after your changes.
+            #
+            # target_file is the git repo in the current working directory
+            rel = os.path.relpath(dirpath, source_dir)
+            target_file = os.path.join(rel, fname)
+            # source_file is the run file loaded in a tmp directory
+            source_file = os.path.join(dirpath, fname)
+
+            if sys.stdout.isatty() and not output:
+                color = ["--color"]
+            else:
+                color = ["--no-color"]
+
+            if os.path.exists(os.path.join(target_dir, target_file)):
+                cmd = (
+                    ["git", "diff", "--no-index", "--exit-code"]
+                    + color
+                    + [
+                        target_file,
+                        source_file,
+                    ]
+                )
+                result: CompletedProcess = run(
+                    cmd, text=True, stdout=PIPE, cwd=target_dir
+                )
+                if result.returncode == 0:
+                    if not output:
+                        echo(f"✅ {target_file} is identical, skipping")
+                    continue
+
+                if output:
+                    diffs.append(result.stdout)
+                else:
+                    run(["less", "-R"], input=result.stdout, text=True)
+            else:
+                if not output:
+                    echo(f"❗ {target_file} not in the target directory, skipping")
+    return diffs if output else None
+
+
+def run_op(
+    runspec: str, op: Callable[..., Optional[List[str]]], **op_args: Mapping[str, Any]
+) -> Optional[List[str]]:
+    tmp = None
+    try:
+        tmp = extract_code_package(runspec)
+        return op(tmp.name, **op_args)
+    finally:
+        if tmp and os.path.exists(tmp.name):
+            shutil.rmtree(tmp.name)
+
+
+def run_op_diff_runs(
+    source_run_pathspec: str, target_run_pathspec: str, **op_args: Mapping[str, Any]
+) -> Optional[List[str]]:
+    source_tmp = None
+    target_tmp = None
+    try:
+        source_tmp = extract_code_package(source_run_pathspec)
+        target_tmp = extract_code_package(target_run_pathspec)
+        return perform_diff(source_tmp.name, target_tmp.name, **op_args)
+    finally:
+        for d in [source_tmp, target_tmp]:
+            if d and os.path.exists(d.name):
+                shutil.rmtree(d.name)
+
+
+def op_diff(tmpdir: str, **kwargs: Mapping[str, Any]) -> Optional[List[str]]:
+    kwargs_dict = dict(kwargs)
+    target_dir = cast(Optional[str], kwargs_dict.pop("target_dir", None))
+    output: bool = bool(kwargs_dict.pop("output", False))
+    op_args: Mapping[str, Any] = {**kwargs_dict}
+    return perform_diff(tmpdir, target_dir=target_dir, output=output, **op_args)
+
+
+def op_pull(tmpdir: str, dst: str, **op_args: Mapping[str, Any]) -> None:
+    if os.path.exists(dst):
+        echo(f"❌ Directory *{dst}* already exists")
+    else:
+        shutil.move(tmpdir, dst)
+        echo(f"Code downloaded to *{dst}*")
+
+
+def op_patch(tmpdir: str, dst: str, **kwargs: Mapping[str, Any]) -> None:
+    diffs = perform_diff(tmpdir, output=True) or []
+    with open(dst, "w", encoding="utf-8") as f:
+        for out in diffs:
+            out = out.replace(tmpdir, "/.")
+            out = out.replace("+++ b/./", "+++ b/")
+            out = out.replace("--- b/./", "--- b/")
+            out = out.replace("--- a/./", "--- a/")
+            out = out.replace("+++ a/./", "+++ a/")
+            f.write(out)
+    echo(f"Patch saved in *{dst}*")
+    path = run(
+        ["git", "rev-parse", "--show-prefix"], text=True, stdout=PIPE
+    ).stdout.strip()
+    if path:
+        diropt = f" --directory={path.rstrip('/')}"
+    else:
+        diropt = ""
+    echo("Apply the patch by running:")
+    echo_always(
+        f"git apply --verbose{diropt} {dst}", highlight=True, bold=True, err=True
+    )
+
+
+@code.command()
+@click.argument("run_pathspec")
+def diff(run_pathspec: str, **kwargs: Mapping[str, Any]) -> None:
+    """
+    Do a 'git diff' of the current directory and a Metaflow run.
+    """
+    _ = run_op(run_pathspec, op_diff, **kwargs)
+
+
+@code.command()
+@click.argument("source_run_pathspec")
+@click.argument("target_run_pathspec")
+def diff_runs(
+    source_run_pathspec: str, target_run_pathspec: str, **kwargs: Mapping[str, Any]
+) -> None:
+    """
+    Do a 'git diff' between two Metaflow runs.
+    """
+    _ = run_op_diff_runs(source_run_pathspec, target_run_pathspec, **kwargs)
+
+
+@code.command()
+@click.argument("run_pathspec")
+@click.option(
+    "--dir", help="Destination directory (default: {run_pathspec}_code)", default=None
+)
+def pull(
+    run_pathspec: str, dir: Optional[str] = None, **kwargs: Mapping[str, Any]
+) -> None:
+    """
+    Pull the code of a Metaflow run.
+    """
+    if dir is None:
+        dir = run_pathspec.lower().replace("/", "_") + "_code"
+    op_args: Mapping[str, Any] = {**kwargs, "dst": dir}
+    run_op(run_pathspec, op_pull, **op_args)
+
+
+@code.command()
+@click.argument("run_pathspec")
+@click.option(
+    "--file_path",
+    help="Patch file name. If not provided, defaults to a sanitized version of RUN_PATHSPEC "
+    "with slashes replaced by underscores, plus '.patch'.",
+    show_default=False,
+)
+@click.option(
+    "--overwrite", is_flag=True, help="Overwrite the patch file if it exists."
+)
+def patch(
+    run_pathspec: str,
+    file_path: Optional[str] = None,
+    overwrite: bool = False,
+    **kwargs: Mapping[str, Any],
+) -> None:
+    """
+    Create a patch by comparing current dir with a Metaflow run.
+    """
+    if file_path is None:
+        file_path = run_pathspec.lower().replace("/", "_") + ".patch"
+    if os.path.exists(file_path) and not overwrite:
+        echo(f"File *{file_path}* already exists. To overwrite, specify --overwrite.")
+        return
+    op_args: Mapping[str, Any] = {**kwargs, "dst": file_path}
+    run_op(run_pathspec, op_patch, **op_args)
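
The new `metaflow code` group drives all of its subcommands (diff, diff_runs, pull, patch) through `git diff --no-index` against a run's extracted code package; the one-line main_cli.py change in the file list presumably registers the group with the main CLI, though its content is not shown here. A minimal sketch of the same technique via the client API; the run pathspec is hypothetical:

    from subprocess import run
    from metaflow import Run

    # Run(...).code is the run's code package; .extract() unpacks it into a
    # TemporaryDirectory, exactly as extract_code_package() does above.
    code = Run("HelloFlow/42", _namespace_check=False).code
    tmp = code.extract()
    run(["git", "diff", "--no-index", ".", tmp.name])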

metaflow/cmd/develop/stub_generator.py
CHANGED
@@ -1133,13 +1133,16 @@ class StubGenerator:
             result = result[1:]
         # Add doc to first and last overloads. Jedi uses the last one and pycharm
         # the first one. Go figure.
+        result_docstring = docs["func_doc"]
+        if docs["param_doc"]:
+            result_docstring += "\nParameters\n----------\n" + docs["param_doc"]
         result[0] = (
             result[0][0],
-
+            result_docstring,
         )
         result[-1] = (
             result[-1][0],
-
+            result_docstring,
         )
         return result
 

metaflow/cmd/make_wrapper.py
ADDED
@@ -0,0 +1,16 @@
+import sys
+import subprocess
+from pathlib import Path
+import sysconfig
+
+
+def main():
+    share_dir = Path(sysconfig.get_paths()["data"]) / "share" / "metaflow" / "devtools"
+    makefile_path = share_dir / "Makefile"
+    cmd = ["make", "-f", str(makefile_path)] + sys.argv[1:]
+    # subprocess.run(cmd, check=True)
+    try:
+        completed = subprocess.run(cmd, check=True)
+        sys.exit(completed.returncode)
+    except subprocess.CalledProcessError as ex:
+        sys.exit(ex.returncode)
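
The wrapper resolves the Makefile that now ships under share/metaflow/devtools (alongside the new Tiltfile and pick_services.sh in the file list) relative to the interpreter's install prefix, then hands its arguments to make. A quick way to see where that lookup lands, using the same stdlib calls as main():

    import sysconfig
    from pathlib import Path

    # Same lookup the wrapper performs: <data prefix>/share/metaflow/devtools
    print(Path(sysconfig.get_paths()["data"]) / "share" / "metaflow" / "devtools")

The single line added to entry_points.txt presumably exposes main() as a console script; its content is not shown in this diff.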
metaflow/metaflow_config.py
CHANGED
@@ -381,6 +381,8 @@ KUBERNETES_DISK = from_conf("KUBERNETES_DISK", None)
 # Default kubernetes QoS class
 KUBERNETES_QOS = from_conf("KUBERNETES_QOS", "burstable")
 
+# Architecture of kubernetes nodes - used for @conda/@pypi in metaflow-dev
+KUBERNETES_CONDA_ARCH = from_conf("KUBERNETES_CONDA_ARCH")
 ARGO_WORKFLOWS_KUBERNETES_SECRETS = from_conf("ARGO_WORKFLOWS_KUBERNETES_SECRETS", "")
 ARGO_WORKFLOWS_ENV_VARS_TO_SKIP = from_conf("ARGO_WORKFLOWS_ENV_VARS_TO_SKIP", "")
 
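
from_conf-backed settings are read from a METAFLOW_-prefixed environment variable (or the Metaflow config file), so pointing environment resolution at arm64 nodes would look roughly like the sketch below; the value is an assumption, chosen to match the conda platform strings used in bootstrap.py further down:

    import os

    # Hypothetical: resolve @conda/@pypi environments for arm64 Kubernetes nodes.
    os.environ["METAFLOW_KUBERNETES_CONDA_ARCH"] = "linux-aarch64"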
metaflow/metaflow_environment.py
CHANGED
@@ -6,7 +6,7 @@ from .util import get_username
 from . import metaflow_version
 from metaflow.exception import MetaflowException
 from metaflow.extension_support import dump_module_info
-from metaflow.mflog import BASH_MFLOG
+from metaflow.mflog import BASH_MFLOG, BASH_FLUSH_LOGS
 from . import R
 
 
@@ -159,6 +159,7 @@ class MetaflowEnvironment(object):
     def get_package_commands(self, code_package_url, datastore_type):
         cmds = [
             BASH_MFLOG,
+            BASH_FLUSH_LOGS,
             "mflog 'Setting up task environment.'",
             self._get_install_dependencies_cmd(datastore_type),
             "mkdir metaflow",
@@ -176,6 +177,7 @@ class MetaflowEnvironment(object):
             "fi" % code_package_url,
             "TAR_OPTIONS='--warning=no-timestamp' tar xf job.tar",
             "mflog 'Task is starting.'",
+            "flush_mflogs",
         ]
         return cmds
 
metaflow/mflog/__init__.py
CHANGED
@@ -44,6 +44,8 @@ BASH_MFLOG = (
 BASH_SAVE_LOGS_ARGS = ["python", "-m", "metaflow.mflog.save_logs"]
 BASH_SAVE_LOGS = " ".join(BASH_SAVE_LOGS_ARGS)
 
+BASH_FLUSH_LOGS = "flush_mflogs(){ " f"{BASH_SAVE_LOGS}; " "}"
+
 
 # this function returns a bash expression that redirects stdout
 # and stderr of the given bash expression to mflog.tee
@@ -63,7 +65,7 @@ def bash_capture_logs(bash_expr, var_transform=None):
 # update_delay determines how often logs should be uploaded to S3
 # as a function of the task execution time
 
-MIN_UPDATE_DELAY =
+MIN_UPDATE_DELAY = 0.25  # the most frequent update interval
 MAX_UPDATE_DELAY = 30.0  # the least frequent update interval
 
 
@@ -110,7 +112,6 @@ def export_mflog_env_vars(
 
 def tail_logs(prefix, stdout_tail, stderr_tail, echo, has_log_updates):
     def _available_logs(tail, stream, echo, should_persist=False):
-        # print the latest batch of lines
         try:
             for line in tail:
                 if should_persist:
@@ -128,7 +129,7 @@ def tail_logs(prefix, stdout_tail, stderr_tail, echo, has_log_updates):
 
     start_time = time.time()
    next_log_update = start_time
-    log_update_delay =
+    log_update_delay = update_delay(0)
     while has_log_updates():
         if time.time() > next_log_update:
             _available_logs(stdout_tail, "stdout", echo)
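
Substituting BASH_SAVE_LOGS into the new constant shows what task images actually receive: a flush_mflogs shell function that runs the log saver on demand. The derivation, mirroring the two definitions above:

    BASH_SAVE_LOGS = " ".join(["python", "-m", "metaflow.mflog.save_logs"])
    BASH_FLUSH_LOGS = "flush_mflogs(){ " f"{BASH_SAVE_LOGS}; " "}"
    assert BASH_FLUSH_LOGS == "flush_mflogs(){ python -m metaflow.mflog.save_logs; }"

metaflow_environment.py and conda_environment.py call flush_mflogs at setup milestones so those log lines reach the datastore promptly instead of waiting for the periodic uploader.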

metaflow/plugins/aws/batch/batch_cli.py
CHANGED
@@ -211,7 +211,7 @@ def step(
     log_driver=None,
     log_options=None,
     num_parallel=None,
-    **kwargs
+    **kwargs,
 ):
     def echo(msg, stream="stderr", batch_id=None, **kwargs):
         msg = util.to_unicode(msg)
@@ -273,11 +273,11 @@ def step(
         "metaflow_version"
     ]
 
+    env = {"METAFLOW_FLOW_FILENAME": os.path.basename(sys.argv[0])}
+
     env_deco = [deco for deco in node.decorators if deco.name == "environment"]
     if env_deco:
-        env
-    else:
-        env = {}
+        env.update(env_deco[0].attributes["vars"])
 
     # Add the environment variables related to the input-paths argument
     if split_vars:

metaflow/plugins/aws/batch/batch_decorator.py
CHANGED
@@ -298,6 +298,13 @@ class BatchDecorator(StepDecorator):
         self._save_logs_sidecar = Sidecar("save_logs_periodically")
         self._save_logs_sidecar.start()
 
+        # Start spot termination monitor sidecar.
+        current._update_env(
+            {"spot_termination_notice": "/tmp/spot_termination_notice"}
+        )
+        self._spot_monitor_sidecar = Sidecar("spot_termination_monitor")
+        self._spot_monitor_sidecar.start()
+
         num_parallel = int(os.environ.get("AWS_BATCH_JOB_NUM_NODES", 0))
         if num_parallel >= 1 and ubf_context == UBF_CONTROL:
             # UBF handling for multinode case
@@ -350,6 +357,7 @@ class BatchDecorator(StepDecorator):
 
         try:
             self._save_logs_sidecar.terminate()
+            self._spot_monitor_sidecar.terminate()
         except:
             # Best effort kill
             pass
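
Since current._update_env publishes the path on the current singleton, user code in a @batch step can watch for an interruption notice. A minimal sketch; the attribute name comes from the _update_env call above, and the polling strategy is illustrative:

    import os
    from metaflow import current

    # Inside a @batch step: the spot_termination_monitor sidecar is presumably
    # responsible for materializing this file when AWS signals an interruption.
    if os.path.exists(current.spot_termination_notice):
        pass  # e.g. checkpoint state and finish the task early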

metaflow/plugins/kubernetes/kubernetes_decorator.py
CHANGED
@@ -29,6 +29,7 @@ from metaflow.metaflow_config import (
     KUBERNETES_SHARED_MEMORY,
     KUBERNETES_TOLERATIONS,
     KUBERNETES_QOS,
+    KUBERNETES_CONDA_ARCH,
 )
 from metaflow.plugins.resources_decorator import ResourcesDecorator
 from metaflow.plugins.timeout_decorator import get_run_time_limit_for_task
@@ -158,7 +159,7 @@ class KubernetesDecorator(StepDecorator):
 
     # Conda environment support
     supports_conda_environment = True
-    target_platform = "linux-64"
+    target_platform = KUBERNETES_CONDA_ARCH or "linux-64"
 
     def init(self):
         super(KubernetesDecorator, self).init()

metaflow/plugins/kubernetes/kubernetes_jobsets.py
CHANGED
@@ -319,6 +319,8 @@ class RunningJobSet(object):
     def kill(self):
         plural = "jobsets"
         client = self._client.get()
+        if not (self.is_running or self.is_waiting):
+            return
         try:
             # Killing the control pod will trigger the jobset to mark everything as failed.
             # Since jobsets have a successPolicy set to `All` which ensures that everything has
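
The added guard turns kill() into a no-op for jobsets that are neither running nor waiting, so Metaflow no longer issues delete calls against jobsets that have already finished.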

metaflow/plugins/pypi/bootstrap.py
CHANGED
@@ -8,6 +8,7 @@ import subprocess
 import sys
 import tarfile
 import time
+import platform
 from urllib.error import URLError
 from urllib.request import urlopen
 from metaflow.metaflow_config import DATASTORE_LOCAL_DIR, CONDA_USE_FAST_INIT
@@ -19,7 +20,7 @@ import warnings
 
 from . import MAGIC_FILE, _datastore_packageroot
 
-FAST_INIT_BIN_URL = "https://fast-flow-init.outerbounds.sh/{platform}/
+FAST_INIT_BIN_URL = "https://fast-flow-init.outerbounds.sh/{platform}/latest"
 
 # Bootstraps a valid conda virtual environment composed of conda and pypi packages
 
@@ -36,29 +37,6 @@ def timer(func):
 
 
 if __name__ == "__main__":
-    # TODO: Detect architecture on the fly when dealing with arm architectures.
-    # ARCH=$(uname -m)
-    # OS=$(uname)
-
-    # if [[ "$OS" == "Linux" ]]; then
-    #     PLATFORM="linux"
-    #     if [[ "$ARCH" == "aarch64" ]]; then
-    #         ARCH="aarch64";
-    #     elif [[ $ARCH == "ppc64le" ]]; then
-    #         ARCH="ppc64le";
-    #     else
-    #         ARCH="64";
-    #     fi
-    # fi
-
-    # if [[ "$OS" == "Darwin" ]]; then
-    #     PLATFORM="osx";
-    #     if [[ "$ARCH" == "arm64" ]]; then
-    #         ARCH="arm64";
-    #     else
-    #         ARCH="64"
-    #     fi
-    # fi
 
     def run_cmd(cmd, stdin_str=None):
         result = subprocess.run(
@@ -350,12 +328,25 @@
         cmd = f"fast-initializer --prefix {prefix} --packages-dir {pkgs_dir}"
         run_cmd(cmd, all_package_urls)
 
-    if len(sys.argv) !=
-        print("Usage: bootstrap.py <flow_name> <id> <datastore_type>
+    if len(sys.argv) != 4:
+        print("Usage: bootstrap.py <flow_name> <id> <datastore_type>")
         sys.exit(1)
 
     try:
-        _, flow_name, id_, datastore_type
+        _, flow_name, id_, datastore_type = sys.argv
+
+        system = platform.system().lower()
+        arch_machine = platform.machine().lower()
+
+        if system == "darwin" and arch_machine == "arm64":
+            architecture = "osx-arm64"
+        elif system == "darwin":
+            architecture = "osx-64"
+        elif system == "linux" and arch_machine == "aarch64":
+            architecture = "linux-aarch64"
+        else:
+            # default fallback
+            architecture = "linux-64"
 
         prefix = os.path.join(os.getcwd(), architecture, id_)
         pkgs_dir = os.path.join(os.getcwd(), ".pkgs")
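
This resolves the long-standing TODO: the commented-out shell sketch for architecture detection is replaced with platform.system()/platform.machine() checks that produce the same conda platform strings (osx-arm64, osx-64, linux-aarch64, linux-64) now used to name the environment prefix. On an Apple Silicon machine, for instance, platform.system() returns "Darwin" and platform.machine() returns "arm64", so the prefix becomes ./osx-arm64/<id>.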

metaflow/plugins/pypi/conda_environment.py
CHANGED
@@ -190,7 +190,6 @@ class CondaEnvironment(MetaflowEnvironment):
         # 4. Start PyPI solves in parallel after each conda environment is created
         # 5. Download PyPI packages sequentially
         # 6. Create and cache PyPI environments in parallel
-
         with ThreadPoolExecutor() as executor:
             # Start all conda solves in parallel
             conda_futures = [
@@ -213,14 +212,14 @@ class CondaEnvironment(MetaflowEnvironment):
 
                 # Queue PyPI solve to start after conda create
                 if result[0] in pypi_envs:
+                    # solve pypi envs uniquely
+                    pypi_env = pypi_envs.pop(result[0])
 
                     def pypi_solve(env):
                         create_future.result()  # Wait for conda create
                         return solve(*env, "pypi")
 
-                    pypi_futures.append(
-                        executor.submit(pypi_solve, pypi_envs[result[0]])
-                    )
+                    pypi_futures.append(executor.submit(pypi_solve, pypi_env))
 
         # Process PyPI results sequentially for downloads
         for solve_future in pypi_futures:
@@ -242,7 +241,7 @@ class CondaEnvironment(MetaflowEnvironment):
         if id_:
             # bootstrap.py is responsible for ensuring the validity of this executable.
             # -s is important! Can otherwise leak packages to other environments.
-            return os.path.join("
+            return os.path.join("$MF_ARCH", id_, "bin/python -s")
         else:
             # for @conda/@pypi(disabled=True).
             return super().executable(step_name, default)
@@ -315,7 +314,6 @@ class CondaEnvironment(MetaflowEnvironment):
         # 5. All resolved packages (Conda or PyPI) are cached
         # 6. PyPI packages are only installed for local platform
 
-        # Resolve `linux-64` Conda environments if @batch or @kubernetes are in play
         target_platform = conda_platform()
         for decorator in step.decorators:
             # NOTE: Keep the list of supported decorator names for backward compatibility purposes.
@@ -329,7 +327,6 @@ class CondaEnvironment(MetaflowEnvironment):
             "snowpark",
             "slurm",
         ]:
-            # TODO: Support arm architectures
             target_platform = getattr(decorator, "target_platform", "linux-64")
             break
 
@@ -424,15 +421,18 @@ class CondaEnvironment(MetaflowEnvironment):
         if id_:
             return [
                 "echo 'Bootstrapping virtual environment...'",
+                "flush_mflogs",
                 # We have to prevent the tracing module from loading,
                 # as the bootstrapping process uses the internal S3 client which would fail to import tracing
                 # due to the required dependencies being bundled into the conda environment,
                 # which is yet to be initialized at this point.
-                'DISABLE_TRACING=True python -m metaflow.plugins.pypi.bootstrap "%s" %s "%s"
+                'DISABLE_TRACING=True python -m metaflow.plugins.pypi.bootstrap "%s" %s "%s"'
                 % (self.flow.name, id_, self.datastore_type),
                 "echo 'Environment bootstrapped.'",
+                "flush_mflogs",
                 # To avoid having to install micromamba in the PATH in micromamba.py, we add it to the PATH here.
                 "export PATH=$PATH:$(pwd)/micromamba/bin",
+                "export MF_ARCH=$(case $(uname)/$(uname -m) in Darwin/arm64)echo osx-arm64;;Darwin/*)echo osx-64;;Linux/aarch64)echo linux-aarch64;;*)echo linux-64;;esac)",
             ]
         else:
             # for @conda/@pypi(disabled=True).
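
The MF_ARCH export at the end ties this together with bootstrap.py: the bash case expression computes the same platform string as the Python detection there, so the "$MF_ARCH" component of the new executable() path resolves, at task runtime, to the directory where bootstrap.py created the environment.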