metaflow 2.13__py2.py3-none-any.whl → 2.13.2__py2.py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
- metaflow/extension_support/plugins.py +1 -0
- metaflow/metadata_provider/heartbeat.py +23 -8
- metaflow/metaflow_config.py +2 -0
- metaflow/parameters.py +1 -1
- metaflow/plugins/__init__.py +13 -0
- metaflow/plugins/argo/argo_client.py +0 -2
- metaflow/plugins/argo/argo_workflows.py +98 -104
- metaflow/plugins/argo/argo_workflows_cli.py +0 -1
- metaflow/plugins/argo/argo_workflows_decorator.py +2 -4
- metaflow/plugins/argo/jobset_input_paths.py +0 -1
- metaflow/plugins/aws/aws_utils.py +6 -1
- metaflow/plugins/aws/batch/batch_client.py +1 -3
- metaflow/plugins/aws/batch/batch_decorator.py +11 -11
- metaflow/plugins/aws/step_functions/dynamo_db_client.py +0 -3
- metaflow/plugins/aws/step_functions/production_token.py +1 -1
- metaflow/plugins/aws/step_functions/step_functions.py +1 -1
- metaflow/plugins/aws/step_functions/step_functions_cli.py +0 -1
- metaflow/plugins/aws/step_functions/step_functions_decorator.py +0 -1
- metaflow/plugins/aws/step_functions/step_functions_deployer_objects.py +0 -1
- metaflow/plugins/kubernetes/kube_utils.py +55 -1
- metaflow/plugins/kubernetes/kubernetes.py +33 -80
- metaflow/plugins/kubernetes/kubernetes_cli.py +22 -5
- metaflow/plugins/kubernetes/kubernetes_decorator.py +49 -2
- metaflow/plugins/kubernetes/kubernetes_job.py +3 -6
- metaflow/plugins/kubernetes/kubernetes_jobsets.py +22 -5
- metaflow/plugins/pypi/bootstrap.py +87 -54
- metaflow/plugins/pypi/conda_environment.py +7 -6
- metaflow/plugins/pypi/micromamba.py +40 -22
- metaflow/plugins/pypi/pip.py +2 -4
- metaflow/plugins/pypi/utils.py +4 -2
- metaflow/runner/metaflow_runner.py +25 -3
- metaflow/runtime.py +18 -8
- metaflow/user_configs/config_parameters.py +23 -6
- metaflow/version.py +1 -1
- {metaflow-2.13.dist-info → metaflow-2.13.2.dist-info}/METADATA +2 -2
- {metaflow-2.13.dist-info → metaflow-2.13.2.dist-info}/RECORD +40 -40
- {metaflow-2.13.dist-info → metaflow-2.13.2.dist-info}/WHEEL +1 -1
- {metaflow-2.13.dist-info → metaflow-2.13.2.dist-info}/LICENSE +0 -0
- {metaflow-2.13.dist-info → metaflow-2.13.2.dist-info}/entry_points.txt +0 -0
- {metaflow-2.13.dist-info → metaflow-2.13.2.dist-info}/top_level.txt +0 -0
metaflow/plugins/pypi/bootstrap.py
CHANGED

@@ -8,12 +8,14 @@ import subprocess
 import sys
 import tarfile
 import time
-
-import requests
-
+from urllib.error import URLError
+from urllib.request import urlopen
 from metaflow.metaflow_config import DATASTORE_LOCAL_DIR
 from metaflow.plugins import DATASTORES
+from metaflow.plugins.pypi.utils import MICROMAMBA_MIRROR_URL, MICROMAMBA_URL
 from metaflow.util import which
+from urllib.request import Request
+import warnings
 
 from . import MAGIC_FILE, _datastore_packageroot
 

@@ -32,11 +34,6 @@ def timer(func):
 
 
 if __name__ == "__main__":
-    if len(sys.argv) != 5:
-        print("Usage: bootstrap.py <flow_name> <id> <datastore_type> <architecture>")
-        sys.exit(1)
-    _, flow_name, id_, datastore_type, architecture = sys.argv
-
     # TODO: Detect architecture on the fly when dealing with arm architectures.
     # ARCH=$(uname -m)
     # OS=$(uname)

@@ -61,30 +58,6 @@ if __name__ == "__main__":
     # fi
     # fi
 
-    prefix = os.path.join(os.getcwd(), architecture, id_)
-    pkgs_dir = os.path.join(os.getcwd(), ".pkgs")
-    conda_pkgs_dir = os.path.join(pkgs_dir, "conda")
-    pypi_pkgs_dir = os.path.join(pkgs_dir, "pypi")
-    manifest_dir = os.path.join(os.getcwd(), DATASTORE_LOCAL_DIR, flow_name)
-
-    datastores = [d for d in DATASTORES if d.TYPE == datastore_type]
-    if not datastores:
-        print(f"No datastore found for type: {datastore_type}")
-        sys.exit(1)
-
-    storage = datastores[0](
-        _datastore_packageroot(datastores[0], lambda *args, **kwargs: None)
-    )
-
-    # Move MAGIC_FILE inside local datastore.
-    os.makedirs(manifest_dir, exist_ok=True)
-    shutil.move(
-        os.path.join(os.getcwd(), MAGIC_FILE),
-        os.path.join(manifest_dir, MAGIC_FILE),
-    )
-    with open(os.path.join(manifest_dir, MAGIC_FILE)) as f:
-        env = json.load(f)[id_][architecture]
-
     def run_cmd(cmd):
         result = subprocess.run(
             cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True

@@ -107,29 +80,55 @@ if __name__ == "__main__":
             return micromamba_path
 
         # Download and extract in one go
-
-
+        url = MICROMAMBA_URL.format(platform=architecture, version="2.0.4")
+        mirror_url = MICROMAMBA_MIRROR_URL.format(
+            platform=architecture, version="2.0.4"
+        )
 
         # Prepare directory once
         os.makedirs(os.path.dirname(micromamba_path), exist_ok=True)
 
-        #
-
-
-
-
-
-
-        decompressor = bz2.BZ2Decompressor()
-
-        # Process in memory without temporary files
-        tar_content = decompressor.decompress(response.raw.read())
+        # Download and decompress in one go
+        def _download_and_extract(url):
+            headers = {
+                "Accept-Encoding": "gzip, deflate, br",
+                "Connection": "keep-alive",
+                "User-Agent": "python-urllib",
+            }
 
-
-
-
-
-
+            max_retries = 3
+            for attempt in range(max_retries):
+                try:
+                    req = Request(url, headers=headers)
+
+                    with urlopen(req) as response:
+                        decompressor = bz2.BZ2Decompressor()
+                        with warnings.catch_warnings():
+                            warnings.filterwarnings(
+                                "ignore", category=DeprecationWarning
+                            )
+                            with tarfile.open(
+                                fileobj=io.BytesIO(
+                                    decompressor.decompress(response.read())
+                                ),
+                                mode="r:",
+                            ) as tar:
+                                member = tar.getmember("bin/micromamba")
+                                tar.extract(member, micromamba_dir)
+                    break
+                except (URLError, IOError) as e:
+                    if attempt == max_retries - 1:
+                        raise Exception(
+                            f"Failed to download micromamba after {max_retries} attempts: {e}"
+                        )
+                    time.sleep(2**attempt)
+
+        try:
+            # first try from mirror
+            _download_and_extract(mirror_url)
+        except Exception:
+            # download from mirror failed, try official source before failing.
+            _download_and_extract(url)
 
         # Set executable permission
         os.chmod(micromamba_path, 0o755)

@@ -140,7 +139,6 @@ if __name__ == "__main__":
 
     @timer
    def download_conda_packages(storage, packages, dest_dir):
-
        def process_conda_package(args):
            # Ensure that conda packages go into architecture specific folders.
            # The path looks like REPO/CHANNEL/CONDA_SUBDIR/PACKAGE. We trick

@@ -169,7 +167,6 @@ if __name__ == "__main__":
 
     @timer
     def download_pypi_packages(storage, packages, dest_dir):
-
         def process_pypi_package(args):
             key, tmpfile, dest_dir = args
             dest = os.path.join(dest_dir, os.path.basename(key))

@@ -208,7 +205,6 @@ if __name__ == "__main__":
 
     @timer
     def install_pypi_packages(prefix, pypi_pkgs_dir):
-
         cmd = f"""set -e;
             export PATH=$PATH:$(pwd)/micromamba;
             export CONDA_PKGS_DIRS=$(pwd)/micromamba/pkgs;

@@ -272,4 +268,41 @@ if __name__ == "__main__":
         # wait for conda environment to be created
         futures["conda_env"].result()
 
-
+    if len(sys.argv) != 5:
+        print("Usage: bootstrap.py <flow_name> <id> <datastore_type> <architecture>")
+        sys.exit(1)
+
+    try:
+        _, flow_name, id_, datastore_type, architecture = sys.argv
+
+        prefix = os.path.join(os.getcwd(), architecture, id_)
+        pkgs_dir = os.path.join(os.getcwd(), ".pkgs")
+        conda_pkgs_dir = os.path.join(pkgs_dir, "conda")
+        pypi_pkgs_dir = os.path.join(pkgs_dir, "pypi")
+        manifest_dir = os.path.join(os.getcwd(), DATASTORE_LOCAL_DIR, flow_name)
+
+        datastores = [d for d in DATASTORES if d.TYPE == datastore_type]
+        if not datastores:
+            print(f"No datastore found for type: {datastore_type}")
+            sys.exit(1)
+
+        storage = datastores[0](
+            _datastore_packageroot(datastores[0], lambda *args, **kwargs: None)
+        )
+
+        # Move MAGIC_FILE inside local datastore.
+        os.makedirs(manifest_dir, exist_ok=True)
+        shutil.move(
+            os.path.join(os.getcwd(), MAGIC_FILE),
+            os.path.join(manifest_dir, MAGIC_FILE),
+        )
+        with open(os.path.join(manifest_dir, MAGIC_FILE)) as f:
+            env = json.load(f)[id_][architecture]
+
+        setup_environment(
+            architecture, storage, env, prefix, conda_pkgs_dir, pypi_pkgs_dir
+        )
+
+    except Exception as e:
+        print(f"Error: {str(e)}", file=sys.stderr)
+        sys.exit(1)
metaflow/plugins/pypi/conda_environment.py
CHANGED

@@ -7,20 +7,15 @@ import json
 import os
 import tarfile
 import threading
-import time
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from functools import wraps
 from hashlib import sha256
 from io import BufferedIOBase, BytesIO
-from itertools import chain
 from urllib.parse import unquote, urlparse
 
-import requests
-
 from metaflow.exception import MetaflowException
 from metaflow.metaflow_config import get_pinned_conda_libs
 from metaflow.metaflow_environment import MetaflowEnvironment
-from metaflow.metaflow_profile import profile
 
 from . import MAGIC_FILE, _datastore_packageroot
 from .utils import conda_platform

@@ -498,6 +493,7 @@ class LazyOpen(BufferedIOBase):
         self._file = None
         self._buffer = None
         self._position = 0
+        self.requests = None
 
     def _ensure_file(self):
         if not self._file:

@@ -514,8 +510,13 @@
             raise ValueError("Both filename and url are missing")
 
     def _download_to_buffer(self):
+        if self.requests is None:
+            # TODO: Remove dependency on requests
+            import requests
+
+            self.requests = requests
         # TODO: Stream it in chunks?
-        response = requests.get(self.url, stream=True)
+        response = self.requests.get(self.url, stream=True)
         response.raise_for_status()
         return response.content
 
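The LazyOpen change defers the `requests` import until a download actually happens, so importing the module no longer hard-requires requests at module load. A minimal sketch of the same lazy-import pattern; the class name here is hypothetical:

class LazyDownloader:
    def __init__(self, url):
        self.url = url
        self.requests = None  # resolved on first use

    def download(self):
        if self.requests is None:
            # Deferred import: requests is only needed once a download happens.
            import requests

            self.requests = requests
        response = self.requests.get(self.url, stream=True)
        response.raise_for_status()
        return response.content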
metaflow/plugins/pypi/micromamba.py
CHANGED

@@ -1,6 +1,7 @@
 import functools
 import json
 import os
+import re
 import subprocess
 import tempfile
 import time

@@ -8,7 +9,7 @@ import time
 from metaflow.exception import MetaflowException
 from metaflow.util import which
 
-from .utils import conda_platform
+from .utils import MICROMAMBA_MIRROR_URL, MICROMAMBA_URL, conda_platform
 
 
 class MicromambaException(MetaflowException):

@@ -23,6 +24,8 @@ class MicromambaException(MetaflowException):
 
 GLIBC_VERSION = os.environ.get("CONDA_OVERRIDE_GLIBC", "2.38")
 
+_double_equal_match = re.compile("==(?=[<=>!~])")
+
 
 class Micromamba(object):
     def __init__(self, logger=None):

@@ -101,7 +104,8 @@ class Micromamba(object):
             cmd.append("--channel=%s" % channel)
 
         for package, version in packages.items():
-
+            version_string = "%s==%s" % (package, version)
+            cmd.append(_double_equal_match.sub("", version_string))
         if python:
             cmd.append("python==%s" % python)
         # TODO: Ensure a human readable message is returned when the environment

@@ -323,7 +327,7 @@ class Micromamba(object):
                     stderr="\n".join(err),
                 )
             )
-        except (TypeError, ValueError)
+        except (TypeError, ValueError):
             pass
         raise MicromambaException(
             msg.format(

@@ -339,23 +343,37 @@ def _install_micromamba(installation_location):
     # Unfortunately no 32bit binaries are available for micromamba, which ideally
     # shouldn't be much of a problem in today's world.
     platform = conda_platform()
-
-
-
-    # requires bzip2
-    result = subprocess.Popen(
-        f"curl -Ls https://micro.mamba.pm/api/micromamba/{platform}/1.5.7 | tar -xvj -C {installation_location} bin/micromamba",
-        shell=True,
-        stderr=subprocess.PIPE,
-        stdout=subprocess.PIPE,
-    )
-    _, err = result.communicate()
-    if result.returncode != 0:
-        raise MicromambaException(
-            f"Micromamba installation '{result.args}' failed:\n{err.decode()}"
-        )
+    url = MICROMAMBA_URL.format(platform=platform, version="1.5.7")
+    mirror_url = MICROMAMBA_MIRROR_URL.format(platform=platform, version="1.5.7")
+    os.makedirs(installation_location, exist_ok=True)
 
-
-
-
-
+    def _download_and_extract(url):
+        max_retries = 3
+        for attempt in range(max_retries):
+            try:
+                # https://mamba.readthedocs.io/en/latest/micromamba-installation.html#manual-installation
+                # requires bzip2
+                result = subprocess.Popen(
+                    f"curl -Ls {url} | tar -xvj -C {installation_location} bin/micromamba",
+                    shell=True,
+                    stderr=subprocess.PIPE,
+                    stdout=subprocess.PIPE,
+                )
+                _, err = result.communicate()
+                if result.returncode != 0:
+                    raise MicromambaException(
+                        f"Micromamba installation '{result.args}' failed:\n{err.decode()}"
+                    )
+            except subprocess.CalledProcessError as e:
+                if attempt == max_retries - 1:
+                    raise MicromambaException(
+                        "Micromamba installation failed:\n{}".format(e.stderr.decode())
+                    )
+                time.sleep(2**attempt)
+
+    try:
+        # prioritize downloading from mirror
+        _download_and_extract(mirror_url)
+    except Exception:
+        # download from official source as a fallback
+        _download_and_extract(url)
metaflow/plugins/pypi/pip.py
CHANGED

@@ -4,7 +4,6 @@ import re
 import shutil
 import subprocess
 import tempfile
-import time
 from concurrent.futures import ThreadPoolExecutor
 from itertools import chain, product
 from urllib.parse import unquote

@@ -107,9 +106,8 @@ class Pip(object):
         except PipPackageNotFound as ex:
             # pretty print package errors
             raise PipException(
-                "
-                "
-                "Note that ***@pypi*** does not currently support source distributions"
+                "Unable to find a binary distribution compatible with %s for %s.\n\n"
+                "Note: ***@pypi*** does not currently support source distributions"
                 % (ex.package_spec, platform)
             )
 
metaflow/plugins/pypi/utils.py
CHANGED

@@ -1,4 +1,3 @@
-import os
 import platform
 import sys
 

@@ -17,10 +16,13 @@ else:
     from metaflow._vendor.packaging import tags
     from metaflow._vendor.packaging.utils import parse_wheel_filename
 
-    from urllib.parse import unquote
+from urllib.parse import unquote
 
 from metaflow.exception import MetaflowException
 
+MICROMAMBA_URL = "https://micro.mamba.pm/api/micromamba/{platform}/{version}"
+MICROMAMBA_MIRROR_URL = "https://micromamba.outerbounds.sh/{platform}/{version}.tar.bz2"
+
 
 def conda_platform():
     # Returns the conda platform for the Python interpreter
metaflow/runner/metaflow_runner.py
CHANGED

@@ -7,6 +7,8 @@ from typing import Dict, Iterator, Optional, Tuple
 
 from metaflow import Run
 
+from metaflow.plugins import get_runner_cli
+
 from .utils import (
     temporary_fifo,
     handle_timeout,

@@ -187,7 +189,27 @@ class ExecutingRun(object):
                 yield position, line
 
 
-class Runner(object):
+class RunnerMeta(type):
+    def __new__(mcs, name, bases, dct):
+        cls = super().__new__(mcs, name, bases, dct)
+
+        def _injected_method(subcommand_name, runner_subcommand):
+            def f(self, *args, **kwargs):
+                return runner_subcommand(self, *args, **kwargs)
+
+            f.__doc__ = runner_subcommand.__doc__ or ""
+            f.__name__ = subcommand_name
+
+            return f
+
+        for runner_subcommand in get_runner_cli():
+            method_name = runner_subcommand.name.replace("-", "_")
+            setattr(cls, method_name, _injected_method(method_name, runner_subcommand))
+
+        return cls
+
+
+class Runner(metaclass=RunnerMeta):
     """
     Metaflow's Runner API that presents a programmatic interface
     to run flows and perform other operations either synchronously or asynchronously.

@@ -337,7 +359,7 @@ class Runner(object):
 
         return self.__get_executing_run(attribute_file_fd, command_obj)
 
-    def resume(self, **kwargs):
+    def resume(self, **kwargs) -> ExecutingRun:
         """
         Blocking resume execution of the run.
         This method will wait until the resumed run has completed execution.

@@ -400,7 +422,7 @@ class Runner(object):
 
         return await self.__async_get_executing_run(attribute_file_fd, command_obj)
 
-    async def async_resume(self, **kwargs):
+    async def async_resume(self, **kwargs) -> ExecutingRun:
         """
         Non-blocking resume execution of the run.
         This method will return as soon as the resume has launched.
metaflow/runtime.py
CHANGED

@@ -125,7 +125,7 @@ class NativeRuntime(object):
         self._clone_run_id = clone_run_id
         self._clone_only = clone_only
         self._cloned_tasks = []
-        self.
+        self._ran_or_scheduled_task_index = set()
         self._reentrant = reentrant
         self._run_url = None
 

@@ -297,7 +297,7 @@ class NativeRuntime(object):
             task.ubf_context = ubf_context
             new_task_id = task.task_id
             self._cloned_tasks.append(task)
-            self.
+            self._ran_or_scheduled_task_index.add(cloned_task_pathspec_index)
             task_pathspec = "{}/{}/{}".format(self._run_id, step_name, new_task_id)
         else:
             task_pathspec = "{}/{}/{}".format(self._run_id, step_name, new_task_id)

@@ -384,8 +384,10 @@ class NativeRuntime(object):
             and step_name != "_parameters"
             and (step_name not in self._steps_to_rerun)
         ):
-            # "_unbounded_foreach" is a special flag to indicate that the transition
-            #
+            # "_unbounded_foreach" is a special flag to indicate that the transition
+            # is an unbounded foreach.
+            # Both parent and splitted children tasks will have this flag set.
+            # The splitted control/mapper tasks
             # are not foreach types because UBF is always followed by a join step.
             is_ubf_task = (
                 "_unbounded_foreach" in task_ds and task_ds["_unbounded_foreach"]

@@ -647,10 +649,18 @@ class NativeRuntime(object):
     # Store the parameters needed for task creation, so that pushing on items
     # onto the run_queue is an inexpensive operation.
     def _queue_push(self, step, task_kwargs, index=None):
-        #
-        #
-
-
+        # In the case of cloning, we set all the cloned tasks as the
+        # finished tasks when pushing tasks using _queue_tasks. This means that we
+        # could potentially try to push the same task multiple times (for example
+        # if multiple parents of a join are cloned). We therefore keep track of what
+        # has executed (been cloned) or what has been scheduled and avoid scheduling
+        # it again.
+        if index:
+            if index in self._ran_or_scheduled_task_index:
+                # It has already run or been scheduled
+                return
+            # Note that we are scheduling this to run
+            self._ran_or_scheduled_task_index.add(index)
         self._run_queue.insert(0, (step, task_kwargs))
         # For foreaches, this will happen multiple time but is ok, becomes a no-op
         self._unprocessed_steps.discard(step)
metaflow/user_configs/config_parameters.py
CHANGED

@@ -183,7 +183,7 @@ class DelayEvaluator(collections.abc.Mapping):
 
     def __getattr__(self, name):
         if self._access is None:
-            raise AttributeError()
+            raise AttributeError(name)
         self._access.append(name)
         return self
 

@@ -336,6 +336,8 @@ class Config(Parameter, collections.abc.Mapping):
         self.parser = parser
         self._computed_value = None
 
+        self._delayed_evaluator = None
+
     def load_parameter(self, v):
         if v is None:
             return None

@@ -344,22 +346,37 @@ class Config(Parameter, collections.abc.Mapping):
     def _store_value(self, v: Any) -> None:
         self._computed_value = v
 
+    def _init_delayed_evaluator(self) -> None:
+        if self._delayed_evaluator is None:
+            self._delayed_evaluator = DelayEvaluator(self.name.lower())
+
     # Support <config>.<var> syntax
     def __getattr__(self, name):
-        return DelayEvaluator
+        # Need to return a new DelayEvaluator everytime because the evaluator will
+        # contain the "path" (ie: .name) and can be further accessed.
+        return getattr(DelayEvaluator(self.name.lower()), name)
 
-    # Next three methods are to implement mapping to support **<config> syntax
+    # Next three methods are to implement mapping to support **<config> syntax. We
+    # need to be careful, however, to also support a regular `config["key"]` syntax
+    # which calls into `__getitem__` and therefore behaves like __getattr__ above.
     def __iter__(self):
-
+        self._init_delayed_evaluator()
+        yield from self._delayed_evaluator
 
     def __len__(self):
-
+        self._init_delayed_evaluator()
+        return len(self._delayed_evaluator)
 
     def __getitem__(self, key):
+        self._init_delayed_evaluator()
+        if key.startswith(UNPACK_KEY):
+            return self._delayed_evaluator[key]
         return DelayEvaluator(self.name.lower())[key]
 
 
 def resolve_delayed_evaluator(v: Any, ignore_errors: bool = False) -> Any:
+    # NOTE: We don't ignore errors in downstream calls because we want to have either
+    # all or nothing for the top-level call by the user.
     try:
         if isinstance(v, DelayEvaluator):
             return v()

@@ -397,7 +414,7 @@ def unpack_delayed_evaluator(
         else:
             # k.startswith(UNPACK_KEY)
             try:
-                result.update(resolve_delayed_evaluator(v
+                result.update(resolve_delayed_evaluator(v))
             except Exception as e:
                 if ignore_errors:
                     continue
metaflow/version.py
CHANGED

@@ -1 +1 @@
-metaflow_version = "2.13"
+metaflow_version = "2.13.2"
{metaflow-2.13.dist-info → metaflow-2.13.2.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: metaflow
-Version: 2.13
+Version: 2.13.2
 Summary: Metaflow: More Data Science, Less Engineering
 Author: Metaflow Developers
 Author-email: help@metaflow.org

@@ -26,7 +26,7 @@ License-File: LICENSE
 Requires-Dist: requests
 Requires-Dist: boto3
 Provides-Extra: stubs
-Requires-Dist: metaflow-stubs==2.13; extra == "stubs"
+Requires-Dist: metaflow-stubs==2.13.2; extra == "stubs"
 
 ![Metaflow_Logo_Horizontal_FullColor_Ribbon_Dark_RGB](https://user-images.githubusercontent.com/763451/89453116-96a57e00-d713-11ea-9fa6-82b29d4d6eff.png)
 