metaflow 2.18.13__py2.py3-none-any.whl → 2.19.1__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metaflow/__init__.py +1 -0
- metaflow/cli.py +78 -13
- metaflow/cli_components/run_cmds.py +182 -39
- metaflow/cli_components/step_cmd.py +160 -4
- metaflow/client/__init__.py +1 -0
- metaflow/client/core.py +162 -99
- metaflow/client/filecache.py +59 -32
- metaflow/cmd/code/__init__.py +2 -1
- metaflow/datastore/__init__.py +1 -0
- metaflow/datastore/content_addressed_store.py +40 -9
- metaflow/datastore/datastore_set.py +10 -1
- metaflow/datastore/flow_datastore.py +123 -4
- metaflow/datastore/spin_datastore.py +91 -0
- metaflow/datastore/task_datastore.py +86 -2
- metaflow/decorators.py +75 -6
- metaflow/extension_support/__init__.py +372 -305
- metaflow/flowspec.py +3 -2
- metaflow/metaflow_config.py +41 -0
- metaflow/metaflow_profile.py +18 -0
- metaflow/packaging_sys/utils.py +2 -39
- metaflow/packaging_sys/v1.py +63 -16
- metaflow/plugins/__init__.py +2 -0
- metaflow/plugins/argo/argo_client.py +1 -0
- metaflow/plugins/argo/argo_workflows.py +3 -1
- metaflow/plugins/cards/card_datastore.py +9 -3
- metaflow/plugins/cards/card_decorator.py +1 -0
- metaflow/plugins/cards/card_modules/basic.py +9 -3
- metaflow/plugins/datastores/local_storage.py +12 -6
- metaflow/plugins/datastores/spin_storage.py +12 -0
- metaflow/plugins/datatools/s3/s3.py +29 -10
- metaflow/plugins/datatools/s3/s3op.py +90 -62
- metaflow/plugins/metadata_providers/local.py +76 -82
- metaflow/plugins/metadata_providers/spin.py +16 -0
- metaflow/runner/metaflow_runner.py +210 -19
- metaflow/runtime.py +348 -21
- metaflow/task.py +61 -12
- metaflow/user_configs/config_parameters.py +2 -4
- metaflow/user_decorators/mutable_flow.py +1 -1
- metaflow/user_decorators/user_step_decorator.py +10 -1
- metaflow/util.py +191 -1
- metaflow/version.py +1 -1
- {metaflow-2.18.13.data → metaflow-2.19.1.data}/data/share/metaflow/devtools/Makefile +10 -0
- {metaflow-2.18.13.dist-info → metaflow-2.19.1.dist-info}/METADATA +2 -4
- {metaflow-2.18.13.dist-info → metaflow-2.19.1.dist-info}/RECORD +50 -47
- {metaflow-2.18.13.data → metaflow-2.19.1.data}/data/share/metaflow/devtools/Tiltfile +0 -0
- {metaflow-2.18.13.data → metaflow-2.19.1.data}/data/share/metaflow/devtools/pick_services.sh +0 -0
- {metaflow-2.18.13.dist-info → metaflow-2.19.1.dist-info}/WHEEL +0 -0
- {metaflow-2.18.13.dist-info → metaflow-2.19.1.dist-info}/entry_points.txt +0 -0
- {metaflow-2.18.13.dist-info → metaflow-2.19.1.dist-info}/licenses/LICENSE +0 -0
- {metaflow-2.18.13.dist-info → metaflow-2.19.1.dist-info}/top_level.txt +0 -0
metaflow/flowspec.py
CHANGED

@@ -307,7 +307,8 @@ class FlowSpec(metaclass=FlowSpecMeta):
                     % (deco._flow_cls.__name__, cls.__name__)
                 )
             debug.userconf_exec(
-                "Evaluating flow level decorator %s"
+                "Evaluating flow level decorator %s (pre-mutate)"
+                % deco.__class__.__name__
             )
             deco.pre_mutate(mutable_flow)
             # We reset cached_parameters on the very off chance that the user added
@@ -324,7 +325,7 @@ class FlowSpec(metaclass=FlowSpecMeta):
             if isinstance(deco, StepMutator):
                 inserted_by_value = [deco.decorator_name] + (deco.inserted_by or [])
                 debug.userconf_exec(
-                    "Evaluating step level decorator %s for %s"
+                    "Evaluating step level decorator %s for %s (pre-mutate)"
                     % (deco.__class__.__name__, step.name)
                 )
                 deco.pre_mutate(
metaflow/metaflow_config.py
CHANGED

@@ -21,6 +21,7 @@ if sys.platform == "darwin":
 
 # Path to the local directory to store artifacts for 'local' datastore.
 DATASTORE_LOCAL_DIR = ".metaflow"
+DATASTORE_SPIN_LOCAL_DIR = ".metaflow_spin"
 
 # Local configuration file (in .metaflow) containing overrides per-project
 LOCAL_CONFIG_FILE = "config.json"
@@ -47,6 +48,38 @@ DEFAULT_FROM_DEPLOYMENT_IMPL = from_conf(
     "DEFAULT_FROM_DEPLOYMENT_IMPL", "argo-workflows"
 )
 
+###
+# Spin configuration
+###
+# Essentially a whitelist of decorators that are allowed in Spin steps
+SPIN_ALLOWED_DECORATORS = from_conf(
+    "SPIN_ALLOWED_DECORATORS",
+    [
+        "conda",
+        "pypi",
+        "conda_base",
+        "pypi_base",
+        "environment",
+        "project",
+        "timeout",
+        "conda_env_internal",
+        "card",
+    ],
+)
+
+# Essentially a blacklist of decorators that are not allowed in Spin steps
+# Note: decorators not in either SPIN_ALLOWED_DECORATORS or SPIN_DISALLOWED_DECORATORS
+# are simply ignored in Spin steps
+SPIN_DISALLOWED_DECORATORS = from_conf(
+    "SPIN_DISALLOWED_DECORATORS",
+    [
+        "parallel",
+    ],
+)
+
+# Default value for persist option in spin command
+SPIN_PERSIST = from_conf("SPIN_PERSIST", False)
+
 ###
 # User configuration
 ###
@@ -57,6 +90,7 @@ USER = from_conf("USER")
 # Datastore configuration
 ###
 DATASTORE_SYSROOT_LOCAL = from_conf("DATASTORE_SYSROOT_LOCAL")
+DATASTORE_SYSROOT_SPIN = from_conf("DATASTORE_SYSROOT_SPIN")
 # S3 bucket and prefix to store artifacts for 's3' datastore.
 DATASTORE_SYSROOT_S3 = from_conf("DATASTORE_SYSROOT_S3")
 # Azure Blob Storage container and blob prefix
@@ -109,6 +143,9 @@ S3_WORKER_COUNT = from_conf("S3_WORKER_COUNT", 64)
 # top-level retries)
 S3_TRANSIENT_RETRY_COUNT = from_conf("S3_TRANSIENT_RETRY_COUNT", 20)
 
+# Whether to log transient retry messages to stdout
+S3_LOG_TRANSIENT_RETRIES = from_conf("S3_LOG_TRANSIENT_RETRIES", False)
+
 # S3 retry configuration used in the aws client
 # Use the adaptive retry strategy by default
 S3_CLIENT_RETRY_CONFIG = from_conf(
@@ -461,6 +498,10 @@ ESCAPE_HATCH_WARNING = from_conf("ESCAPE_HATCH_WARNING", True)
 ###
 FEAT_ALWAYS_UPLOAD_CODE_PACKAGE = from_conf("FEAT_ALWAYS_UPLOAD_CODE_PACKAGE", False)
 ###
+# Profile
+###
+PROFILE_FROM_START = from_conf("PROFILE_FROM_START", False)
+###
 # Debug configuration
 ###
 DEBUG_OPTIONS = [
metaflow/metaflow_profile.py
CHANGED

@@ -2,6 +2,24 @@ import time
 
 from contextlib import contextmanager
 
+from .metaflow_config import PROFILE_FROM_START
+
+init_time = None
+
+
+if PROFILE_FROM_START:
+
+    def from_start(msg: str):
+        global init_time
+        if init_time is None:
+            init_time = time.time()
+        print("From start: %s took %dms" % (msg, int((time.time() - init_time) * 1000)))
+
+else:
+
+    def from_start(_msg: str):
+        pass
+
 
 @contextmanager
 def profile(label, stats_dict=None):
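For orientation, a minimal usage sketch of this module; from_start comes from the diff above, while the stats_dict behavior of the pre-existing profile context manager is assumed from its signature rather than shown here:

# Minimal sketch, assuming the interfaces shown above.
import time
from metaflow.metaflow_profile import from_start, profile

from_start("imports done")  # prints elapsed time only when PROFILE_FROM_START is set; otherwise a no-op

stats = {}
with profile("load_artifacts", stats_dict=stats):  # times the wrapped block
    time.sleep(0.1)
print(stats)  # assumed to hold elapsed milliseconds keyed by label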
metaflow/packaging_sys/utils.py
CHANGED

@@ -2,45 +2,7 @@ import os
 from contextlib import contextmanager
 from typing import Callable, Generator, List, Optional, Tuple
 
-from ..util import to_unicode
-
-
-# this is os.walk(follow_symlinks=True) with cycle detection
-def walk_without_cycles(
-    top_root: str,
-    exclude_dirs: Optional[List[str]] = None,
-) -> Generator[Tuple[str, List[str]], None, None]:
-    seen = set()
-
-    default_skip_dirs = ["__pycache__"]
-
-    def _recurse(root, skip_dirs):
-        for parent, dirs, files in os.walk(root):
-            dirs[:] = [d for d in dirs if d not in skip_dirs]
-            for d in dirs:
-                path = os.path.join(parent, d)
-                if os.path.islink(path):
-                    # Breaking loops: never follow the same symlink twice
-                    #
-                    # NOTE: this also means that links to sibling links are
-                    # not followed. In this case:
-                    #
-                    # x -> y
-                    # y -> oo
-                    # oo/real_file
-                    #
-                    # real_file is only included twice, not three times
-                    reallink = os.path.realpath(path)
-                    if reallink not in seen:
-                        seen.add(reallink)
-                        for x in _recurse(path, default_skip_dirs):
-                            yield x
-            yield parent, files
-
-    skip_dirs = set(default_skip_dirs + (exclude_dirs or []))
-    for x in _recurse(top_root, skip_dirs):
-        skip_dirs = default_skip_dirs
-        yield x
+from ..util import to_unicode, walk_without_cycles
 
 
 def walk(
@@ -53,6 +15,7 @@ def walk(
     prefixlen = len("%s/" % os.path.dirname(root))
     for (
         path,
+        _,
         files,
     ) in walk_without_cycles(root, exclude_tl_dirs):
        if exclude_hidden and "/." in path:
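As a usage note, a small sketch of the relocated helper; the three-tuple shape is inferred from the updated walk() unpacking above, and only a single argument is passed since the new signature is not shown in this diff:

# Hedged sketch: walk_without_cycles now lives in metaflow.util and, per the
# caller above, yields (directory, subdirs, files) like os.walk but with
# protection against symlink cycles.
import os

from metaflow.util import walk_without_cycles

for parent, _dirs, files in walk_without_cycles("."):
    for f in files:
        print(os.path.join(parent, f))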
metaflow/packaging_sys/v1.py
CHANGED

@@ -16,7 +16,7 @@ from ..exception import MetaflowException
 from ..metaflow_version import get_version
 from ..user_decorators.user_flow_decorator import FlowMutatorMeta
 from ..user_decorators.user_step_decorator import UserStepDecoratorMeta
-from ..util import get_metaflow_root
+from ..util import get_metaflow_root, walk_without_cycles
 from . import ContentType, MFCONTENT_MARKER, MetaflowCodeContentV1Base
 from .distribution_support import _ModuleInfo, modules_to_distributions
 from .utils import suffix_filter, walk
@@ -269,12 +269,50 @@ class MetaflowCodeContentV1(MetaflowCodeContentV1Base):
         # If the module is a single file, we handle this here by looking at __file__
         # which will point to the single file. If it is an actual module, __path__
         # will contain the path(s) to the module
+        if hasattr(module, "__file__") and module.__file__:
+            root_paths = [Path(module.__file__).resolve().as_posix()]
+        else:
+            root_paths = []
+            seen_path_values = set()
+            new_paths = module.__spec__.submodule_search_locations
+            while new_paths:
+                paths = new_paths
+                new_paths = []
+                for p in paths:
+                    if p in seen_path_values:
+                        continue
+                    if os.path.isdir(p):
+                        root_paths.append(Path(p).resolve().as_posix())
+                    elif p in sys.path_importer_cache:
+                        # We have a path hook that we likely need to call to get the actual path
+                        addl_spec = sys.path_importer_cache[p].find_spec(name)
+                        if (
+                            addl_spec is not None
+                            and addl_spec.submodule_search_locations
+                        ):
+                            new_paths.extend(addl_spec.submodule_search_locations)
+                    else:
+                        # This may not be as required since it is likely the importer cache has
+                        # everything already but just in case, we will also go through the
+                        # path hooks and see if we find another one
+                        for path_hook in sys.path_hooks:
+                            try:
+                                finder = path_hook(p)
+                                addl_spec = finder.find_spec(name)
+                                if (
+                                    addl_spec is not None
+                                    and addl_spec.submodule_search_locations
+                                ):
+                                    new_paths.extend(
+                                        addl_spec.submodule_search_locations
+                                    )
+                                    break
+                            except ImportError:
+                                continue
+                    seen_path_values.add(p)
         self._modules[name] = _ModuleInfo(
             name,
-            set(
-                Path(p).resolve().as_posix()
-                for p in getattr(module, "__path__", [module.__file__])
-            ),
+            set(root_paths),
             module,
             False,  # This is not a Metaflow module (added by the user manually)
         )
@@ -417,15 +455,7 @@ class MetaflowCodeContentV1(MetaflowCodeContentV1Base):
                     % (dist_name, name)
                 )
             dist_root = str(dist.locate_file(name))
-
-            # This is an error because it means that this distribution is
-            # not contributing to the module.
-            raise RuntimeError(
-                "Distribution '%s' is not contributing to module '%s' as "
-                "expected (got '%s' when expected one of %s)"
-                % (dist.metadata["Name"], name, dist_root, paths)
-            )
-            paths.discard(dist_root)
+            has_file_in_root = False
             if dist_name not in self._distmetainfo:
                 # Possible that a distribution contributes to multiple modules
                 self._distmetainfo[dist_name] = {
@@ -438,13 +468,30 @@ class MetaflowCodeContentV1(MetaflowCodeContentV1Base):
             for file in dist.files or []:
                 # Skip files that do not belong to this module (distribution may
                 # provide multiple modules)
-                if
+                if (
+                    file.parts[: len(prefix_parts)] != prefix_parts
+                    or file.suffix == ".pth"
+                    or str(file).startswith("__editable__")
+                ):
                     continue
                 if file.parts[len(prefix_parts)] == "__init__.py":
                     has_init = True
+                has_file_in_root = True
+                # At this point, we know that we are seeing actual files in the
+                # dist_root so we make sure it is as expected
+                if dist_root not in paths:
+                    # This is an error because it means that this distribution is
+                    # not contributing to the module.
+                    raise RuntimeError(
+                        "Distribution '%s' is not contributing to module '%s' as "
+                        "expected (got '%s' when expected one of %s)"
+                        % (dist.metadata["Name"], name, dist_root, paths)
+                    )
                 yield str(
                     dist.locate_file(file).resolve().as_posix()
                 ), os.path.join(self._code_dir, *prefix_parts, *file.parts[1:])
+            if has_file_in_root:
+                paths.discard(dist_root)
 
             # Now if there are more paths left in paths, it means there is a non-distribution
             # component to this package which we also include.
@@ -460,7 +507,7 @@ class MetaflowCodeContentV1(MetaflowCodeContentV1Base):
                 )
                 has_init = True
             else:
-                for root, _, files in
+                for root, _, files in walk_without_cycles(path):
                     for file in files:
                         if any(file.endswith(x) for x in EXT_EXCLUDE_SUFFIXES):
                             continue
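The new root-path logic above prefers __file__ and falls back to the module spec's submodule_search_locations, resolving path hooks for namespace packages. A stripped-down illustration of that fallback order (not the Metaflow API itself, and the path-hook resolution step is omitted):

# Hedged sketch of the fallback used above: __file__ for single-file modules and
# regular packages, submodule_search_locations otherwise (namespace packages).
import importlib


def module_roots(name):
    mod = importlib.import_module(name)
    if getattr(mod, "__file__", None):
        return [mod.__file__]
    return list(mod.__spec__.submodule_search_locations or [])


print(module_roots("json"))  # a regular package: the path of its __init__.py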
metaflow/plugins/__init__.py
CHANGED

@@ -83,11 +83,13 @@ ENVIRONMENTS_DESC = [
 METADATA_PROVIDERS_DESC = [
     ("service", ".metadata_providers.service.ServiceMetadataProvider"),
     ("local", ".metadata_providers.local.LocalMetadataProvider"),
+    ("spin", ".metadata_providers.spin.SpinMetadataProvider"),
 ]
 
 # Add datastore here
 DATASTORES_DESC = [
     ("local", ".datastores.local_storage.LocalStorage"),
+    ("spin", ".datastores.spin_storage.SpinStorage"),
     ("s3", ".datastores.s3_storage.S3Storage"),
     ("azure", ".datastores.azure_storage.AzureStorage"),
     ("gs", ".datastores.gs_storage.GSStorage"),
metaflow/plugins/argo/argo_client.py
CHANGED

@@ -325,6 +325,7 @@ class ArgoClient(object):
                 "failedJobsHistoryLimit": 10000, # default is unfortunately 1
                 "successfulJobsHistoryLimit": 10000, # default is unfortunately 3
                 "workflowSpec": {"workflowTemplateRef": {"name": name}},
+                "startingDeadlineSeconds": 3540, # configuring this to 59 minutes so a failed trigger of cron workflow can succeed at most 59 mins after scheduled execution
             },
         }
         try:
metaflow/plugins/argo/argo_workflows.py
CHANGED

@@ -2136,6 +2136,8 @@ class ArgoWorkflows(object):
                     foreach_step,
                 )
             )
+        # NOTE: input-paths might be extremely lengthy so we dump these to disk instead of passing them directly to the cmd
+        step_cmds.append("echo %s >> /tmp/mf-input-paths" % input_paths)
         step = [
             "step",
             node.name,
@@ -2143,7 +2145,7 @@ class ArgoWorkflows(object):
             "--task-id %s" % task_id,
             "--retry-count %s" % retry_count,
             "--max-user-code-retries %d" % user_code_retries,
-            "--input-paths
+            "--input-paths-filename /tmp/mf-input-paths",
         ]
         if node.parallel_step:
             step.append(
metaflow/plugins/cards/card_datastore.py
CHANGED

@@ -9,6 +9,7 @@ from metaflow.metaflow_config import (
     CARD_S3ROOT,
     CARD_LOCALROOT,
     DATASTORE_LOCAL_DIR,
+    DATASTORE_SPIN_LOCAL_DIR,
     CARD_SUFFIX,
     CARD_AZUREROOT,
     CARD_GSROOT,
@@ -58,12 +59,17 @@ class CardDatastore(object):
             return CARD_AZUREROOT
         elif storage_type == "gs":
             return CARD_GSROOT
-        elif storage_type == "local":
+        elif storage_type == "local" or storage_type == "spin":
             # Borrowing some of the logic from LocalStorage.get_storage_root
             result = CARD_LOCALROOT
+            local_dir = (
+                DATASTORE_SPIN_LOCAL_DIR
+                if storage_type == "spin"
+                else DATASTORE_LOCAL_DIR
+            )
             if result is None:
                 current_path = os.getcwd()
-                check_dir = os.path.join(current_path,
+                check_dir = os.path.join(current_path, local_dir)
                 check_dir = os.path.realpath(check_dir)
                 orig_path = check_dir
                 while not os.path.isdir(check_dir):
@@ -73,7 +79,7 @@ class CardDatastore(object):
                         # return the top level path
                         return os.path.join(orig_path, CARD_SUFFIX)
                     current_path = new_path
-                    check_dir = os.path.join(current_path,
+                    check_dir = os.path.join(current_path, local_dir)
                 return os.path.join(check_dir, CARD_SUFFIX)
         else:
             # Let's make it obvious we need to update this block for each new datastore backend...
metaflow/plugins/cards/card_modules/basic.py
CHANGED

@@ -496,9 +496,15 @@ class TaskInfoComponent(MetaflowCardComponent):
         )
 
         # ignore the name as a parameter
-
-
-
+        if "_parameters" not in self._task.parent.parent:
+            # In case of spin steps, there is no _parameters task
+            param_ids = []
+        else:
+            param_ids = [
+                p.id
+                for p in self._task.parent.parent["_parameters"].task
+                if p.id != "name"
+            ]
         if len(param_ids) > 0:
             # Extract parameter from the Parameter Task. That is less brittle.
             parameter_data = TaskToDict(
metaflow/plugins/datastores/local_storage.py
CHANGED

@@ -1,24 +1,29 @@
 import json
 import os
 
-from metaflow.metaflow_config import
+from metaflow.metaflow_config import (
+    DATASTORE_LOCAL_DIR,
+    DATASTORE_SYSROOT_LOCAL,
+)
 from metaflow.datastore.datastore_storage import CloseAfterUse, DataStoreStorage
 
 
 class LocalStorage(DataStoreStorage):
     TYPE = "local"
     METADATA_DIR = "_meta"
+    DATASTORE_DIR = DATASTORE_LOCAL_DIR  # ".metaflow"
+    SYSROOT_VAR = DATASTORE_SYSROOT_LOCAL
 
     @classmethod
     def get_datastore_root_from_config(cls, echo, create_on_absent=True):
-        result =
+        result = cls.SYSROOT_VAR
         if result is None:
             try:
                 # Python2
                 current_path = os.getcwdu()
             except: # noqa E722
                 current_path = os.getcwd()
-            check_dir = os.path.join(current_path,
+            check_dir = os.path.join(current_path, cls.DATASTORE_DIR)
             check_dir = os.path.realpath(check_dir)
             orig_path = check_dir
             top_level_reached = False
@@ -28,12 +33,13 @@ class LocalStorage(DataStoreStorage):
                     top_level_reached = True
                     break  # We are no longer making upward progress
                 current_path = new_path
-                check_dir = os.path.join(current_path,
+                check_dir = os.path.join(current_path, cls.DATASTORE_DIR)
             if top_level_reached:
                 if create_on_absent:
                     # Could not find any directory to use so create a new one
                     echo(
-                        "Creating
+                        "Creating %s datastore in current directory (%s)"
+                        % (cls.TYPE, orig_path)
                     )
                     os.mkdir(orig_path)
                     result = orig_path
@@ -42,7 +48,7 @@ class LocalStorage(DataStoreStorage):
             else:
                 result = check_dir
         else:
-            result = os.path.join(result,
+            result = os.path.join(result, cls.DATASTORE_DIR)
         return result
 
     @staticmethod
metaflow/plugins/datastores/spin_storage.py
ADDED

@@ -0,0 +1,12 @@
+from metaflow.metaflow_config import (
+    DATASTORE_SPIN_LOCAL_DIR,
+    DATASTORE_SYSROOT_SPIN,
+)
+from metaflow.plugins.datastores.local_storage import LocalStorage
+
+
+class SpinStorage(LocalStorage):
+    TYPE = "spin"
+    METADATA_DIR = "_meta"
+    DATASTORE_DIR = DATASTORE_SPIN_LOCAL_DIR  # ".metaflow_spin"
+    SYSROOT_VAR = DATASTORE_SYSROOT_SPIN
metaflow/plugins/datatools/s3/s3.py
CHANGED

@@ -18,6 +18,7 @@ from metaflow.metaflow_config import (
     DATATOOLS_S3ROOT,
     S3_RETRY_COUNT,
     S3_TRANSIENT_RETRY_COUNT,
+    S3_LOG_TRANSIENT_RETRIES,
     S3_SERVER_SIDE_ENCRYPTION,
     S3_WORKER_COUNT,
     TEMPDIR,
@@ -1760,17 +1761,35 @@ class S3(object):
                 # due to a transient failure so we try again.
                 transient_retry_count += 1
                 total_ok_count += last_ok_count
-
-
-
-
-
-
-
-
-
+
+                if S3_LOG_TRANSIENT_RETRIES:
+                    # Extract transient error type from pending retry lines
+                    error_info = ""
+                    if pending_retries:
+                        try:
+                            # Parse the first line to get transient error type
+                            first_retry = json.loads(
+                                pending_retries[0].decode("utf-8").strip()
+                            )
+                            if "transient_error_type" in first_retry:
+                                error_info = (
+                                    " (%s)" % first_retry["transient_error_type"]
+                                )
+                        except (json.JSONDecodeError, IndexError, KeyError):
+                            pass
+
+                    print(
+                        "Transient S3 failure (attempt #%d) -- total success: %d, "
+                        "last attempt %d/%d -- remaining: %d%s"
+                        % (
+                            transient_retry_count,
+                            total_ok_count,
+                            last_ok_count,
+                            last_ok_count + last_retry_count,
+                            len(pending_retries),
+                            error_info,
+                        )
                     )
-                )
                 if inject_failures == 0:
                     # Don't sleep when we are "faking" the failures
                     self._jitter_sleep(transient_retry_count)