metaflow 2.18.12__py2.py3-none-any.whl → 2.19.0__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metaflow/__init__.py +1 -0
- metaflow/cli.py +78 -13
- metaflow/cli_components/run_cmds.py +182 -39
- metaflow/cli_components/step_cmd.py +160 -4
- metaflow/client/__init__.py +1 -0
- metaflow/client/core.py +162 -99
- metaflow/client/filecache.py +59 -32
- metaflow/cmd/code/__init__.py +2 -1
- metaflow/datastore/__init__.py +1 -0
- metaflow/datastore/content_addressed_store.py +40 -9
- metaflow/datastore/datastore_set.py +10 -1
- metaflow/datastore/flow_datastore.py +123 -4
- metaflow/datastore/spin_datastore.py +91 -0
- metaflow/datastore/task_datastore.py +86 -2
- metaflow/decorators.py +75 -6
- metaflow/extension_support/__init__.py +372 -305
- metaflow/flowspec.py +3 -2
- metaflow/graph.py +2 -2
- metaflow/metaflow_config.py +41 -0
- metaflow/metaflow_profile.py +18 -0
- metaflow/packaging_sys/utils.py +2 -39
- metaflow/packaging_sys/v1.py +63 -16
- metaflow/plugins/__init__.py +2 -0
- metaflow/plugins/argo/argo_workflows.py +20 -25
- metaflow/plugins/argo/param_val.py +19 -0
- metaflow/plugins/cards/card_datastore.py +13 -13
- metaflow/plugins/cards/card_decorator.py +1 -0
- metaflow/plugins/cards/card_modules/basic.py +9 -3
- metaflow/plugins/datastores/local_storage.py +12 -6
- metaflow/plugins/datastores/spin_storage.py +12 -0
- metaflow/plugins/datatools/s3/s3.py +29 -10
- metaflow/plugins/datatools/s3/s3op.py +90 -62
- metaflow/plugins/metadata_providers/local.py +76 -82
- metaflow/plugins/metadata_providers/spin.py +16 -0
- metaflow/runner/click_api.py +4 -2
- metaflow/runner/metaflow_runner.py +210 -19
- metaflow/runtime.py +348 -21
- metaflow/task.py +61 -12
- metaflow/user_configs/config_parameters.py +2 -4
- metaflow/user_decorators/mutable_flow.py +1 -1
- metaflow/user_decorators/user_step_decorator.py +10 -1
- metaflow/util.py +191 -1
- metaflow/version.py +1 -1
- {metaflow-2.18.12.data → metaflow-2.19.0.data}/data/share/metaflow/devtools/Makefile +10 -0
- {metaflow-2.18.12.dist-info → metaflow-2.19.0.dist-info}/METADATA +2 -4
- {metaflow-2.18.12.dist-info → metaflow-2.19.0.dist-info}/RECORD +52 -48
- {metaflow-2.18.12.data → metaflow-2.19.0.data}/data/share/metaflow/devtools/Tiltfile +0 -0
- {metaflow-2.18.12.data → metaflow-2.19.0.data}/data/share/metaflow/devtools/pick_services.sh +0 -0
- {metaflow-2.18.12.dist-info → metaflow-2.19.0.dist-info}/WHEEL +0 -0
- {metaflow-2.18.12.dist-info → metaflow-2.19.0.dist-info}/entry_points.txt +0 -0
- {metaflow-2.18.12.dist-info → metaflow-2.19.0.dist-info}/licenses/LICENSE +0 -0
- {metaflow-2.18.12.dist-info → metaflow-2.19.0.dist-info}/top_level.txt +0 -0
metaflow/flowspec.py
CHANGED
@@ -307,7 +307,8 @@ class FlowSpec(metaclass=FlowSpecMeta):
     % (deco._flow_cls.__name__, cls.__name__)
 )
 debug.userconf_exec(
-    "Evaluating flow level decorator %s"
+    "Evaluating flow level decorator %s (pre-mutate)"
+    % deco.__class__.__name__
 )
 deco.pre_mutate(mutable_flow)
 # We reset cached_parameters on the very off chance that the user added
@@ -324,7 +325,7 @@ class FlowSpec(metaclass=FlowSpecMeta):
 if isinstance(deco, StepMutator):
     inserted_by_value = [deco.decorator_name] + (deco.inserted_by or [])
     debug.userconf_exec(
-        "Evaluating step level decorator %s for %s"
+        "Evaluating step level decorator %s for %s (pre-mutate)"
         % (deco.__class__.__name__, step.name)
     )
     deco.pre_mutate(
metaflow/graph.py
CHANGED
@@ -94,7 +94,7 @@ class DAGNode(object):
 case_key = None

 # handle string literals
-if isinstance(key, ast.Str):
+if hasattr(ast, "Str") and isinstance(key, ast.Str):
     case_key = key.s
 elif isinstance(key, ast.Constant):
     case_key = key.value
@@ -171,7 +171,7 @@ class DAGNode(object):
 # Get condition parameter
 for keyword in tail.value.keywords:
     if keyword.arg == "condition":
-        if isinstance(keyword.value, ast.Str):
+        if hasattr(ast, "Str") and isinstance(keyword.value, ast.Str):
             condition_name = keyword.value.s
         elif isinstance(keyword.value, ast.Constant) and isinstance(
             keyword.value.value, str
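Note: the new hasattr(ast, "Str") guard matters because ast.Str was deprecated in Python 3.8 and removed in Python 3.12, where string literals only appear as ast.Constant. A minimal sketch of the same guarded check (the parsed expression is illustrative):

    import ast

    key = ast.parse("'hello'", mode="eval").body  # ast.Constant on modern interpreters

    if hasattr(ast, "Str") and isinstance(key, ast.Str):  # only reachable before Python 3.12
        case_key = key.s
    elif isinstance(key, ast.Constant) and isinstance(key.value, str):
        case_key = key.value

    print(case_key)  # hello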
metaflow/metaflow_config.py
CHANGED
@@ -21,6 +21,7 @@ if sys.platform == "darwin":

 # Path to the local directory to store artifacts for 'local' datastore.
 DATASTORE_LOCAL_DIR = ".metaflow"
+DATASTORE_SPIN_LOCAL_DIR = ".metaflow_spin"

 # Local configuration file (in .metaflow) containing overrides per-project
 LOCAL_CONFIG_FILE = "config.json"
@@ -47,6 +48,38 @@ DEFAULT_FROM_DEPLOYMENT_IMPL = from_conf(
     "DEFAULT_FROM_DEPLOYMENT_IMPL", "argo-workflows"
 )

+###
+# Spin configuration
+###
+# Essentially a whitelist of decorators that are allowed in Spin steps
+SPIN_ALLOWED_DECORATORS = from_conf(
+    "SPIN_ALLOWED_DECORATORS",
+    [
+        "conda",
+        "pypi",
+        "conda_base",
+        "pypi_base",
+        "environment",
+        "project",
+        "timeout",
+        "conda_env_internal",
+        "card",
+    ],
+)
+
+# Essentially a blacklist of decorators that are not allowed in Spin steps
+# Note: decorators not in either SPIN_ALLOWED_DECORATORS or SPIN_DISALLOWED_DECORATORS
+# are simply ignored in Spin steps
+SPIN_DISALLOWED_DECORATORS = from_conf(
+    "SPIN_DISALLOWED_DECORATORS",
+    [
+        "parallel",
+    ],
+)
+
+# Default value for persist option in spin command
+SPIN_PERSIST = from_conf("SPIN_PERSIST", False)
+
 ###
 # User configuration
 ###
@@ -57,6 +90,7 @@ USER = from_conf("USER")
 # Datastore configuration
 ###
 DATASTORE_SYSROOT_LOCAL = from_conf("DATASTORE_SYSROOT_LOCAL")
+DATASTORE_SYSROOT_SPIN = from_conf("DATASTORE_SYSROOT_SPIN")
 # S3 bucket and prefix to store artifacts for 's3' datastore.
 DATASTORE_SYSROOT_S3 = from_conf("DATASTORE_SYSROOT_S3")
 # Azure Blob Storage container and blob prefix
@@ -109,6 +143,9 @@ S3_WORKER_COUNT = from_conf("S3_WORKER_COUNT", 64)
 # top-level retries)
 S3_TRANSIENT_RETRY_COUNT = from_conf("S3_TRANSIENT_RETRY_COUNT", 20)

+# Whether to log transient retry messages to stdout
+S3_LOG_TRANSIENT_RETRIES = from_conf("S3_LOG_TRANSIENT_RETRIES", False)
+
 # S3 retry configuration used in the aws client
 # Use the adaptive retry strategy by default
 S3_CLIENT_RETRY_CONFIG = from_conf(
@@ -461,6 +498,10 @@ ESCAPE_HATCH_WARNING = from_conf("ESCAPE_HATCH_WARNING", True)
 ###
 FEAT_ALWAYS_UPLOAD_CODE_PACKAGE = from_conf("FEAT_ALWAYS_UPLOAD_CODE_PACKAGE", False)
 ###
+# Profile
+###
+PROFILE_FROM_START = from_conf("PROFILE_FROM_START", False)
+###
 # Debug configuration
 ###
 DEBUG_OPTIONS = [
metaflow/metaflow_profile.py
CHANGED
@@ -2,6 +2,24 @@ import time

 from contextlib import contextmanager

+from .metaflow_config import PROFILE_FROM_START
+
+init_time = None
+
+
+if PROFILE_FROM_START:
+
+    def from_start(msg: str):
+        global init_time
+        if init_time is None:
+            init_time = time.time()
+        print("From start: %s took %dms" % (msg, int((time.time() - init_time) * 1000)))
+
+else:
+
+    def from_start(_msg: str):
+        pass
+

 @contextmanager
 def profile(label, stats_dict=None):
metaflow/packaging_sys/utils.py
CHANGED
@@ -2,45 +2,7 @@ import os
 from contextlib import contextmanager
 from typing import Callable, Generator, List, Optional, Tuple

-from ..util import to_unicode
-
-
-# this is os.walk(follow_symlinks=True) with cycle detection
-def walk_without_cycles(
-    top_root: str,
-    exclude_dirs: Optional[List[str]] = None,
-) -> Generator[Tuple[str, List[str]], None, None]:
-    seen = set()
-
-    default_skip_dirs = ["__pycache__"]
-
-    def _recurse(root, skip_dirs):
-        for parent, dirs, files in os.walk(root):
-            dirs[:] = [d for d in dirs if d not in skip_dirs]
-            for d in dirs:
-                path = os.path.join(parent, d)
-                if os.path.islink(path):
-                    # Breaking loops: never follow the same symlink twice
-                    #
-                    # NOTE: this also means that links to sibling links are
-                    # not followed. In this case:
-                    #
-                    # x -> y
-                    # y -> oo
-                    # oo/real_file
-                    #
-                    # real_file is only included twice, not three times
-                    reallink = os.path.realpath(path)
-                    if reallink not in seen:
-                        seen.add(reallink)
-                        for x in _recurse(path, default_skip_dirs):
-                            yield x
-            yield parent, files
-
-    skip_dirs = set(default_skip_dirs + (exclude_dirs or []))
-    for x in _recurse(top_root, skip_dirs):
-        skip_dirs = default_skip_dirs
-        yield x
+from ..util import to_unicode, walk_without_cycles


 def walk(
@@ -53,6 +15,7 @@ def walk(
     prefixlen = len("%s/" % os.path.dirname(root))
     for (
         path,
+        _,
         files,
     ) in walk_without_cycles(root, exclude_tl_dirs):
         if exclude_hidden and "/." in path:
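Note: walk_without_cycles now lives in metaflow.util, and the new unpacking above (path, _, files) suggests it yields (dirpath, dirnames, filenames) triples like os.walk while following each symlinked directory at most once. A minimal consuming sketch, assuming the metaflow.util import shown in this diff and an illustrative ./my_project tree:

    import os
    from metaflow.util import walk_without_cycles

    all_files = []
    for dirpath, _dirnames, filenames in walk_without_cycles("./my_project"):
        # Symlink cycles are broken, so this terminates even on looping links.
        all_files.extend(os.path.join(dirpath, f) for f in filenames)
    print(len(all_files))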
metaflow/packaging_sys/v1.py
CHANGED
@@ -16,7 +16,7 @@ from ..exception import MetaflowException
 from ..metaflow_version import get_version
 from ..user_decorators.user_flow_decorator import FlowMutatorMeta
 from ..user_decorators.user_step_decorator import UserStepDecoratorMeta
-from ..util import get_metaflow_root
+from ..util import get_metaflow_root, walk_without_cycles
 from . import ContentType, MFCONTENT_MARKER, MetaflowCodeContentV1Base
 from .distribution_support import _ModuleInfo, modules_to_distributions
 from .utils import suffix_filter, walk
@@ -269,12 +269,50 @@ class MetaflowCodeContentV1(MetaflowCodeContentV1Base):
 # If the module is a single file, we handle this here by looking at __file__
 # which will point to the single file. If it is an actual module, __path__
 # will contain the path(s) to the module
+if hasattr(module, "__file__") and module.__file__:
+    root_paths = [Path(module.__file__).resolve().as_posix()]
+else:
+    root_paths = []
+    seen_path_values = set()
+    new_paths = module.__spec__.submodule_search_locations
+    while new_paths:
+        paths = new_paths
+        new_paths = []
+        for p in paths:
+            if p in seen_path_values:
+                continue
+            if os.path.isdir(p):
+                root_paths.append(Path(p).resolve().as_posix())
+            elif p in sys.path_importer_cache:
+                # We have a path hook that we likely need to call to get the actual path
+                addl_spec = sys.path_importer_cache[p].find_spec(name)
+                if (
+                    addl_spec is not None
+                    and addl_spec.submodule_search_locations
+                ):
+                    new_paths.extend(addl_spec.submodule_search_locations)
+            else:
+                # This may not be as required since it is likely the importer cache has
+                # everything already but just in case, we will also go through the
+                # path hooks and see if we find another one
+                for path_hook in sys.path_hooks:
+                    try:
+                        finder = path_hook(p)
+                        addl_spec = finder.find_spec(name)
+                        if (
+                            addl_spec is not None
+                            and addl_spec.submodule_search_locations
+                        ):
+                            new_paths.extend(
+                                addl_spec.submodule_search_locations
+                            )
+                            break
+                    except ImportError:
+                        continue
+            seen_path_values.add(p)
 self._modules[name] = _ModuleInfo(
     name,
-    set(
-        Path(p).resolve().as_posix()
-        for p in getattr(module, "__path__", [module.__file__])
-    ),
+    set(root_paths),
     module,
     False,  # This is not a Metaflow module (added by the user manually)
 )
@@ -417,15 +455,7 @@ class MetaflowCodeContentV1(MetaflowCodeContentV1Base):
     % (dist_name, name)
 )
 dist_root = str(dist.locate_file(name))
-
-# This is an error because it means that this distribution is
-# not contributing to the module.
-raise RuntimeError(
-    "Distribution '%s' is not contributing to module '%s' as "
-    "expected (got '%s' when expected one of %s)"
-    % (dist.metadata["Name"], name, dist_root, paths)
-)
-paths.discard(dist_root)
+has_file_in_root = False
 if dist_name not in self._distmetainfo:
     # Possible that a distribution contributes to multiple modules
     self._distmetainfo[dist_name] = {
@@ -438,13 +468,30 @@ class MetaflowCodeContentV1(MetaflowCodeContentV1Base):
 for file in dist.files or []:
     # Skip files that do not belong to this module (distribution may
     # provide multiple modules)
-    if
+    if (
+        file.parts[: len(prefix_parts)] != prefix_parts
+        or file.suffix == ".pth"
+        or str(file).startswith("__editable__")
+    ):
         continue
     if file.parts[len(prefix_parts)] == "__init__.py":
         has_init = True
+    has_file_in_root = True
+    # At this point, we know that we are seeing actual files in the
+    # dist_root so we make sure it is as expected
+    if dist_root not in paths:
+        # This is an error because it means that this distribution is
+        # not contributing to the module.
+        raise RuntimeError(
+            "Distribution '%s' is not contributing to module '%s' as "
+            "expected (got '%s' when expected one of %s)"
+            % (dist.metadata["Name"], name, dist_root, paths)
+        )
     yield str(
         dist.locate_file(file).resolve().as_posix()
     ), os.path.join(self._code_dir, *prefix_parts, *file.parts[1:])
+if has_file_in_root:
+    paths.discard(dist_root)

 # Now if there are more paths left in paths, it means there is a non-distribution
 # component to this package which we also include.
@@ -460,7 +507,7 @@ class MetaflowCodeContentV1(MetaflowCodeContentV1Base):
     )
     has_init = True
 else:
-    for root, _, files in
+    for root, _, files in walk_without_cycles(path):
         for file in files:
             if any(file.endswith(x) for x in EXT_EXCLUDE_SUFFIXES):
                 continue
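Note: the root-path discovery added above leans on standard import machinery: a package's __spec__.submodule_search_locations lists the directories (possibly several, for namespace packages) that back it, and sys.path_importer_cache / sys.path_hooks can resolve non-directory path entries such as zip files or editable-install finders. A small standard-library sketch, assuming "some_pkg" is importable:

    import importlib.util

    spec = importlib.util.find_spec("some_pkg")
    if spec.submodule_search_locations:
        # A package: one or more locations contribute modules to it.
        print(list(spec.submodule_search_locations))
    else:
        # A single-file module: its origin is the only root path.
        print(spec.origin)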
metaflow/plugins/__init__.py
CHANGED
@@ -83,11 +83,13 @@ ENVIRONMENTS_DESC = [
 METADATA_PROVIDERS_DESC = [
     ("service", ".metadata_providers.service.ServiceMetadataProvider"),
     ("local", ".metadata_providers.local.LocalMetadataProvider"),
+    ("spin", ".metadata_providers.spin.SpinMetadataProvider"),
 ]

 # Add datastore here
 DATASTORES_DESC = [
     ("local", ".datastores.local_storage.LocalStorage"),
+    ("spin", ".datastores.spin_storage.SpinStorage"),
     ("s3", ".datastores.s3_storage.S3Storage"),
     ("azure", ".datastores.azure_storage.AzureStorage"),
     ("gs", ".datastores.gs_storage.GSStorage"),
metaflow/plugins/argo/argo_workflows.py
CHANGED
@@ -609,7 +609,16 @@ class ArgoWorkflows(object):
 # the JSON equivalent of None to please argo-workflows. Unfortunately it
 # has the side effect of casting the parameter value to string null during
 # execution - which needs to be fixed imminently.
-if
+if default_value is None:
+    default_value = json.dumps(None)
+elif param_type == "JSON":
+    if not isinstance(default_value, str):
+        # once to serialize the default value if needed.
+        default_value = json.dumps(default_value)
+    # adds outer quotes to param
+    default_value = json.dumps(default_value)
+else:
+    # Make argo sensors happy
     default_value = json.dumps(default_value)

 parameters[param.name] = dict(
@@ -941,11 +950,7 @@ class ArgoWorkflows(object):
 Arguments().parameters(
     [
         Parameter(parameter["name"])
-        .value(
-            "'%s'" % parameter["value"]
-            if parameter["type"] == "JSON"
-            else parameter["value"]
-        )
+        .value(parameter["value"])
         .description(parameter.get("description"))
         # TODO: Better handle IncludeFile in Argo Workflows UI.
         for parameter in self.parameters.values()
@@ -2054,7 +2059,7 @@ class ArgoWorkflows(object):
 # {{foo.bar['param_name']}}.
 # https://argoproj.github.io/argo-events/tutorials/02-parameterization/
 # http://masterminds.github.io/sprig/strings.html
-"--%s
+"--%s=\\\"$(python -m metaflow.plugins.argo.param_val {{=toBase64(workflow.parameters['%s'])}})\\\""
 % (parameter["name"], parameter["name"])
 for parameter in self.parameters.values()
 ]
@@ -3842,37 +3847,27 @@ class ArgoWorkflows(object):
 # NOTE: We need the conditional logic in order to successfully fall back to the default value
 # when the event payload does not contain a key for a parameter.
 # NOTE: Keys might contain dashes, so use the safer 'get' for fetching the value
-data_template='{{ if (hasKey $.Input.body.payload "%s") }}
+data_template='{{ if (hasKey $.Input.body.payload "%s") }}%s{{- else -}}{{ (fail "use-default-instead") }}{{- end -}}'
 % (
-    v,
     v,
     (
-        "| toRawJson |
+        '{{- $pv:=(get $.Input.body.payload "%s") -}}{{ if kindIs "string" $pv }}{{- $pv | toRawJson -}}{{- else -}}{{ $pv | toRawJson | toRawJson }}{{- end -}}'
+        % v
         if self.parameters[
             parameter_name
         ]["type"]
        == "JSON"
-        else "| toRawJson
+        else '{{- (get $.Input.body.payload "%s" | toRawJson) -}}'
+        % v
     ),
 ),
 # Unfortunately the sensor needs to
 # record the default values for
 # the parameters - there doesn't seem
 # to be any way for us to skip
-value=
-
-
-    "value"
-]
-)
-if self.parameters[parameter_name][
-    "type"
-]
-== "JSON"
-else self.parameters[
-    parameter_name
-]["value"]
-),
+value=self.parameters[parameter_name][
+    "value"
+],
 )
 .dest(
     # this undocumented (mis?)feature in
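Note: in the JSON branch above, a non-string default is encoded twice: the first json.dumps serializes the Python value and the second wraps that serialization in an extra pair of quotes so Argo receives it as a string parameter. A quick illustration:

    import json

    default_value = {"alpha": 1}

    serialized = json.dumps(default_value)  # '{"alpha": 1}'
    quoted = json.dumps(serialized)         # the same text wrapped in outer quotes
    print(quoted)                           # "{\"alpha\": 1}"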
metaflow/plugins/argo/param_val.py
ADDED
@@ -0,0 +1,19 @@
+import sys
+import base64
+import json
+
+
+def parse_parameter_value(base64_value):
+    val = base64.b64decode(base64_value).decode("utf-8")
+
+    try:
+        return json.loads(val)
+    except json.decoder.JSONDecodeError:
+        # fallback to using the original value.
+        return val
+
+
+if __name__ == "__main__":
+    base64_val = sys.argv[1]
+
+    print(parse_parameter_value(base64_val))
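Note: this helper is the receiving end of the workflow-template change above: the parameter value arrives base64-encoded, and parse_parameter_value decodes it, preferring JSON when the payload parses. A round-trip sketch, assuming metaflow 2.19.0 is installed:

    import base64
    import json

    from metaflow.plugins.argo.param_val import parse_parameter_value

    encoded = base64.b64encode(json.dumps({"alpha": 1}).encode("utf-8")).decode("ascii")
    print(parse_parameter_value(encoded))  # {'alpha': 1}; valid JSON is parsed

    encoded = base64.b64encode(b"plain text, not JSON").decode("ascii")
    print(parse_parameter_value(encoded))  # falls back to the raw string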
metaflow/plugins/cards/card_datastore.py
CHANGED
@@ -1,7 +1,3 @@
-"""
-
-"""
-
 from collections import namedtuple
 from io import BytesIO
 import os
@@ -13,6 +9,7 @@ from metaflow.metaflow_config import (
     CARD_S3ROOT,
     CARD_LOCALROOT,
     DATASTORE_LOCAL_DIR,
+    DATASTORE_SPIN_LOCAL_DIR,
     CARD_SUFFIX,
     CARD_AZUREROOT,
     CARD_GSROOT,
@@ -62,25 +59,28 @@ class CardDatastore(object):
     return CARD_AZUREROOT
 elif storage_type == "gs":
     return CARD_GSROOT
-elif storage_type == "local":
+elif storage_type == "local" or storage_type == "spin":
     # Borrowing some of the logic from LocalStorage.get_storage_root
     result = CARD_LOCALROOT
+    local_dir = (
+        DATASTORE_SPIN_LOCAL_DIR
+        if storage_type == "spin"
+        else DATASTORE_LOCAL_DIR
+    )
     if result is None:
         current_path = os.getcwd()
-        check_dir = os.path.join(current_path,
+        check_dir = os.path.join(current_path, local_dir)
         check_dir = os.path.realpath(check_dir)
         orig_path = check_dir
         while not os.path.isdir(check_dir):
             new_path = os.path.dirname(current_path)
             if new_path == current_path:
-
+                # No longer making upward progress so we
+                # return the top level path
+                return os.path.join(orig_path, CARD_SUFFIX)
             current_path = new_path
-            check_dir = os.path.join(
-
-            )
-        result = orig_path
-
-    return result
+            check_dir = os.path.join(current_path, local_dir)
+        return os.path.join(check_dir, CARD_SUFFIX)
 else:
     # Let's make it obvious we need to update this block for each new datastore backend...
     raise NotImplementedError(
metaflow/plugins/cards/card_modules/basic.py
CHANGED
@@ -496,9 +496,15 @@ class TaskInfoComponent(MetaflowCardComponent):
 )

 # ignore the name as a parameter
-
-
-
+if "_parameters" not in self._task.parent.parent:
+    # In case of spin steps, there is no _parameters task
+    param_ids = []
+else:
+    param_ids = [
+        p.id
+        for p in self._task.parent.parent["_parameters"].task
+        if p.id != "name"
+    ]
 if len(param_ids) > 0:
     # Extract parameter from the Parameter Task. That is less brittle.
     parameter_data = TaskToDict(
metaflow/plugins/datastores/local_storage.py
CHANGED
@@ -1,24 +1,29 @@
 import json
 import os

-from metaflow.metaflow_config import
+from metaflow.metaflow_config import (
+    DATASTORE_LOCAL_DIR,
+    DATASTORE_SYSROOT_LOCAL,
+)
 from metaflow.datastore.datastore_storage import CloseAfterUse, DataStoreStorage


 class LocalStorage(DataStoreStorage):
     TYPE = "local"
     METADATA_DIR = "_meta"
+    DATASTORE_DIR = DATASTORE_LOCAL_DIR  # ".metaflow"
+    SYSROOT_VAR = DATASTORE_SYSROOT_LOCAL

     @classmethod
     def get_datastore_root_from_config(cls, echo, create_on_absent=True):
-        result =
+        result = cls.SYSROOT_VAR
         if result is None:
             try:
                 # Python2
                 current_path = os.getcwdu()
             except:  # noqa E722
                 current_path = os.getcwd()
-            check_dir = os.path.join(current_path,
+            check_dir = os.path.join(current_path, cls.DATASTORE_DIR)
             check_dir = os.path.realpath(check_dir)
             orig_path = check_dir
             top_level_reached = False
@@ -28,12 +33,13 @@ class LocalStorage(DataStoreStorage):
                     top_level_reached = True
                     break  # We are no longer making upward progress
                 current_path = new_path
-                check_dir = os.path.join(current_path,
+                check_dir = os.path.join(current_path, cls.DATASTORE_DIR)
             if top_level_reached:
                 if create_on_absent:
                     # Could not find any directory to use so create a new one
                     echo(
-                        "Creating
+                        "Creating %s datastore in current directory (%s)"
+                        % (cls.TYPE, orig_path)
                     )
                     os.mkdir(orig_path)
                     result = orig_path
@@ -42,7 +48,7 @@ class LocalStorage(DataStoreStorage):
             else:
                 result = check_dir
         else:
-            result = os.path.join(result,
+            result = os.path.join(result, cls.DATASTORE_DIR)
         return result

     @staticmethod
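Note: the change above parameterizes the datastore-root discovery on the new class attributes (DATASTORE_DIR, SYSROOT_VAR) but keeps the same algorithm: if no sysroot is configured, walk upward from the current directory until the datastore directory is found, and fall back to creating one when the filesystem root is reached. A standalone sketch of that upward search (function and argument names are illustrative):

    import os

    def find_datastore_dir(start_dir, datastore_dir=".metaflow"):
        current = os.path.realpath(start_dir)
        check = os.path.join(current, datastore_dir)
        while not os.path.isdir(check):
            parent = os.path.dirname(current)
            if parent == current:
                return None  # reached the filesystem root without finding it
            current = parent
            check = os.path.join(current, datastore_dir)
        return check

    print(find_datastore_dir(os.getcwd()))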
metaflow/plugins/datastores/spin_storage.py
ADDED
@@ -0,0 +1,12 @@
+from metaflow.metaflow_config import (
+    DATASTORE_SPIN_LOCAL_DIR,
+    DATASTORE_SYSROOT_SPIN,
+)
+from metaflow.plugins.datastores.local_storage import LocalStorage
+
+
+class SpinStorage(LocalStorage):
+    TYPE = "spin"
+    METADATA_DIR = "_meta"
+    DATASTORE_DIR = DATASTORE_SPIN_LOCAL_DIR  # ".metaflow_spin"
+    SYSROOT_VAR = DATASTORE_SYSROOT_SPIN
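Note: SpinStorage inherits all LocalStorage behavior and only swaps the class-level constants, so get_datastore_root_from_config searches for .metaflow_spin and honors DATASTORE_SYSROOT_SPIN without duplicating any logic. The pattern in isolation (names are illustrative, not Metaflow APIs):

    import os

    class BaseStorage:
        DATASTORE_DIR = ".metaflow"

        @classmethod
        def storage_dir(cls, root):
            # cls.* resolves against the most-derived class, so subclasses
            # only override constants, never the algorithm.
            return os.path.join(root, cls.DATASTORE_DIR)

    class SpinLikeStorage(BaseStorage):
        DATASTORE_DIR = ".metaflow_spin"

    print(BaseStorage.storage_dir("/tmp"))      # /tmp/.metaflow
    print(SpinLikeStorage.storage_dir("/tmp"))  # /tmp/.metaflow_spin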
metaflow/plugins/datatools/s3/s3.py
CHANGED
@@ -18,6 +18,7 @@ from metaflow.metaflow_config import (
     DATATOOLS_S3ROOT,
     S3_RETRY_COUNT,
     S3_TRANSIENT_RETRY_COUNT,
+    S3_LOG_TRANSIENT_RETRIES,
     S3_SERVER_SIDE_ENCRYPTION,
     S3_WORKER_COUNT,
     TEMPDIR,
@@ -1760,17 +1761,35 @@ class S3(object):
 # due to a transient failure so we try again.
 transient_retry_count += 1
 total_ok_count += last_ok_count
-
-
-
-
-
-
-
-
-
+
+if S3_LOG_TRANSIENT_RETRIES:
+    # Extract transient error type from pending retry lines
+    error_info = ""
+    if pending_retries:
+        try:
+            # Parse the first line to get transient error type
+            first_retry = json.loads(
+                pending_retries[0].decode("utf-8").strip()
+            )
+            if "transient_error_type" in first_retry:
+                error_info = (
+                    " (%s)" % first_retry["transient_error_type"]
+                )
+        except (json.JSONDecodeError, IndexError, KeyError):
+            pass
+
+    print(
+        "Transient S3 failure (attempt #%d) -- total success: %d, "
+        "last attempt %d/%d -- remaining: %d%s"
+        % (
+            transient_retry_count,
+            total_ok_count,
+            last_ok_count,
+            last_ok_count + last_retry_count,
+            len(pending_retries),
+            error_info,
+        )
     )
-)
 if inject_failures == 0:
     # Don't sleep when we are "faking" the failures
     self._jitter_sleep(transient_retry_count)