ob-metaflow 2.18.12.1__py2.py3-none-any.whl → 2.19.0.1__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metaflow/__init__.py +1 -0
- metaflow/cli.py +78 -13
- metaflow/cli_components/run_cmds.py +182 -39
- metaflow/cli_components/step_cmd.py +160 -4
- metaflow/client/__init__.py +1 -0
- metaflow/client/core.py +162 -99
- metaflow/client/filecache.py +59 -32
- metaflow/cmd/code/__init__.py +2 -1
- metaflow/datastore/__init__.py +1 -0
- metaflow/datastore/content_addressed_store.py +40 -9
- metaflow/datastore/datastore_set.py +10 -1
- metaflow/datastore/flow_datastore.py +123 -4
- metaflow/datastore/spin_datastore.py +91 -0
- metaflow/datastore/task_datastore.py +86 -2
- metaflow/decorators.py +75 -6
- metaflow/extension_support/__init__.py +372 -305
- metaflow/flowspec.py +3 -2
- metaflow/graph.py +2 -2
- metaflow/metaflow_config.py +41 -0
- metaflow/metaflow_profile.py +18 -0
- metaflow/packaging_sys/utils.py +2 -39
- metaflow/packaging_sys/v1.py +63 -16
- metaflow/plugins/__init__.py +2 -0
- metaflow/plugins/argo/argo_workflows.py +20 -25
- metaflow/plugins/argo/param_val.py +19 -0
- metaflow/plugins/cards/card_datastore.py +13 -13
- metaflow/plugins/cards/card_decorator.py +1 -0
- metaflow/plugins/cards/card_modules/basic.py +9 -3
- metaflow/plugins/datastores/local_storage.py +12 -6
- metaflow/plugins/datastores/spin_storage.py +12 -0
- metaflow/plugins/datatools/s3/s3.py +29 -10
- metaflow/plugins/datatools/s3/s3op.py +90 -62
- metaflow/plugins/metadata_providers/local.py +76 -82
- metaflow/plugins/metadata_providers/spin.py +16 -0
- metaflow/runner/click_api.py +4 -2
- metaflow/runner/metaflow_runner.py +210 -19
- metaflow/runtime.py +348 -21
- metaflow/task.py +61 -12
- metaflow/user_configs/config_parameters.py +2 -4
- metaflow/user_decorators/mutable_flow.py +1 -1
- metaflow/user_decorators/user_step_decorator.py +10 -1
- metaflow/util.py +191 -1
- metaflow/version.py +1 -1
- {ob_metaflow-2.18.12.1.data → ob_metaflow-2.19.0.1.data}/data/share/metaflow/devtools/Makefile +10 -0
- {ob_metaflow-2.18.12.1.dist-info → ob_metaflow-2.19.0.1.dist-info}/METADATA +2 -4
- {ob_metaflow-2.18.12.1.dist-info → ob_metaflow-2.19.0.1.dist-info}/RECORD +52 -48
- {ob_metaflow-2.18.12.1.data → ob_metaflow-2.19.0.1.data}/data/share/metaflow/devtools/Tiltfile +0 -0
- {ob_metaflow-2.18.12.1.data → ob_metaflow-2.19.0.1.data}/data/share/metaflow/devtools/pick_services.sh +0 -0
- {ob_metaflow-2.18.12.1.dist-info → ob_metaflow-2.19.0.1.dist-info}/WHEEL +0 -0
- {ob_metaflow-2.18.12.1.dist-info → ob_metaflow-2.19.0.1.dist-info}/entry_points.txt +0 -0
- {ob_metaflow-2.18.12.1.dist-info → ob_metaflow-2.19.0.1.dist-info}/licenses/LICENSE +0 -0
- {ob_metaflow-2.18.12.1.dist-info → ob_metaflow-2.19.0.1.dist-info}/top_level.txt +0 -0
metaflow/flowspec.py
CHANGED

@@ -307,7 +307,8 @@ class FlowSpec(metaclass=FlowSpecMeta):
                     % (deco._flow_cls.__name__, cls.__name__)
                 )
             debug.userconf_exec(
-                "Evaluating flow level decorator %s" % deco.__class__.__name__
+                "Evaluating flow level decorator %s (pre-mutate)"
+                % deco.__class__.__name__
             )
             deco.pre_mutate(mutable_flow)
             # We reset cached_parameters on the very off chance that the user added
@@ -324,7 +325,7 @@ class FlowSpec(metaclass=FlowSpecMeta):
             if isinstance(deco, StepMutator):
                 inserted_by_value = [deco.decorator_name] + (deco.inserted_by or [])
                 debug.userconf_exec(
                     "Evaluating step level decorator %s for %s"
+                    "Evaluating step level decorator %s for %s (pre-mutate)"
                     % (deco.__class__.__name__, step.name)
                 )
                 deco.pre_mutate(
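For orientation, these debug lines fire when a user-supplied mutator's pre_mutate hook runs. A minimal sketch of such a mutator, assuming the FlowMutator API exported by recent Metaflow releases (run with METAFLOW_DEBUG_USERCONF=1 to see the "(pre-mutate)" traces above):

    from metaflow import FlowSpec, FlowMutator, step

    class add_note(FlowMutator):
        def pre_mutate(self, mutable_flow):
            # invoked before user configs are frozen into the flow
            pass

    @add_note
    class HelloFlow(FlowSpec):
        @step
        def start(self):
            self.next(self.end)

        @step
        def end(self):
            pass

    if __name__ == "__main__":
        HelloFlow()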
metaflow/graph.py
CHANGED

@@ -94,7 +94,7 @@ class DAGNode(object):
         case_key = None

         # handle string literals
-        if isinstance(key, ast.Str):
+        if hasattr(ast, "Str") and isinstance(key, ast.Str):
             case_key = key.s
         elif isinstance(key, ast.Constant):
             case_key = key.value
@@ -171,7 +171,7 @@ class DAGNode(object):
         # Get condition parameter
         for keyword in tail.value.keywords:
             if keyword.arg == "condition":
-                if isinstance(keyword.value, ast.Str):
+                if hasattr(ast, "Str") and isinstance(keyword.value, ast.Str):
                     condition_name = keyword.value.s
                 elif isinstance(keyword.value, ast.Constant) and isinstance(
                     keyword.value.value, str
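The hasattr guard is needed because ast.Str was deprecated in Python 3.8 and removed in Python 3.12, where merely touching it raises AttributeError; ast.Constant covers string literals on every supported version. A self-contained illustration of the pattern:

    import ast

    node = ast.parse("'hello'", mode="eval").body

    if hasattr(ast, "Str") and isinstance(node, ast.Str):
        value = node.s  # pre-3.12 path
    elif isinstance(node, ast.Constant) and isinstance(node.value, str):
        value = node.value  # works on all supported versions

    print(value)  # hello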
metaflow/metaflow_config.py
CHANGED

@@ -21,6 +21,7 @@ if sys.platform == "darwin":

 # Path to the local directory to store artifacts for 'local' datastore.
 DATASTORE_LOCAL_DIR = ".metaflow"
+DATASTORE_SPIN_LOCAL_DIR = ".metaflow_spin"

 # Local configuration file (in .metaflow) containing overrides per-project
 LOCAL_CONFIG_FILE = "config.json"
@@ -47,6 +48,38 @@ DEFAULT_FROM_DEPLOYMENT_IMPL = from_conf(
     "DEFAULT_FROM_DEPLOYMENT_IMPL", "argo-workflows"
 )

+###
+# Spin configuration
+###
+# Essentially a whitelist of decorators that are allowed in Spin steps
+SPIN_ALLOWED_DECORATORS = from_conf(
+    "SPIN_ALLOWED_DECORATORS",
+    [
+        "conda",
+        "pypi",
+        "conda_base",
+        "pypi_base",
+        "environment",
+        "project",
+        "timeout",
+        "conda_env_internal",
+        "card",
+    ],
+)
+
+# Essentially a blacklist of decorators that are not allowed in Spin steps
+# Note: decorators not in either SPIN_ALLOWED_DECORATORS or SPIN_DISALLOWED_DECORATORS
+# are simply ignored in Spin steps
+SPIN_DISALLOWED_DECORATORS = from_conf(
+    "SPIN_DISALLOWED_DECORATORS",
+    [
+        "parallel",
+    ],
+)
+
+# Default value for persist option in spin command
+SPIN_PERSIST = from_conf("SPIN_PERSIST", False)
+
 ###
 # User configuration
 ###
@@ -57,6 +90,7 @@ USER = from_conf("USER")
 # Datastore configuration
 ###
 DATASTORE_SYSROOT_LOCAL = from_conf("DATASTORE_SYSROOT_LOCAL")
+DATASTORE_SYSROOT_SPIN = from_conf("DATASTORE_SYSROOT_SPIN")
 # S3 bucket and prefix to store artifacts for 's3' datastore.
 DATASTORE_SYSROOT_S3 = from_conf("DATASTORE_SYSROOT_S3")
 # Azure Blob Storage container and blob prefix
@@ -109,6 +143,9 @@ S3_WORKER_COUNT = from_conf("S3_WORKER_COUNT", 64)
 # top-level retries)
 S3_TRANSIENT_RETRY_COUNT = from_conf("S3_TRANSIENT_RETRY_COUNT", 20)

+# Whether to log transient retry messages to stdout
+S3_LOG_TRANSIENT_RETRIES = from_conf("S3_LOG_TRANSIENT_RETRIES", False)
+
 # S3 retry configuration used in the aws client
 # Use the adaptive retry strategy by default
 S3_CLIENT_RETRY_CONFIG = from_conf(
@@ -462,6 +499,10 @@ ESCAPE_HATCH_WARNING = from_conf("ESCAPE_HATCH_WARNING", True)
 ###
 FEAT_ALWAYS_UPLOAD_CODE_PACKAGE = from_conf("FEAT_ALWAYS_UPLOAD_CODE_PACKAGE", False)
 ###
+# Profile
+###
+PROFILE_FROM_START = from_conf("PROFILE_FROM_START", False)
+###
 # Debug configuration
 ###
 DEBUG_OPTIONS = [
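All of these knobs flow through from_conf, which consults a METAFLOW_-prefixed environment variable (and the active Metaflow config profile) before falling back to the default, so the new Spin, S3, and profiling options can be toggled without code changes. A quick sketch; exact type coercion of environment strings is left to from_conf:

    import os

    # must be set before metaflow is imported; from_conf reads the
    # environment at import time
    os.environ["METAFLOW_SPIN_PERSIST"] = "true"
    os.environ["METAFLOW_S3_LOG_TRANSIENT_RETRIES"] = "true"

    from metaflow.metaflow_config import SPIN_PERSIST, S3_LOG_TRANSIENT_RETRIES

    print(SPIN_PERSIST, S3_LOG_TRANSIENT_RETRIES)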
metaflow/metaflow_profile.py
CHANGED

@@ -2,6 +2,24 @@ import time

 from contextlib import contextmanager

+from .metaflow_config import PROFILE_FROM_START
+
+init_time = None
+
+
+if PROFILE_FROM_START:
+
+    def from_start(msg: str):
+        global init_time
+        if init_time is None:
+            init_time = time.time()
+        print("From start: %s took %dms" % (msg, int((time.time() - init_time) * 1000)))
+
+else:
+
+    def from_start(_msg: str):
+        pass
+

 @contextmanager
 def profile(label, stats_dict=None):
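from_start is a cheap elapsed-time marker: the first call pins init_time, each later call prints milliseconds since that first call, and the whole thing compiles to a no-op unless METAFLOW_PROFILE_FROM_START is set. A usage sketch (the first call necessarily reports roughly 0ms):

    from metaflow.metaflow_profile import from_start, profile

    from_start("interpreter up")   # pins init_time
    sum(range(10**6))
    from_start("warm-up done")     # ms since the first call

    with profile("busy-loop"):     # the pre-existing context manager
        sum(range(10**6))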
metaflow/packaging_sys/utils.py
CHANGED

@@ -2,45 +2,7 @@ import os
 from contextlib import contextmanager
 from typing import Callable, Generator, List, Optional, Tuple

-from ..util import to_unicode
-
-
-# this is os.walk(follow_symlinks=True) with cycle detection
-def walk_without_cycles(
-    top_root: str,
-    exclude_dirs: Optional[List[str]] = None,
-) -> Generator[Tuple[str, List[str]], None, None]:
-    seen = set()
-
-    default_skip_dirs = ["__pycache__"]
-
-    def _recurse(root, skip_dirs):
-        for parent, dirs, files in os.walk(root):
-            dirs[:] = [d for d in dirs if d not in skip_dirs]
-            for d in dirs:
-                path = os.path.join(parent, d)
-                if os.path.islink(path):
-                    # Breaking loops: never follow the same symlink twice
-                    #
-                    # NOTE: this also means that links to sibling links are
-                    # not followed. In this case:
-                    #
-                    # x -> y
-                    # y -> oo
-                    # oo/real_file
-                    #
-                    # real_file is only included twice, not three times
-                    reallink = os.path.realpath(path)
-                    if reallink not in seen:
-                        seen.add(reallink)
-                        for x in _recurse(path, default_skip_dirs):
-                            yield x
-            yield parent, files
-
-    skip_dirs = set(default_skip_dirs + (exclude_dirs or []))
-    for x in _recurse(top_root, skip_dirs):
-        skip_dirs = default_skip_dirs
-        yield x
+from ..util import to_unicode, walk_without_cycles


 def walk(
@@ -53,6 +15,7 @@ def walk(
     prefixlen = len("%s/" % os.path.dirname(root))
     for (
         path,
+        _,
         files,
     ) in walk_without_cycles(root, exclude_tl_dirs):
         if exclude_hidden and "/." in path:
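The helper now lives in metaflow.util and, as the extra `_` in the unpacking suggests, yields os.walk-style (root, dirs, files) triples rather than the old (root, files) pairs. A usage sketch under that assumption:

    from metaflow.util import walk_without_cycles

    # symlink loops are broken by never following the same real path twice
    for root, dirs, files in walk_without_cycles("."):
        print(root, len(dirs), len(files))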
metaflow/packaging_sys/v1.py
CHANGED

@@ -16,7 +16,7 @@ from ..exception import MetaflowException
 from ..metaflow_version import get_version
 from ..user_decorators.user_flow_decorator import FlowMutatorMeta
 from ..user_decorators.user_step_decorator import UserStepDecoratorMeta
-from ..util import get_metaflow_root
+from ..util import get_metaflow_root, walk_without_cycles
 from . import ContentType, MFCONTENT_MARKER, MetaflowCodeContentV1Base
 from .distribution_support import _ModuleInfo, modules_to_distributions
 from .utils import suffix_filter, walk
@@ -269,12 +269,50 @@ class MetaflowCodeContentV1(MetaflowCodeContentV1Base):
         # If the module is a single file, we handle this here by looking at __file__
         # which will point to the single file. If it is an actual module, __path__
         # will contain the path(s) to the module
+        if hasattr(module, "__file__") and module.__file__:
+            root_paths = [Path(module.__file__).resolve().as_posix()]
+        else:
+            root_paths = []
+            seen_path_values = set()
+            new_paths = module.__spec__.submodule_search_locations
+            while new_paths:
+                paths = new_paths
+                new_paths = []
+                for p in paths:
+                    if p in seen_path_values:
+                        continue
+                    if os.path.isdir(p):
+                        root_paths.append(Path(p).resolve().as_posix())
+                    elif p in sys.path_importer_cache:
+                        # We have a path hook that we likely need to call to get the actual path
+                        addl_spec = sys.path_importer_cache[p].find_spec(name)
+                        if (
+                            addl_spec is not None
+                            and addl_spec.submodule_search_locations
+                        ):
+                            new_paths.extend(addl_spec.submodule_search_locations)
+                    else:
+                        # This may not be as required since it is likely the importer cache has
+                        # everything already but just in case, we will also go through the
+                        # path hooks and see if we find another one
+                        for path_hook in sys.path_hooks:
+                            try:
+                                finder = path_hook(p)
+                                addl_spec = finder.find_spec(name)
+                                if (
+                                    addl_spec is not None
+                                    and addl_spec.submodule_search_locations
+                                ):
+                                    new_paths.extend(
+                                        addl_spec.submodule_search_locations
+                                    )
+                                    break
+                            except ImportError:
+                                continue
+                    seen_path_values.add(p)
         self._modules[name] = _ModuleInfo(
             name,
-            set(
-                Path(p).resolve().as_posix()
-                for p in getattr(module, "__path__", [module.__file__])
-            ),
+            set(root_paths),
             module,
             False,  # This is not a Metaflow module (added by the user manually)
         )
@@ -417,15 +455,7 @@ class MetaflowCodeContentV1(MetaflowCodeContentV1Base):
                     % (dist_name, name)
                 )
             dist_root = str(dist.locate_file(name))
-
-                # This is an error because it means that this distribution is
-                # not contributing to the module.
-                raise RuntimeError(
-                    "Distribution '%s' is not contributing to module '%s' as "
-                    "expected (got '%s' when expected one of %s)"
-                    % (dist.metadata["Name"], name, dist_root, paths)
-                )
-            paths.discard(dist_root)
+            has_file_in_root = False
             if dist_name not in self._distmetainfo:
                 # Possible that a distribution contributes to multiple modules
                 self._distmetainfo[dist_name] = {
@@ -438,13 +468,30 @@ class MetaflowCodeContentV1(MetaflowCodeContentV1Base):
             for file in dist.files or []:
                 # Skip files that do not belong to this module (distribution may
                 # provide multiple modules)
-                if
+                if (
+                    file.parts[: len(prefix_parts)] != prefix_parts
+                    or file.suffix == ".pth"
+                    or str(file).startswith("__editable__")
+                ):
                     continue
                 if file.parts[len(prefix_parts)] == "__init__.py":
                     has_init = True
+                has_file_in_root = True
+                # At this point, we know that we are seeing actual files in the
+                # dist_root so we make sure it is as expected
+                if dist_root not in paths:
+                    # This is an error because it means that this distribution is
+                    # not contributing to the module.
+                    raise RuntimeError(
+                        "Distribution '%s' is not contributing to module '%s' as "
+                        "expected (got '%s' when expected one of %s)"
+                        % (dist.metadata["Name"], name, dist_root, paths)
+                    )
                 yield str(
                     dist.locate_file(file).resolve().as_posix()
                 ), os.path.join(self._code_dir, *prefix_parts, *file.parts[1:])
+            if has_file_in_root:
+                paths.discard(dist_root)

         # Now if there are more paths left in paths, it means there is a non-distribution
         # component to this package which we also include.
@@ -460,7 +507,7 @@ class MetaflowCodeContentV1(MetaflowCodeContentV1Base):
             )
             has_init = True
         else:
-            for root, _, files in
+            for root, _, files in walk_without_cycles(path):
                 for file in files:
                     if any(file.endswith(x) for x in EXT_EXCLUDE_SUFFIXES):
                         continue
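The new discovery loop exists because __path__ entries are not always real directories: some (notably entries produced by certain editable installs) are synthetic strings served by path hooks, so the packager chases submodule_search_locations through sys.path_importer_cache and sys.path_hooks until it bottoms out at filesystem paths. The standard-library mechanics it leans on:

    import sys
    import importlib

    mod = importlib.import_module("json")

    # a package advertises its roots here; a single-file module has None
    # and is covered by __file__ instead
    print(mod.__spec__.submodule_search_locations)

    # non-directory sys.path entries are resolved through these hooks,
    # with results memoized in sys.path_importer_cache
    print(sys.path_hooks)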
metaflow/plugins/__init__.py
CHANGED

@@ -83,11 +83,13 @@ ENVIRONMENTS_DESC = [
 METADATA_PROVIDERS_DESC = [
     ("service", ".metadata_providers.service.ServiceMetadataProvider"),
     ("local", ".metadata_providers.local.LocalMetadataProvider"),
+    ("spin", ".metadata_providers.spin.SpinMetadataProvider"),
 ]

 # Add datastore here
 DATASTORES_DESC = [
     ("local", ".datastores.local_storage.LocalStorage"),
+    ("spin", ".datastores.spin_storage.SpinStorage"),
     ("s3", ".datastores.s3_storage.S3Storage"),
     ("azure", ".datastores.azure_storage.AzureStorage"),
     ("gs", ".datastores.gs_storage.GSStorage"),
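Each descriptor pairs a short name with a dotted path relative to metaflow.plugins that is resolved to a class at load time; the sketch below mimics that resolution (the real loader layers extension-package handling on top):

    from importlib import import_module

    def resolve(desc):
        # ".datastores.spin_storage.SpinStorage" -> module path + class name
        name, dotted = desc
        module_path, cls_name = dotted.rsplit(".", 1)
        module = import_module(module_path, package="metaflow.plugins")
        return name, getattr(module, cls_name)

    print(resolve(("spin", ".datastores.spin_storage.SpinStorage")))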
metaflow/plugins/argo/argo_workflows.py
CHANGED

@@ -618,7 +618,16 @@ class ArgoWorkflows(object):
         # the JSON equivalent of None to please argo-workflows. Unfortunately it
         # has the side effect of casting the parameter value to string null during
         # execution - which needs to be fixed imminently.
-        if
+        if default_value is None:
+            default_value = json.dumps(None)
+        elif param_type == "JSON":
+            if not isinstance(default_value, str):
+                # once to serialize the default value if needed.
+                default_value = json.dumps(default_value)
+            # adds outer quotes to param
+            default_value = json.dumps(default_value)
+        else:
+            # Make argo sensors happy
             default_value = json.dumps(default_value)

         parameters[param.name] = dict(
@@ -950,11 +959,7 @@ class ArgoWorkflows(object):
                 Arguments().parameters(
                     [
                         Parameter(parameter["name"])
-                        .value(
-                            "'%s'" % parameter["value"]
-                            if parameter["type"] == "JSON"
-                            else parameter["value"]
-                        )
+                        .value(parameter["value"])
                         .description(parameter.get("description"))
                         # TODO: Better handle IncludeFile in Argo Workflows UI.
                         for parameter in self.parameters.values()
@@ -2063,7 +2068,7 @@ class ArgoWorkflows(object):
                     # {{foo.bar['param_name']}}.
                     # https://argoproj.github.io/argo-events/tutorials/02-parameterization/
                    # http://masterminds.github.io/sprig/strings.html
-                    "--%s
+                    "--%s=\\\"$(python -m metaflow.plugins.argo.param_val {{=toBase64(workflow.parameters['%s'])}})\\\""
                     % (parameter["name"], parameter["name"])
                     for parameter in self.parameters.values()
                 ]
@@ -3893,37 +3898,27 @@ class ArgoWorkflows(object):
                 # NOTE: We need the conditional logic in order to successfully fall back to the default value
                 # when the event payload does not contain a key for a parameter.
                 # NOTE: Keys might contain dashes, so use the safer 'get' for fetching the value
-                data_template='{{ if (hasKey $.Input.body.payload "%s") }}
+                data_template='{{ if (hasKey $.Input.body.payload "%s") }}%s{{- else -}}{{ (fail "use-default-instead") }}{{- end -}}'
                 % (
-                    v,
                     v,
                     (
-                        "| toRawJson
+                        '{{- $pv:=(get $.Input.body.payload "%s") -}}{{ if kindIs "string" $pv }}{{- $pv | toRawJson -}}{{- else -}}{{ $pv | toRawJson | toRawJson }}{{- end -}}'
+                        % v
                         if self.parameters[
                             parameter_name
                        ]["type"]
                         == "JSON"
-                        else "| toRawJson
+                        else '{{- (get $.Input.body.payload "%s" | toRawJson) -}}'
+                        % v
                     ),
                 ),
                 # Unfortunately the sensor needs to
                 # record the default values for
                 # the parameters - there doesn't seem
                 # to be any way for us to skip
-                value=
-
-
-                    "value"
-                ]
-                )
-                if self.parameters[parameter_name][
-                    "type"
-                ]
-                == "JSON"
-                else self.parameters[
-                    parameter_name
-                ]["value"]
-                ),
+                value=self.parameters[parameter_name][
+                    "value"
+                ],
             )
             .dest(
                 # this undocumented (mis?)feature in
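The double json.dumps for JSON-typed defaults is the subtle part: the first call serializes the value (when it is not already a string), and the second wraps the result in the outer quotes a string-typed Argo parameter requires. Concretely:

    import json

    default = {"alpha": 0.5}
    once = json.dumps(default)  # '{"alpha": 0.5}'        -- the JSON payload
    twice = json.dumps(once)    # '"{\\"alpha\\": 0.5}"'  -- quoted for Argo
    print(once)
    print(twice)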
metaflow/plugins/argo/param_val.py
ADDED

@@ -0,0 +1,19 @@
+import sys
+import base64
+import json
+
+
+def parse_parameter_value(base64_value):
+    val = base64.b64decode(base64_value).decode("utf-8")
+
+    try:
+        return json.loads(val)
+    except json.decoder.JSONDecodeError:
+        # fallback to using the original value.
+        return val
+
+
+if __name__ == "__main__":
+    base64_val = sys.argv[1]
+
+    print(parse_parameter_value(base64_val))
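Round-tripping what the workflow template does: the parameter value arrives base64-encoded (via the sprig toBase64 call above), is decoded, then JSON-decoded when possible, with a plain-string fallback:

    import base64

    from metaflow.plugins.argo.param_val import parse_parameter_value

    enc = lambda s: base64.b64encode(s.encode("utf-8")).decode("utf-8")

    print(parse_parameter_value(enc('{"alpha": 0.5}')))  # -> {'alpha': 0.5}
    print(parse_parameter_value(enc("not json")))        # -> 'not json'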
metaflow/plugins/cards/card_datastore.py
CHANGED

@@ -1,7 +1,3 @@
-"""
-
-"""
-
 from collections import namedtuple
 from io import BytesIO
 import os
@@ -13,6 +9,7 @@ from metaflow.metaflow_config import (
     CARD_S3ROOT,
     CARD_LOCALROOT,
     DATASTORE_LOCAL_DIR,
+    DATASTORE_SPIN_LOCAL_DIR,
     CARD_SUFFIX,
     CARD_AZUREROOT,
     CARD_GSROOT,
@@ -62,25 +59,28 @@ class CardDatastore(object):
             return CARD_AZUREROOT
         elif storage_type == "gs":
             return CARD_GSROOT
-        elif storage_type == "local":
+        elif storage_type == "local" or storage_type == "spin":
             # Borrowing some of the logic from LocalStorage.get_storage_root
             result = CARD_LOCALROOT
+            local_dir = (
+                DATASTORE_SPIN_LOCAL_DIR
+                if storage_type == "spin"
+                else DATASTORE_LOCAL_DIR
+            )
             if result is None:
                 current_path = os.getcwd()
-                check_dir = os.path.join(current_path,
+                check_dir = os.path.join(current_path, local_dir)
                 check_dir = os.path.realpath(check_dir)
                 orig_path = check_dir
                 while not os.path.isdir(check_dir):
                     new_path = os.path.dirname(current_path)
                     if new_path == current_path:
-
+                        # No longer making upward progress so we
+                        # return the top level path
+                        return os.path.join(orig_path, CARD_SUFFIX)
                     current_path = new_path
-                    check_dir = os.path.join(
-
-                    )
-                result = orig_path
-
-                return result
+                    check_dir = os.path.join(current_path, local_dir)
+            return os.path.join(check_dir, CARD_SUFFIX)
         else:
             # Let's make it obvious we need to update this block for each new datastore backend...
             raise NotImplementedError(
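The upward search the card datastore borrows is easy to see in isolation: climb parent directories until one contains the datastore directory, and give up (returning the starting candidate) once the path stops changing. A stripped-down sketch with a hypothetical helper name and an illustrative suffix:

    import os

    def find_card_root(start, local_dir=".metaflow_spin", suffix="cards"):
        current = os.path.realpath(start)
        check = os.path.join(current, local_dir)
        orig = check
        while not os.path.isdir(check):
            parent = os.path.dirname(current)
            if parent == current:
                # filesystem root reached; no more upward progress
                return os.path.join(orig, suffix)
            current = parent
            check = os.path.join(current, local_dir)
        return os.path.join(check, suffix)

    print(find_card_root(os.getcwd()))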
metaflow/plugins/cards/card_modules/basic.py
CHANGED

@@ -496,9 +496,15 @@ class TaskInfoComponent(MetaflowCardComponent):
         )

         # ignore the name as a parameter
-
-
-
+        if "_parameters" not in self._task.parent.parent:
+            # In case of spin steps, there is no _parameters task
+            param_ids = []
+        else:
+            param_ids = [
+                p.id
+                for p in self._task.parent.parent["_parameters"].task
+                if p.id != "name"
+            ]
         if len(param_ids) > 0:
             # Extract parameter from the Parameter Task. That is less brittle.
             parameter_data = TaskToDict(
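The guard works because, in the client API, a membership test on a Task's grandparent Run is a by-name lookup of its steps, and spin runs never materialize a _parameters step. Roughly the same check from user code (flow name hypothetical):

    from metaflow import Flow

    run = Flow("HelloFlow").latest_run
    if "_parameters" in run:
        print([p.id for p in run["_parameters"].task if p.id != "name"])
    else:
        print("no _parameters step (e.g. a spin run)")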
metaflow/plugins/datastores/local_storage.py
CHANGED

@@ -1,24 +1,29 @@
 import json
 import os

-from metaflow.metaflow_config import
+from metaflow.metaflow_config import (
+    DATASTORE_LOCAL_DIR,
+    DATASTORE_SYSROOT_LOCAL,
+)
 from metaflow.datastore.datastore_storage import CloseAfterUse, DataStoreStorage


 class LocalStorage(DataStoreStorage):
     TYPE = "local"
     METADATA_DIR = "_meta"
+    DATASTORE_DIR = DATASTORE_LOCAL_DIR  # ".metaflow"
+    SYSROOT_VAR = DATASTORE_SYSROOT_LOCAL

     @classmethod
     def get_datastore_root_from_config(cls, echo, create_on_absent=True):
-        result =
+        result = cls.SYSROOT_VAR
         if result is None:
             try:
                 # Python2
                 current_path = os.getcwdu()
             except:  # noqa E722
                 current_path = os.getcwd()
-            check_dir = os.path.join(current_path,
+            check_dir = os.path.join(current_path, cls.DATASTORE_DIR)
             check_dir = os.path.realpath(check_dir)
             orig_path = check_dir
             top_level_reached = False
@@ -28,12 +33,13 @@ class LocalStorage(DataStoreStorage):
                     top_level_reached = True
                     break  # We are no longer making upward progress
                 current_path = new_path
-                check_dir = os.path.join(current_path,
+                check_dir = os.path.join(current_path, cls.DATASTORE_DIR)
             if top_level_reached:
                 if create_on_absent:
                     # Could not find any directory to use so create a new one
                     echo(
-                        "Creating
+                        "Creating %s datastore in current directory (%s)"
+                        % (cls.TYPE, orig_path)
                     )
                     os.mkdir(orig_path)
                     result = orig_path
@@ -42,7 +48,7 @@ class LocalStorage(DataStoreStorage):
             else:
                 result = check_dir
         else:
-            result = os.path.join(result,
+            result = os.path.join(result, cls.DATASTORE_DIR)
         return result

     @staticmethod
metaflow/plugins/datastores/spin_storage.py
ADDED

@@ -0,0 +1,12 @@
+from metaflow.metaflow_config import (
+    DATASTORE_SPIN_LOCAL_DIR,
+    DATASTORE_SYSROOT_SPIN,
+)
+from metaflow.plugins.datastores.local_storage import LocalStorage
+
+
+class SpinStorage(LocalStorage):
+    TYPE = "spin"
+    METADATA_DIR = "_meta"
+    DATASTORE_DIR = DATASTORE_SPIN_LOCAL_DIR  # ".metaflow_spin"
+    SYSROOT_VAR = DATASTORE_SYSROOT_SPIN
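The point of the local_storage refactor above is that SpinStorage overrides only data, never logic: LocalStorage now reads its directory name and sysroot from class attributes, so a new backend is a handful of declarations. A stripped-down model of the pattern:

    class Storage:
        DATASTORE_DIR = ".metaflow"
        SYSROOT_VAR = None  # stand-in for the from_conf value

        @classmethod
        def datastore_root(cls, base):
            if cls.SYSROOT_VAR is not None:
                return cls.SYSROOT_VAR
            return f"{base}/{cls.DATASTORE_DIR}"

    class Spin(Storage):
        DATASTORE_DIR = ".metaflow_spin"

    print(Spin.datastore_root("/tmp"))  # /tmp/.metaflow_spin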
metaflow/plugins/datatools/s3/s3.py
CHANGED

@@ -18,6 +18,7 @@ from metaflow.metaflow_config import (
     DATATOOLS_S3ROOT,
     S3_RETRY_COUNT,
     S3_TRANSIENT_RETRY_COUNT,
+    S3_LOG_TRANSIENT_RETRIES,
     S3_SERVER_SIDE_ENCRYPTION,
     S3_WORKER_COUNT,
     TEMPDIR,
@@ -1760,17 +1761,35 @@ class S3(object):
             # due to a transient failure so we try again.
             transient_retry_count += 1
             total_ok_count += last_ok_count
-
-
-
-
-
-
-
-
-
+
+            if S3_LOG_TRANSIENT_RETRIES:
+                # Extract transient error type from pending retry lines
+                error_info = ""
+                if pending_retries:
+                    try:
+                        # Parse the first line to get transient error type
+                        first_retry = json.loads(
+                            pending_retries[0].decode("utf-8").strip()
+                        )
+                        if "transient_error_type" in first_retry:
+                            error_info = (
+                                " (%s)" % first_retry["transient_error_type"]
+                            )
+                    except (json.JSONDecodeError, IndexError, KeyError):
+                        pass
+
+                print(
+                    "Transient S3 failure (attempt #%d) -- total success: %d, "
+                    "last attempt %d/%d -- remaining: %d%s"
+                    % (
+                        transient_retry_count,
+                        total_ok_count,
+                        last_ok_count,
+                        last_ok_count + last_retry_count,
+                        len(pending_retries),
+                        error_info,
+                    )
                 )
-                )
             if inject_failures == 0:
                 # Don't sleep when we are "faking" the failures
                 self._jitter_sleep(transient_retry_count)
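The new logging peeks at the first pending-retry line, which s3op emits as one JSON object per line; the transient_error_type field is the one referenced above, while the rest of the payload here is illustrative:

    import json

    line = b'{"url": "s3://bucket/key", "transient_error_type": "SlowDown"}\n'

    error_info = ""
    try:
        first_retry = json.loads(line.decode("utf-8").strip())
        if "transient_error_type" in first_retry:
            error_info = " (%s)" % first_retry["transient_error_type"]
    except (json.JSONDecodeError, IndexError, KeyError):
        pass

    print("Transient S3 failure (attempt #1) -- remaining: 1%s" % error_info)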