ob-metaflow 2.15.13.1__py2.py3-none-any.whl → 2.19.7.1rc0__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metaflow/__init__.py +10 -3
- metaflow/_vendor/imghdr/__init__.py +186 -0
- metaflow/_vendor/yaml/__init__.py +427 -0
- metaflow/_vendor/yaml/composer.py +139 -0
- metaflow/_vendor/yaml/constructor.py +748 -0
- metaflow/_vendor/yaml/cyaml.py +101 -0
- metaflow/_vendor/yaml/dumper.py +62 -0
- metaflow/_vendor/yaml/emitter.py +1137 -0
- metaflow/_vendor/yaml/error.py +75 -0
- metaflow/_vendor/yaml/events.py +86 -0
- metaflow/_vendor/yaml/loader.py +63 -0
- metaflow/_vendor/yaml/nodes.py +49 -0
- metaflow/_vendor/yaml/parser.py +589 -0
- metaflow/_vendor/yaml/reader.py +185 -0
- metaflow/_vendor/yaml/representer.py +389 -0
- metaflow/_vendor/yaml/resolver.py +227 -0
- metaflow/_vendor/yaml/scanner.py +1435 -0
- metaflow/_vendor/yaml/serializer.py +111 -0
- metaflow/_vendor/yaml/tokens.py +104 -0
- metaflow/cards.py +4 -0
- metaflow/cli.py +125 -21
- metaflow/cli_components/init_cmd.py +1 -0
- metaflow/cli_components/run_cmds.py +204 -40
- metaflow/cli_components/step_cmd.py +160 -4
- metaflow/client/__init__.py +1 -0
- metaflow/client/core.py +198 -130
- metaflow/client/filecache.py +59 -32
- metaflow/cmd/code/__init__.py +2 -1
- metaflow/cmd/develop/stub_generator.py +49 -18
- metaflow/cmd/develop/stubs.py +9 -27
- metaflow/cmd/make_wrapper.py +30 -0
- metaflow/datastore/__init__.py +1 -0
- metaflow/datastore/content_addressed_store.py +40 -9
- metaflow/datastore/datastore_set.py +10 -1
- metaflow/datastore/flow_datastore.py +124 -4
- metaflow/datastore/spin_datastore.py +91 -0
- metaflow/datastore/task_datastore.py +92 -6
- metaflow/debug.py +5 -0
- metaflow/decorators.py +331 -82
- metaflow/extension_support/__init__.py +414 -356
- metaflow/extension_support/_empty_file.py +2 -2
- metaflow/flowspec.py +322 -82
- metaflow/graph.py +178 -15
- metaflow/includefile.py +25 -3
- metaflow/lint.py +94 -3
- metaflow/meta_files.py +13 -0
- metaflow/metadata_provider/metadata.py +13 -2
- metaflow/metaflow_config.py +66 -4
- metaflow/metaflow_environment.py +91 -25
- metaflow/metaflow_profile.py +18 -0
- metaflow/metaflow_version.py +16 -1
- metaflow/package/__init__.py +673 -0
- metaflow/packaging_sys/__init__.py +880 -0
- metaflow/packaging_sys/backend.py +128 -0
- metaflow/packaging_sys/distribution_support.py +153 -0
- metaflow/packaging_sys/tar_backend.py +99 -0
- metaflow/packaging_sys/utils.py +54 -0
- metaflow/packaging_sys/v1.py +527 -0
- metaflow/parameters.py +6 -2
- metaflow/plugins/__init__.py +6 -0
- metaflow/plugins/airflow/airflow.py +11 -1
- metaflow/plugins/airflow/airflow_cli.py +16 -5
- metaflow/plugins/argo/argo_client.py +42 -20
- metaflow/plugins/argo/argo_events.py +6 -6
- metaflow/plugins/argo/argo_workflows.py +1023 -344
- metaflow/plugins/argo/argo_workflows_cli.py +396 -94
- metaflow/plugins/argo/argo_workflows_decorator.py +9 -0
- metaflow/plugins/argo/argo_workflows_deployer_objects.py +75 -49
- metaflow/plugins/argo/capture_error.py +5 -2
- metaflow/plugins/argo/conditional_input_paths.py +35 -0
- metaflow/plugins/argo/exit_hooks.py +209 -0
- metaflow/plugins/argo/param_val.py +19 -0
- metaflow/plugins/aws/aws_client.py +6 -0
- metaflow/plugins/aws/aws_utils.py +33 -1
- metaflow/plugins/aws/batch/batch.py +72 -5
- metaflow/plugins/aws/batch/batch_cli.py +24 -3
- metaflow/plugins/aws/batch/batch_decorator.py +57 -6
- metaflow/plugins/aws/step_functions/step_functions.py +28 -3
- metaflow/plugins/aws/step_functions/step_functions_cli.py +49 -4
- metaflow/plugins/aws/step_functions/step_functions_deployer.py +3 -0
- metaflow/plugins/aws/step_functions/step_functions_deployer_objects.py +30 -0
- metaflow/plugins/cards/card_cli.py +20 -1
- metaflow/plugins/cards/card_creator.py +24 -1
- metaflow/plugins/cards/card_datastore.py +21 -49
- metaflow/plugins/cards/card_decorator.py +58 -6
- metaflow/plugins/cards/card_modules/basic.py +38 -9
- metaflow/plugins/cards/card_modules/bundle.css +1 -1
- metaflow/plugins/cards/card_modules/chevron/renderer.py +1 -1
- metaflow/plugins/cards/card_modules/components.py +592 -3
- metaflow/plugins/cards/card_modules/convert_to_native_type.py +34 -5
- metaflow/plugins/cards/card_modules/json_viewer.py +232 -0
- metaflow/plugins/cards/card_modules/main.css +1 -0
- metaflow/plugins/cards/card_modules/main.js +56 -41
- metaflow/plugins/cards/card_modules/test_cards.py +22 -6
- metaflow/plugins/cards/component_serializer.py +1 -8
- metaflow/plugins/cards/metadata.py +22 -0
- metaflow/plugins/catch_decorator.py +9 -0
- metaflow/plugins/datastores/local_storage.py +12 -6
- metaflow/plugins/datastores/spin_storage.py +12 -0
- metaflow/plugins/datatools/s3/s3.py +49 -17
- metaflow/plugins/datatools/s3/s3op.py +113 -66
- metaflow/plugins/env_escape/client_modules.py +102 -72
- metaflow/plugins/events_decorator.py +127 -121
- metaflow/plugins/exit_hook/__init__.py +0 -0
- metaflow/plugins/exit_hook/exit_hook_decorator.py +46 -0
- metaflow/plugins/exit_hook/exit_hook_script.py +52 -0
- metaflow/plugins/kubernetes/kubernetes.py +12 -1
- metaflow/plugins/kubernetes/kubernetes_cli.py +11 -0
- metaflow/plugins/kubernetes/kubernetes_decorator.py +25 -6
- metaflow/plugins/kubernetes/kubernetes_job.py +12 -4
- metaflow/plugins/kubernetes/kubernetes_jobsets.py +31 -30
- metaflow/plugins/metadata_providers/local.py +76 -82
- metaflow/plugins/metadata_providers/service.py +13 -9
- metaflow/plugins/metadata_providers/spin.py +16 -0
- metaflow/plugins/package_cli.py +36 -24
- metaflow/plugins/parallel_decorator.py +11 -2
- metaflow/plugins/parsers.py +16 -0
- metaflow/plugins/pypi/bootstrap.py +7 -1
- metaflow/plugins/pypi/conda_decorator.py +41 -82
- metaflow/plugins/pypi/conda_environment.py +14 -6
- metaflow/plugins/pypi/micromamba.py +9 -1
- metaflow/plugins/pypi/pip.py +41 -5
- metaflow/plugins/pypi/pypi_decorator.py +4 -4
- metaflow/plugins/pypi/utils.py +22 -0
- metaflow/plugins/secrets/__init__.py +3 -0
- metaflow/plugins/secrets/secrets_decorator.py +14 -178
- metaflow/plugins/secrets/secrets_func.py +49 -0
- metaflow/plugins/secrets/secrets_spec.py +101 -0
- metaflow/plugins/secrets/utils.py +74 -0
- metaflow/plugins/test_unbounded_foreach_decorator.py +2 -2
- metaflow/plugins/timeout_decorator.py +0 -1
- metaflow/plugins/uv/bootstrap.py +29 -1
- metaflow/plugins/uv/uv_environment.py +5 -3
- metaflow/pylint_wrapper.py +5 -1
- metaflow/runner/click_api.py +79 -26
- metaflow/runner/deployer.py +208 -6
- metaflow/runner/deployer_impl.py +32 -12
- metaflow/runner/metaflow_runner.py +266 -33
- metaflow/runner/subprocess_manager.py +21 -1
- metaflow/runner/utils.py +27 -16
- metaflow/runtime.py +660 -66
- metaflow/task.py +255 -26
- metaflow/user_configs/config_options.py +33 -21
- metaflow/user_configs/config_parameters.py +220 -58
- metaflow/user_decorators/__init__.py +0 -0
- metaflow/user_decorators/common.py +144 -0
- metaflow/user_decorators/mutable_flow.py +512 -0
- metaflow/user_decorators/mutable_step.py +424 -0
- metaflow/user_decorators/user_flow_decorator.py +264 -0
- metaflow/user_decorators/user_step_decorator.py +749 -0
- metaflow/util.py +197 -7
- metaflow/vendor.py +23 -7
- metaflow/version.py +1 -1
- {ob_metaflow-2.15.13.1.data → ob_metaflow-2.19.7.1rc0.data}/data/share/metaflow/devtools/Makefile +13 -2
- {ob_metaflow-2.15.13.1.data → ob_metaflow-2.19.7.1rc0.data}/data/share/metaflow/devtools/Tiltfile +107 -7
- {ob_metaflow-2.15.13.1.data → ob_metaflow-2.19.7.1rc0.data}/data/share/metaflow/devtools/pick_services.sh +1 -0
- {ob_metaflow-2.15.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/METADATA +2 -3
- {ob_metaflow-2.15.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/RECORD +162 -121
- {ob_metaflow-2.15.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/WHEEL +1 -1
- metaflow/_vendor/v3_5/__init__.py +0 -1
- metaflow/_vendor/v3_5/importlib_metadata/__init__.py +0 -644
- metaflow/_vendor/v3_5/importlib_metadata/_compat.py +0 -152
- metaflow/_vendor/v3_5/zipp.py +0 -329
- metaflow/info_file.py +0 -25
- metaflow/package.py +0 -203
- metaflow/user_configs/config_decorators.py +0 -568
- {ob_metaflow-2.15.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/entry_points.txt +0 -0
- {ob_metaflow-2.15.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/licenses/LICENSE +0 -0
- {ob_metaflow-2.15.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/top_level.txt +0 -0
metaflow/cmd/develop/stub_generator.py CHANGED
@@ -7,7 +7,6 @@ import pathlib
 import re
 import time
 import typing
-
 from datetime import datetime
 from io import StringIO
 from types import ModuleType
@@ -335,6 +334,8 @@ class StubGenerator:

         # Imports that are needed at the top of the file
         self._imports = set()  # type: Set[str]
+
+        self._sub_module_imports = set()  # type: Set[Tuple[str, str]]
         # Typing imports (behind if TYPE_CHECKING) that are needed at the top of the file
         self._typing_imports = set()  # type: Set[str]
         # Typevars that are defined
@@ -488,9 +489,6 @@ class StubGenerator:
             self._imports.add(name)

         def _add_to_typing_check(name, is_module=False):
-            # if name != self._current_module_name:
-            #     self._typing_imports.add(name)
-            #
             if name == "None":
                 return
             if is_module:
@@ -504,6 +502,24 @@
                 # the current file
                 self._typing_imports.add(splits[0])

+        def _format_qualified_class_name(cls: type) -> str:
+            """Helper to format a class with its qualified module name"""
+            # Special case for NoneType - return None
+            if cls.__name__ == "NoneType":
+                return "None"
+
+            module = inspect.getmodule(cls)
+            if (
+                module
+                and module.__name__ != "builtins"
+                and module.__name__ != "__main__"
+            ):
+                module_name = self._get_module_name_alias(module.__name__)
+                _add_to_typing_check(module_name, is_module=True)
+                return f"{module_name}.{cls.__name__}"
+            else:
+                return cls.__name__
+
         if isinstance(element, str):
             # Special case for self referential things (particularly in a class)
             if element == self._current_name:
@@ -557,19 +573,15 @@
             return element.__name__
         elif isinstance(element, type(Ellipsis)):
             return "..."
-        # elif (
-        #     isinstance(element, typing._GenericAlias)
-        #     and hasattr(element, "_name")
-        #     and element._name in ("List", "Tuple", "Dict", "Set")
-        # ):
-        #     # 3.7 has these as _GenericAlias but they don't behave like the ones in 3.10
-        #     _add_to_import("typing")
-        #     return str(element)
         elif isinstance(element, typing._GenericAlias):
             # We need to check things recursively in __args__ if it exists
             args_str = []
             for arg in getattr(element, "__args__", []):
-                args_str.append(self._get_element_name_with_module(arg))
+                # Special handling for class objects in type arguments
+                if isinstance(arg, type):
+                    args_str.append(_format_qualified_class_name(arg))
+                else:
+                    args_str.append(self._get_element_name_with_module(arg))

             _add_to_import("typing")
             if element._name:
@@ -584,12 +596,15 @@
                 args_str = [call_args, args_str[-1]]
                 return "typing.%s[%s]" % (element._name, ", ".join(args_str))
             else:
-                return "%s[%s]" % (str(element.__origin__), ", ".join(args_str))
+                # Handle the case where we have a generic type without a _name
+                origin = element.__origin__
+                if isinstance(origin, type):
+                    origin_str = _format_qualified_class_name(origin)
+                else:
+                    origin_str = str(origin)
+                return "%s[%s]" % (origin_str, ", ".join(args_str))
         elif isinstance(element, ForwardRef):
             f_arg = self._get_module_name_alias(element.__forward_arg__)
-            # if f_arg in ("Run", "Task"):  # HACK -- forward references in current.py
-            #     _add_to_import("metaflow")
-            #     f_arg = "metaflow.%s" % f_arg
             _add_to_typing_check(f_arg)
             return '"%s"' % f_arg
         elif inspect.getmodule(element) == inspect.getmodule(typing):
@@ -629,6 +644,21 @@
                     "deployer"
                 ] = (self._current_module_name + "." + name)

+        # Handle TypedDict gracefully for Python 3.7 compatibility
+        # _TypedDictMeta is not available in Python 3.7
+        typed_dict_meta = getattr(typing, "_TypedDictMeta", None)
+        if typed_dict_meta is not None and isinstance(clazz, typed_dict_meta):
+            self._sub_module_imports.add(("typing", "TypedDict"))
+            total_flag = getattr(clazz, "__total__", False)
+            buff = StringIO()
+            # Emit the TypedDict base and total flag
+            buff.write(f"class {name}(TypedDict, total={total_flag}):\n")
+            # Write out each field from __annotations__
+            for field_name, field_type in clazz.__annotations__.items():
+                ann = self._get_element_name_with_module(field_type)
+                buff.write(f"{TAB}{field_name}: {ann}\n")
+            return buff.getvalue()
+
         buff = StringIO()
         # Class prototype
         buff.write("class " + name.split(".")[-1] + "(")
@@ -973,7 +1003,6 @@
         ]

         docs = split_docs(raw_doc, section_boundaries)
-
         parameters, no_arg_version = parse_params_from_doc(docs["param_doc"])

         if docs["add_to_current_doc"]:
@@ -1501,6 +1530,8 @@
                 f.write("import " + module + "\n")
                 if module == "typing":
                     imported_typing = True
+            for module, sub_module in self._sub_module_imports:
+                f.write(f"from {module} import {sub_module}\n")
             if self._typing_imports:
                 if not imported_typing:
                     f.write("import typing\n")
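
The TypedDict branch above turns a TypedDict class into a stub body driven entirely by __total__ and __annotations__. A minimal standalone sketch of that emission logic follows; the PodSpec class is a hypothetical example, and the annotation rendering here is simplified (the real generator resolves qualified names through _get_element_name_with_module):

import typing
from io import StringIO

TAB = "    "  # assumed to mirror the generator's TAB constant

class PodSpec(typing.TypedDict, total=False):  # hypothetical example class
    cpu: int
    memory: str

def emit_typed_dict_stub(name, clazz):
    total_flag = getattr(clazz, "__total__", False)
    buff = StringIO()
    # Same shape as the generator: TypedDict base plus the total flag
    buff.write(f"class {name}(TypedDict, total={total_flag}):\n")
    for field_name, field_type in clazz.__annotations__.items():
        # Simplified annotation rendering (the generator qualifies module names)
        ann = getattr(field_type, "__name__", str(field_type))
        buff.write(f"{TAB}{field_name}: {ann}\n")
    return buff.getvalue()

print(emit_typed_dict_stub("PodSpec", PodSpec))
# class PodSpec(TypedDict, total=False):
#     cpu: int
#     memory: str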
metaflow/cmd/develop/stubs.py CHANGED
@@ -12,25 +12,13 @@ from . import develop
 from .stub_generator import StubGenerator

 _py_ver = sys.version_info[:2]
-_metadata_package = None

-
-
-
-
-
-
-    if _py_ver >= (3, 4):
-        if _py_ver >= (3, 8):
-            from importlib import metadata
-        elif _py_ver >= (3, 7):
-            from metaflow._vendor.v3_7 import importlib_metadata as metadata
-        elif _py_ver >= (3, 6):
-            from metaflow._vendor.v3_6 import importlib_metadata as metadata
-        else:
-            from metaflow._vendor.v3_5 import importlib_metadata as metadata
-        _metadata_package = metadata
-    return _metadata_package
+if _py_ver >= (3, 8):
+    from importlib import metadata
+elif _py_ver >= (3, 7):
+    from metaflow._vendor.v3_7 import importlib_metadata as metadata
+else:
+    from metaflow._vendor.v3_6 import importlib_metadata as metadata


 @develop.group(short_help="Stubs management")
@@ -45,12 +33,6 @@ def stubs(ctx: Any):
     This CLI provides utilities to check and generate stubs for your current Metaflow
     installation.
     """
-    if _check_stubs_supported() is None:
-        raise click.UsageError(
-            "Building and installing stubs are not supported on Python %d.%d "
-            "(3.4 minimum required)" % _py_ver,
-            ctx=ctx,
-        )


 @stubs.command(short_help="Check validity of stubs")
@@ -187,7 +169,7 @@ setup(
     packages=find_namespace_packages(),
     package_data={{"metaflow-stubs": ["generated_for.txt", "py.typed", "**/*.pyi"]}},
     install_requires=["metaflow=={mf_version}"],
-    python_requires=">=3.
+    python_requires=">=3.6.1",
 )
 """
 )
@@ -330,14 +312,14 @@ def get_packages_for_stubs() -> Tuple[List[Tuple[str, str]], List[str]]:
     # some reason it shows up multiple times.
     interesting_dists = [
         d
-        for d in
+        for d in metadata.distributions()
         if any(
             [
                 p == "metaflow-stubs"
                 for p in (d.read_text("top_level.txt") or "").split()
             ]
         )
-        and isinstance(d,
+        and isinstance(d, metadata.PathDistribution)
     ]

     for dist in interesting_dists:
metaflow/cmd/make_wrapper.py CHANGED
@@ -28,6 +28,36 @@ def find_makefile():
         if makefile_candidate.is_file():
             return makefile_candidate

+    # 4) When developing, Metaflow might be installed with --editable, which means the devtools will not be located within site-packages.
+    # We read the actual location from package metadata in this case, but only do this heavier operation if the above lookups fail.
+    try:
+        import json
+        from importlib.metadata import Distribution
+
+        direct_url = Distribution.from_name("metaflow").read_text("direct_url.json")
+        if direct_url:
+            content = json.loads(direct_url)
+            url = content.get("url", "")
+            if not url.startswith("file://"):
+                return None
+
+            makefile_candidate = (
+                Path(url.replace("file://", "")) / "devtools" / "Makefile"
+            )
+            if makefile_candidate.is_file():
+                return makefile_candidate
+        else:
+            # No dist metadata found. This is tied to the version of pip being used
+            # Do not bother with .egg-link installs due to the handling of the file contents being a headache due to lack of a unified spec.
+            print(
+                "Could not locate an installation of Metaflow. No package metadata found."
+            )
+            print(
+                "If Metaflow is installed as editable, try upgrading the version of pip and reinstalling in order to generate proper package metadata.\n"
+            )
+    except Exception:
+        return None
+
     return None

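
The new fallback above relies on direct_url.json, the PEP 610 metadata that pip writes for direct (including editable) installs. A hedged sketch of the same lookup in isolation; the JSON shape in the comment is the PEP 610 layout, and the error handling here is narrower than the blanket except Exception in the actual change:

import json
from pathlib import Path
from importlib.metadata import Distribution, PackageNotFoundError

def editable_install_root(dist_name="metaflow"):
    # direct_url.json (PEP 610) looks roughly like:
    #   {"url": "file:///home/user/src/metaflow", "dir_info": {"editable": true}}
    try:
        raw = Distribution.from_name(dist_name).read_text("direct_url.json")
    except PackageNotFoundError:
        return None
    if not raw:
        return None  # not a direct install; nothing to resolve
    url = json.loads(raw).get("url", "")
    if not url.startswith("file://"):
        return None  # direct install from a remote URL, not a local tree
    return Path(url.replace("file://", ""))

root = editable_install_root()
if root is not None:
    print(root / "devtools" / "Makefile")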
metaflow/datastore/content_addressed_store.py CHANGED
@@ -38,7 +38,7 @@ class ContentAddressedStore(object):
     def set_blob_cache(self, blob_cache):
         self._blob_cache = blob_cache

-    def save_blobs(self, blob_iter, raw=False, len_hint=0):
+    def save_blobs(self, blob_iter, raw=False, len_hint=0, is_transfer=False):
         """
         Saves blobs of data to the datastore

@@ -60,11 +60,16 @@ class ContentAddressedStore(object):

         Parameters
         ----------
-        blob_iter : Iterator
-
+        blob_iter : Iterator
+            Iterator over bytes objects to save
+        raw : bool, default False
             Whether to save the bytes directly or process them, by default False
-        len_hint :
+        len_hint : int, default 0
+            Hint of the number of blobs that will be produced by the
             iterator, by default 0
+        is_transfer : bool, default False
+            If True, this indicates we are saving blobs directly from the output of another
+            content addressed store's

         Returns
         -------
@@ -76,6 +81,20 @@ class ContentAddressedStore(object):

         def packing_iter():
             for blob in blob_iter:
+                if is_transfer:
+                    key, blob_data, meta = blob
+                    path = self._storage_impl.path_join(self._prefix, key[:2], key)
+                    # Transfer data is always raw/decompressed, so mark it as such
+                    meta_corrected = {"cas_raw": True, "cas_version": 1}
+
+                    results.append(
+                        self.save_blobs_result(
+                            uri=self._storage_impl.full_uri(path),
+                            key=key,
+                        )
+                    )
+                    yield path, (BytesIO(blob_data), meta_corrected)
+                    continue
                 sha = sha1(blob).hexdigest()
                 path = self._storage_impl.path_join(self._prefix, sha[:2], sha)
                 results.append(
@@ -100,7 +119,7 @@ class ContentAddressedStore(object):
         self._storage_impl.save_bytes(packing_iter(), overwrite=True, len_hint=len_hint)
         return results

-    def load_blobs(self, keys, force_raw=False):
+    def load_blobs(self, keys, force_raw=False, is_transfer=False):
         """
         Mirror function of save_blobs

@@ -111,15 +130,20 @@ class ContentAddressedStore(object):
         ----------
         keys : List of string
             Key describing the object to load
-        force_raw : bool,
+        force_raw : bool, default False
             Support for backward compatibility with previous datastores. If
             True, this will force the key to be loaded as is (raw). By default,
             False
+        is_transfer : bool, default False
+            If True, this indicates we are loading blobs to transfer them directly
+            to another datastore. We will, in this case, also transfer the metadata
+            and do minimal processing. This is for internal use only.

         Returns
         -------
         Returns an iterator of (string, bytes) tuples; the iterator may return keys
-        in a different order than were passed in.
+        in a different order than were passed in. If is_transfer is True, the tuple
+        has three elements with the third one being the metadata.
         """
         load_paths = []
         for key in keys:
@@ -127,7 +151,11 @@ class ContentAddressedStore(object):
             if self._blob_cache:
                 blob = self._blob_cache.load_key(key)
             if blob is not None:
-                yield key, blob
+                if is_transfer:
+                    # Cached blobs are decompressed/processed bytes regardless of original format
+                    yield key, blob, {"cas_raw": False, "cas_version": 1}
+                else:
+                    yield key, blob
             else:
                 path = self._storage_impl.path_join(self._prefix, key[:2], key)
                 load_paths.append((key, path))
@@ -169,7 +197,10 @@ class ContentAddressedStore(object):
             if self._blob_cache:
                 self._blob_cache.store_key(key, blob)

-            yield key, blob
+            if is_transfer:
+                yield key, blob, meta  # Preserve exact original metadata from storage
+            else:
+                yield key, blob

     def _unpack_backward_compatible(self, blob):
         # This is the backward compatible unpack
metaflow/datastore/datastore_set.py CHANGED
@@ -21,9 +21,18 @@ class TaskDataStoreSet(object):
         pathspecs=None,
         prefetch_data_artifacts=None,
         allow_not_done=False,
+        join_type=None,
+        orig_flow_datastore=None,
+        spin_artifacts=None,
     ):
         self.task_datastores = flow_datastore.get_task_datastores(
-            run_id,
+            run_id,
+            steps=steps,
+            pathspecs=pathspecs,
+            allow_not_done=allow_not_done,
+            join_type=join_type,
+            orig_flow_datastore=orig_flow_datastore,
+            spin_artifacts=spin_artifacts,
         )

         if prefetch_data_artifacts:
metaflow/datastore/flow_datastore.py CHANGED
@@ -1,10 +1,13 @@
 import itertools
 import json
+from abc import ABC, abstractmethod

 from .. import metaflow_config

 from .content_addressed_store import ContentAddressedStore
 from .task_datastore import TaskDataStore
+from .spin_datastore import SpinTaskDatastore
+from ..metaflow_profile import from_start


 class FlowDataStore(object):
@@ -63,10 +66,16 @@ class FlowDataStore(object):
             self._storage_impl.path_join(self.flow_name, "data"), self._storage_impl
         )

+        # Private
+        self._metadata_cache = None
+
     @property
     def datastore_root(self):
         return self._storage_impl.datastore_root

+    def set_metadata_cache(self, cache):
+        self._metadata_cache = cache
+
     def get_task_datastores(
         self,
         run_id=None,
@@ -76,6 +85,9 @@ class FlowDataStore(object):
         attempt=None,
         include_prior=False,
         mode="r",
+        join_type=None,
+        orig_flow_datastore=None,
+        spin_artifacts=None,
     ):
         """
         Return a list of TaskDataStore for a subset of the tasks.
@@ -95,7 +107,7 @@ class FlowDataStore(object):
             Steps to get the tasks from. If run_id is specified, this
             must also be specified, by default None
         pathspecs : List[str], optional
-            Full task specs (run_id/step_name/task_id). Can be used instead of
+            Full task specs (run_id/step_name/task_id[/attempt]). Can be used instead of
             specifying run_id and steps, by default None
         allow_not_done : bool, optional
             If True, returns the latest attempt of a task even if that attempt
@@ -106,6 +118,16 @@ class FlowDataStore(object):
             If True, returns all attempts up to and including attempt.
         mode : str, default "r"
             Mode to initialize the returned TaskDataStores in.
+        join_type : str, optional, default None
+            If specified, the join type for the task. This is used to determine
+            the user specified artifacts for the task in case of a spin task.
+        orig_flow_datastore : MetadataProvider, optional, default None
+            The metadata provider in case of a spin task. If provided, the
+            returned TaskDataStore will be a SpinTaskDatastore instead of a
+            TaskDataStore.
+        spin_artifacts : Dict[str, Any], optional, default None
+            Artifacts provided by user that can override the artifacts fetched via the
+            spin pathspec.

         Returns
         -------
@@ -145,7 +167,14 @@ class FlowDataStore(object):
         if attempt is not None and attempt <= metaflow_config.MAX_ATTEMPTS - 1:
             attempt_range = range(attempt + 1) if include_prior else [attempt]
             for task_url in task_urls:
-                for attempt in attempt_range:
+                # task_url can have a trailing slash, so strip this to avoid empty strings in the split
+                task_splits = task_url.rstrip("/").split("/")
+                # Usually it is flow, run, step, task (so 4 components) -- if we have a
+                # fifth one, there is a specific attempt number listed as well.
+                task_attempt_range = attempt_range
+                if len(task_splits) == 5:
+                    task_attempt_range = [int(task_splits[4])]
+                for attempt in task_attempt_range:
                     for suffix in [
                         TaskDataStore.METADATA_DATA_SUFFIX,
                         TaskDataStore.METADATA_ATTEMPT_SUFFIX,
@@ -198,7 +227,18 @@ class FlowDataStore(object):
             else (latest_started_attempts & done_attempts)
         )
         latest_to_fetch = [
-            (v[0], v[1], v[2], v[3], data_objs.get(v), mode, allow_not_done)
+            (
+                v[0],
+                v[1],
+                v[2],
+                v[3],
+                data_objs.get(v),
+                mode,
+                allow_not_done,
+                join_type,
+                orig_flow_datastore,
+                spin_artifacts,
+            )
             for v in latest_to_fetch
         ]
         return list(itertools.starmap(self.get_task_datastore, latest_to_fetch))
@@ -212,8 +252,63 @@ class FlowDataStore(object):
         data_metadata=None,
         mode="r",
         allow_not_done=False,
+        join_type=None,
+        orig_flow_datastore=None,
+        spin_artifacts=None,
+        persist=True,
     ):
-        return TaskDataStore(
+        if orig_flow_datastore is not None:
+            # In spin step subprocess, use SpinTaskDatastore for accessing artifacts
+            if join_type is not None:
+                # If join_type is specified, we need to use the artifacts corresponding
+                # to that particular join index, specified by the parent task pathspec.
+                spin_artifacts = spin_artifacts.get(
+                    f"{run_id}/{step_name}/{task_id}", {}
+                )
+            from_start(
+                "FlowDataStore: get_task_datastore for spin task for type %s %s metadata"
+                % (self.TYPE, "without" if data_metadata is None else "with")
+            )
+            # Get the task datastore for the spun task.
+            orig_datastore = orig_flow_datastore.get_task_datastore(
+                run_id,
+                step_name,
+                task_id,
+                attempt=attempt,
+                data_metadata=data_metadata,
+                mode=mode,
+                allow_not_done=allow_not_done,
+                persist=persist,
+            )
+
+            return SpinTaskDatastore(
+                self.flow_name,
+                run_id,
+                step_name,
+                task_id,
+                orig_datastore,
+                spin_artifacts,
+            )
+
+        cache_hit = False
+        if (
+            self._metadata_cache is not None
+            and data_metadata is None
+            and attempt is not None
+            and allow_not_done is False
+        ):
+            # If we have a metadata cache, we can try to load the metadata
+            # from the cache if it is not provided.
+            data_metadata = self._metadata_cache.load_metadata(
+                run_id, step_name, task_id, attempt
+            )
+            cache_hit = data_metadata is not None
+
+        from_start(
+            "FlowDataStore: get_task_datastore for regular task for type %s %s metadata"
+            % (self.TYPE, "without" if data_metadata is None else "with")
+        )
+        task_datastore = TaskDataStore(
             self,
             run_id,
             step_name,
@@ -222,8 +317,23 @@ class FlowDataStore(object):
             data_metadata=data_metadata,
             mode=mode,
             allow_not_done=allow_not_done,
+            persist=persist,
         )

+        # Only persist in cache if it is non-changing (so done only) and we have
+        # a non-None attempt
+        if (
+            not cache_hit
+            and self._metadata_cache is not None
+            and allow_not_done is False
+            and attempt is not None
+        ):
+            self._metadata_cache.store_metadata(
+                run_id, step_name, task_id, attempt, task_datastore.ds_metadata
+            )
+
+        return task_datastore
+
     def save_data(self, data_iter, len_hint=0):
         """Saves data to the underlying content-addressed store

@@ -265,3 +375,13 @@ class FlowDataStore(object):
         """
         for key, blob in self.ca_store.load_blobs(keys, force_raw=force_raw):
             yield key, blob
+
+
+class MetadataCache(ABC):
+    @abstractmethod
+    def load_metadata(self, run_id, step_name, task_id, attempt):
+        raise NotImplementedError()
+
+    @abstractmethod
+    def store_metadata(self, run_id, step_name, task_id, attempt, metadata_dict):
+        raise NotImplementedError()
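
MetadataCache above is an abstract hook: set_metadata_cache installs an implementation, get_task_datastore consults it before reconstructing task metadata, and it writes back only for done tasks with a concrete attempt. A minimal in-memory implementation sketch; the tuple keying is an assumption, since the ABC only fixes the method signatures:

from metaflow.datastore.flow_datastore import MetadataCache

class InMemoryMetadataCache(MetadataCache):
    def __init__(self):
        self._entries = {}

    def load_metadata(self, run_id, step_name, task_id, attempt):
        # A miss returns None, which get_task_datastore treats as "not cached"
        return self._entries.get((run_id, step_name, task_id, attempt))

    def store_metadata(self, run_id, step_name, task_id, attempt, metadata_dict):
        # Only invoked for done tasks with a concrete attempt, so entries
        # never need invalidation
        self._entries[(run_id, step_name, task_id, attempt)] = metadata_dict

# flow_datastore.set_metadata_cache(InMemoryMetadataCache())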
metaflow/datastore/spin_datastore.py ADDED
@@ -0,0 +1,91 @@
+from typing import Dict, Any
+from .task_datastore import TaskDataStore, require_mode
+from ..metaflow_profile import from_start
+
+
+class SpinTaskDatastore(object):
+    def __init__(
+        self,
+        flow_name: str,
+        run_id: str,
+        step_name: str,
+        task_id: str,
+        orig_datastore: TaskDataStore,
+        spin_artifacts: Dict[str, Any],
+    ):
+        """
+        SpinTaskDatastore is a datastore for a task that is used to retrieve
+        artifacts and attributes for a spin step. It uses the task pathspec
+        from a previous execution of the step to access the artifacts and attributes.
+
+        Parameters:
+        -----------
+        flow_name : str
+            Name of the flow
+        run_id : str
+            Run ID of the flow
+        step_name : str
+            Name of the step
+        task_id : str
+            Task ID of the step
+        orig_datastore : TaskDataStore
+            The datastore for the underlying task that is being spun.
+        spin_artifacts : Dict[str, Any]
+            User provided artifacts that are to be used in the spin task. This is a dictionary
+            where keys are artifact names and values are the actual data or metadata.
+        """
+        self.flow_name = flow_name
+        self.run_id = run_id
+        self.step_name = step_name
+        self.task_id = task_id
+        self.orig_datastore = orig_datastore
+        self.spin_artifacts = spin_artifacts
+        self._task = None
+
+        # Update _objects and _info in order to persist artifacts
+        # See `persist` method in `TaskDatastore` for more details
+        self._objects = self.orig_datastore._objects.copy()
+        self._info = self.orig_datastore._info.copy()
+
+        # We strip out some of the control ones
+        for key in ("_transition",):
+            if key in self._objects:
+                del self._objects[key]
+                del self._info[key]
+
+        from_start("SpinTaskDatastore: Initialized artifacts")
+
+    @require_mode(None)
+    def __getitem__(self, name):
+        try:
+            # Check if it's an artifact in the spin_artifacts
+            return self.spin_artifacts[name]
+        except KeyError:
+            try:
+                # Check if it's an attribute of the task
+                # _foreach_stack, _foreach_index, ...
+                return self.orig_datastore[name]
+            except (KeyError, AttributeError) as e:
+                raise KeyError(
+                    f"Attribute '{name}' not found in the previous execution "
+                    f"of the tasks for `{self.step_name}`."
+                ) from e
+
+    @require_mode(None)
+    def is_none(self, name):
+        val = self.__getitem__(name)
+        return val is None
+
+    @require_mode(None)
+    def __contains__(self, name):
+        try:
+            _ = self.__getitem__(name)
+            return True
+        except KeyError:
+            return False
+
+    @require_mode(None)
+    def items(self):
+        if self._objects:
+            return self._objects.items()
+        return {}