metaflow 2.15.21__py2.py3-none-any.whl → 2.16.0__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74)
  1. metaflow/__init__.py +7 -1
  2. metaflow/cli.py +16 -1
  3. metaflow/cli_components/init_cmd.py +1 -0
  4. metaflow/cli_components/run_cmds.py +6 -2
  5. metaflow/client/core.py +22 -30
  6. metaflow/datastore/task_datastore.py +0 -1
  7. metaflow/debug.py +5 -0
  8. metaflow/decorators.py +230 -70
  9. metaflow/extension_support/__init__.py +15 -8
  10. metaflow/extension_support/_empty_file.py +2 -2
  11. metaflow/flowspec.py +80 -53
  12. metaflow/graph.py +24 -2
  13. metaflow/meta_files.py +13 -0
  14. metaflow/metadata_provider/metadata.py +7 -1
  15. metaflow/metaflow_config.py +5 -0
  16. metaflow/metaflow_environment.py +82 -25
  17. metaflow/metaflow_version.py +1 -1
  18. metaflow/package/__init__.py +664 -0
  19. metaflow/packaging_sys/__init__.py +870 -0
  20. metaflow/packaging_sys/backend.py +113 -0
  21. metaflow/packaging_sys/distribution_support.py +153 -0
  22. metaflow/packaging_sys/tar_backend.py +86 -0
  23. metaflow/packaging_sys/utils.py +91 -0
  24. metaflow/packaging_sys/v1.py +476 -0
  25. metaflow/plugins/airflow/airflow.py +5 -1
  26. metaflow/plugins/airflow/airflow_cli.py +15 -4
  27. metaflow/plugins/argo/argo_workflows.py +15 -4
  28. metaflow/plugins/argo/argo_workflows_cli.py +16 -4
  29. metaflow/plugins/aws/batch/batch.py +22 -3
  30. metaflow/plugins/aws/batch/batch_cli.py +3 -0
  31. metaflow/plugins/aws/batch/batch_decorator.py +13 -5
  32. metaflow/plugins/aws/step_functions/step_functions.py +4 -1
  33. metaflow/plugins/aws/step_functions/step_functions_cli.py +15 -4
  34. metaflow/plugins/cards/card_decorator.py +0 -5
  35. metaflow/plugins/kubernetes/kubernetes.py +8 -1
  36. metaflow/plugins/kubernetes/kubernetes_cli.py +3 -0
  37. metaflow/plugins/kubernetes/kubernetes_decorator.py +13 -5
  38. metaflow/plugins/package_cli.py +25 -23
  39. metaflow/plugins/parallel_decorator.py +4 -2
  40. metaflow/plugins/pypi/bootstrap.py +8 -2
  41. metaflow/plugins/pypi/conda_decorator.py +39 -82
  42. metaflow/plugins/pypi/conda_environment.py +6 -2
  43. metaflow/plugins/pypi/pypi_decorator.py +4 -4
  44. metaflow/plugins/test_unbounded_foreach_decorator.py +2 -2
  45. metaflow/plugins/timeout_decorator.py +0 -1
  46. metaflow/plugins/uv/bootstrap.py +11 -0
  47. metaflow/plugins/uv/uv_environment.py +4 -2
  48. metaflow/pylint_wrapper.py +5 -1
  49. metaflow/runner/click_api.py +5 -4
  50. metaflow/runner/subprocess_manager.py +14 -2
  51. metaflow/runtime.py +37 -11
  52. metaflow/task.py +91 -7
  53. metaflow/user_configs/config_options.py +13 -8
  54. metaflow/user_configs/config_parameters.py +0 -4
  55. metaflow/user_decorators/__init__.py +0 -0
  56. metaflow/user_decorators/common.py +144 -0
  57. metaflow/user_decorators/mutable_flow.py +499 -0
  58. metaflow/user_decorators/mutable_step.py +424 -0
  59. metaflow/user_decorators/user_flow_decorator.py +263 -0
  60. metaflow/user_decorators/user_step_decorator.py +712 -0
  61. metaflow/util.py +4 -1
  62. metaflow/version.py +1 -1
  63. {metaflow-2.15.21.dist-info → metaflow-2.16.0.dist-info}/METADATA +2 -2
  64. {metaflow-2.15.21.dist-info → metaflow-2.16.0.dist-info}/RECORD +71 -60
  65. metaflow/info_file.py +0 -25
  66. metaflow/package.py +0 -203
  67. metaflow/user_configs/config_decorators.py +0 -568
  68. {metaflow-2.15.21.data → metaflow-2.16.0.data}/data/share/metaflow/devtools/Makefile +0 -0
  69. {metaflow-2.15.21.data → metaflow-2.16.0.data}/data/share/metaflow/devtools/Tiltfile +0 -0
  70. {metaflow-2.15.21.data → metaflow-2.16.0.data}/data/share/metaflow/devtools/pick_services.sh +0 -0
  71. {metaflow-2.15.21.dist-info → metaflow-2.16.0.dist-info}/WHEEL +0 -0
  72. {metaflow-2.15.21.dist-info → metaflow-2.16.0.dist-info}/entry_points.txt +0 -0
  73. {metaflow-2.15.21.dist-info → metaflow-2.16.0.dist-info}/licenses/LICENSE +0 -0
  74. {metaflow-2.15.21.dist-info → metaflow-2.16.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,113 @@
1
+ from abc import ABC, abstractmethod
2
+ from io import BytesIO
3
+ from typing import Any, IO, List, Optional, Union
4
+
5
+
6
class PackagingBackend(ABC):
    """
    Abstract interface for building and reading package archives.

    Every subclass is automatically recorded in a registry keyed by its `type`
    class attribute (see `__init_subclass__`) and can be looked up again with
    `get_backend`.

    An instance wraps a single archive stored in `self._archive`. The archive
    is `None` until `create()` is called. Instances can be used as context
    managers: entering calls `create()` and exiting calls `close()`.

    The `cls_*` class methods operate on an archive object passed explicitly;
    the instance methods of the same name (without the `cls_` prefix) forward
    to them using the instance's own archive.
    """

    # Registry of backend classes keyed by their `type`; this single dict is
    # shared by the base class and all subclasses.
    _mappings = {}
    # Registry key for the backend; concrete subclasses override it.
    type = "none"

    def __init_subclass__(cls, **kwargs):
        """
        Register the new subclass under `cls.type`.

        Raises
        ------
        ValueError
            If another backend is already registered under the same `type`.
        """
        super().__init_subclass__(**kwargs)
        if cls.type in cls._mappings:
            raise ValueError(f"PackagingBackend {cls.type} already exists")
        cls._mappings[cls.type] = cls

    @classmethod
    def get_backend(cls, name: str) -> "PackagingBackend":
        """
        Return the backend registered under `name`.

        Note that the registered *class* is returned, not an instance of it.

        Raises
        ------
        ValueError
            If no backend is registered under `name`.
        """
        if name not in cls._mappings:
            raise ValueError(f"PackagingBackend {name} not found")
        return cls._mappings[name]

    @classmethod
    def backend_type(cls) -> str:
        """Return the registry key (`type`) of this backend."""
        return cls.type

    @classmethod
    @abstractmethod
    def get_extract_commands(cls, archive_name: str, dest_dir: str) -> List[str]:
        """Return the command(s) that extract `archive_name` into `dest_dir`."""
        pass

    def __init__(self):
        # No archive exists until create() is called.
        self._archive = None

    @abstractmethod
    def create(self) -> "PackagingBackend":
        """Create a new archive for this instance and return the backend."""
        pass

    @abstractmethod
    def add_file(self, filename: str, arcname: Optional[str] = None):
        """Add the file at `filename` to the archive, optionally as `arcname`."""
        pass

    @abstractmethod
    def add_data(self, data: BytesIO, arcname: str):
        """Add the in-memory content `data` to the archive as `arcname`."""
        pass

    @abstractmethod
    def close(self):
        """Close the archive."""
        pass

    @abstractmethod
    def get_blob(self) -> Optional[Union[bytes, bytearray]]:
        """Return the archive content as bytes, or None if there is none."""
        pass

    @classmethod
    @abstractmethod
    def cls_open(cls, content: IO[bytes]) -> Any:
        """Open the archive from the given content."""
        pass

    @classmethod
    @abstractmethod
    def cls_has_member(cls, archive: Any, name: str) -> bool:
        """Return True if `archive` contains a member called `name`."""
        pass

    @classmethod
    @abstractmethod
    def cls_get_member(cls, archive: Any, name: str) -> Optional[bytes]:
        """Return the content of member `name` of `archive`, or None."""
        pass

    @classmethod
    @abstractmethod
    def cls_extract_members(
        cls,
        archive: Any,
        members: Optional[List[str]] = None,
        dest_dir: str = ".",
    ) -> None:
        """Extract `members` of `archive` into `dest_dir`."""
        pass

    @classmethod
    @abstractmethod
    def cls_list_members(cls, archive: Any) -> Optional[List[str]]:
        """Return the names of the members of `archive`, or None."""
        pass

    # The instance-level accessors below compare against None explicitly
    # rather than relying on truthiness: an archive object that defines
    # __len__ or __bool__ must not be mistaken for an uncreated archive
    # just because it is currently empty.

    def has_member(self, name: str) -> bool:
        """
        Return True if this instance's archive contains `name`.

        Raises
        ------
        ValueError
            If the archive has not been created.
        """
        if self._archive is None:
            raise ValueError("Cannot check for member in an uncreated archive")
        return self.cls_has_member(self._archive, name)

    def get_member(self, name: str) -> Optional[bytes]:
        """
        Return the content of member `name` from this instance's archive.

        Raises
        ------
        ValueError
            If the archive has not been created.
        """
        if self._archive is None:
            raise ValueError("Cannot get member from an uncreated archive")
        return self.cls_get_member(self._archive, name)

    def extract_members(
        self, members: Optional[List[str]] = None, dest_dir: str = "."
    ) -> None:
        """
        Extract `members` of this instance's archive into `dest_dir`.

        Raises
        ------
        ValueError
            If the archive has not been created.
        """
        if self._archive is None:
            raise ValueError("Cannot extract from an uncreated archive")
        self.cls_extract_members(self._archive, members, dest_dir)

    def list_members(self) -> Optional[List[str]]:
        """
        Return the member names of this instance's archive.

        Raises
        ------
        ValueError
            If the archive has not been created.
        """
        if self._archive is None:
            raise ValueError("Cannot list members from an uncreated archive")
        return self.cls_list_members(self._archive)

    def __enter__(self):
        self.create()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Returns None, so an exception raised in the `with` body propagates.
        self.close()
@@ -0,0 +1,153 @@
1
+ # Support saving of distribution information so we can give it back to users even
2
+ # if we do not install those distributions. This is used to package distributions in
3
+ # the MetaflowCodeContent package and provide an experience as if the packages were installed
4
+ # system-wide.
5
+
6
+ import os
7
+ import re
8
+ import sys
9
+ from pathlib import Path
10
+ from types import ModuleType
11
+ from typing import (
12
+ Callable,
13
+ Dict,
14
+ List,
15
+ Mapping,
16
+ NamedTuple,
17
+ Optional,
18
+ Set,
19
+ TYPE_CHECKING,
20
+ Union,
21
+ cast,
22
+ )
23
+
24
+ import inspect
25
+ from collections import defaultdict
26
+
27
+ from ..extension_support import metadata
28
+ from ..util import get_metaflow_root
29
+
30
+ if TYPE_CHECKING:
31
+ import pathlib
32
+
33
# Cache filled by modules_to_distributions() on its first call.
_cached_distributions = None

packages_distributions = None  # type: Optional[Callable[[], Mapping[str, List[str]]]]
# Matches runs of "-", "_" and "." in a distribution name.
name_normalizer = re.compile(r"[-_.]+")

if sys.version_info[:2] < (3, 10):
    # This is the code present in 3.10+ -- we replicate here for other versions

    def _top_level_declared(dist: metadata.Distribution) -> List[str]:
        # Names listed in the distribution's top_level.txt (empty if absent).
        declared = dist.read_text("top_level.txt")
        return declared.split() if declared else []

    def _topmost(name: "pathlib.PurePosixPath") -> Optional[str]:
        """
        Return the top-most parent as long as there is a parent.
        """
        first, *remainder = name.parts
        if remainder:
            return first
        return None

    def _get_toplevel_name(name: "pathlib.PurePosixPath") -> str:
        parent = _topmost(name)
        if parent:
            return parent
        # python/typeshed#10328
        module_name = inspect.getmodulename(name)  # type: ignore
        return module_name if module_name else str(name)

    def _top_level_inferred(dist: "metadata.Distribution"):
        # Derive candidate top-level names from the distribution's file list
        # and keep only those without a "." in them.
        candidates = {_get_toplevel_name(f) for f in (dist.files or [])}
        return (candidate for candidate in candidates if "." not in candidate)

    def _packages_distributions() -> Mapping[str, List[str]]:
        """
        Return a mapping of top-level packages to their
        distributions.
        """
        pkg_to_dist = defaultdict(list)
        for dist in metadata.distributions():
            # Prefer the declared top-level names; infer them otherwise.
            top_levels = _top_level_declared(dist)
            if not top_levels:
                top_levels = _top_level_inferred(dist)
            for pkg in top_levels:
                pkg_to_dist[pkg].append(dist.metadata["Name"])
        return dict(pkg_to_dist)

    packages_distributions = _packages_distributions
else:
    packages_distributions = metadata.packages_distributions
79
+
80
+
81
def modules_to_distributions() -> Dict[str, List[metadata.Distribution]]:
    """
    Map each top-level module name to the distributions that provide it.

    The mapping is computed on the first call and stored in the module-level
    `_cached_distributions`; later calls return that same dictionary.

    Returns
    -------
    Dict[str, List[metadata.Distribution]]
        A mapping of top-level modules to their distributions.
    """
    global _cached_distributions
    if _cached_distributions is not None:
        return _cached_distributions

    lookup = cast(Callable[[], Mapping[str, List[str]]], packages_distributions)
    resolved = {}
    for module_name, dist_names in lookup().items():
        resolved[module_name] = [metadata.distribution(d) for d in dist_names]
    # Only publish the cache once the whole mapping was built successfully.
    _cached_distributions = resolved
    return _cached_distributions
97
+
98
+
99
class _ModuleInfo(NamedTuple):
    # Record describing a module: its name, a set of root paths, the module
    # object itself and a flag stored as `metaflow_module`.
    # (Comments are used instead of a docstring so the auto-generated
    # NamedTuple __doc__ is left untouched.)
    name: str
    root_paths: Set[str]
    module: ModuleType
    metaflow_module: bool
108
+
109
+
110
class PackagedDistribution(metadata.Distribution):
    """
    A Python Package packaged within a MetaflowCodeContent. This allows users to use
    importlib as they would regularly and the packaged Python Package would be
    considered as a distribution even if it really isn't (since it is just included
    in the PythonPath).

    Parameters
    ----------
    root : str
        Directory against which `locate_file` resolves paths.
    content : Dict[str, str]
        Maps a file name to the text returned by `read_text` for that name.
    """

    def __init__(self, root: str, content: Dict[str, str]):
        self._content = content
        self._root = Path(root)

    # Strongly inspired from PathDistribution in metadata.py
    def read_text(self, filename: Union[str, os.PathLike]) -> Optional[str]:
        # Served from the in-memory mapping; unknown names give None.
        return self._content.get(str(filename))

    read_text.__doc__ = metadata.Distribution.read_text.__doc__

    # Returns a metadata.SimplePath but not always present in importlib.metadata libs so
    # skipping return type.
    def locate_file(self, path: Union[str, os.PathLike]):
        return self._root / path
133
+
134
+
135
class PackagedDistributionFinder(metadata.DistributionFinder):
    """
    Distribution finder backed by an in-memory description of distributions.

    Parameters
    ----------
    dist_info : Dict[str, Dict[str, str]]
        Maps a distribution name to the content mapping handed to
        `PackagedDistribution` for that distribution.
    """

    def __init__(self, dist_info: Dict[str, Dict[str, str]]):
        self._dist_info = dist_info

    def find_distributions(self, context=metadata.DistributionFinder.Context()):
        """
        Yield the `PackagedDistribution` objects matching `context`.

        With no name in the context, every known distribution is yielded.
        Otherwise the requested name is normalized (runs of "-", "_" and "."
        become a single "-", then lower-cased) and at most one distribution
        is yielded.
        """
        requested = context.name
        if requested is None:
            # Yields all known distributions
            for dist_name, dist_content in self._dist_info.items():
                yield PackagedDistribution(
                    os.path.join(get_metaflow_root(), dist_name), dist_content
                )
            return None

        key = name_normalizer.sub("-", cast(str, requested)).lower()
        if key not in self._dist_info:
            return None
        # NOTE(review): the root uses the name as requested, not the
        # normalized key used for the lookup -- confirm this is intended.
        yield PackagedDistribution(
            os.path.join(get_metaflow_root(), cast(str, requested)),
            self._dist_info[key],
        )
        return None
@@ -0,0 +1,86 @@
1
+ import tarfile
2
+
3
+ from io import BytesIO
4
+ from typing import IO, List, Optional, Union
5
+
6
+ from .backend import PackagingBackend
7
+
8
+
9
class TarPackagingBackend(PackagingBackend):
    """
    Packaging backend that stores content in a gzip-compressed tar archive.

    The archive is built in memory (`self._buf`). All members get the same
    fixed modification time and `get_blob` clears the timestamp bytes of the
    gzip header, so the time of packaging does not leak into the blob.
    """

    type = "tgz"

    @classmethod
    def get_extract_commands(cls, archive_name: str, dest_dir: str) -> List[str]:
        """Return the shell command(s) extracting `archive_name` into `dest_dir`."""
        # NOTE(review): both values are interpolated into the command unquoted;
        # confirm callers never pass untrusted or space-containing paths.
        return [
            f"TAR_OPTIONS='--warning=no-timestamp' tar -xzf {archive_name} -C {dest_dir}"
        ]

    def __init__(self):
        super().__init__()
        # In-memory buffer receiving the archive bytes; set by create().
        self._buf = None

    def create(self):
        """Start a new in-memory gzip tar archive and return this backend."""
        self._buf = BytesIO()
        # dereference=True: symlinks are stored as the files they point to.
        self._archive = tarfile.open(
            fileobj=self._buf, mode="w:gz", compresslevel=3, dereference=True
        )
        return self

    def add_file(self, filename: str, arcname: Optional[str] = None):
        """Add the file at `filename` to the archive, optionally as `arcname`."""
        info = self._archive.gettarinfo(filename, arcname)
        # Setting this default to Dec 3, 2019
        info.mtime = 1575360000
        with open(filename, mode="rb") as f:
            self._archive.addfile(info, f)

    def add_data(self, data: BytesIO, arcname: str):
        """Add the whole content of `data` to the archive as `arcname`."""
        info = tarfile.TarInfo(arcname)
        # Rewind so the full buffer is written regardless of its position.
        data.seek(0)
        info.size = len(data.getvalue())
        # Setting this default to Dec 3, 2019
        info.mtime = 1575360000
        self._archive.addfile(info, data)

    def close(self):
        """Close the archive if one was created."""
        if self._archive is not None:
            self._archive.close()

    def get_blob(self) -> Optional[Union[bytes, bytearray]]:
        """
        Return the archive bytes, or None if `create()` was never called.

        NOTE(review): presumably meant to be called after `close()` so the
        archive is complete -- confirm against callers.
        """
        if self._buf is None:
            return None
        blob = bytearray(self._buf.getvalue())
        blob[4:8] = [0] * 4  # Reset 4 bytes from offset 4 to account for ts
        return blob

    @classmethod
    def cls_open(cls, content: IO[bytes]) -> tarfile.TarFile:
        """Open `content` as a gzip-compressed tar archive for reading."""
        return tarfile.open(fileobj=content, mode="r:gz")

    @classmethod
    def cls_has_member(cls, archive: tarfile.TarFile, name: str) -> bool:
        """Return True if `archive` has a member called `name`."""
        try:
            archive.getmember(name)
        except KeyError:
            return False
        return True

    @classmethod
    def cls_get_member(cls, archive: tarfile.TarFile, name: str) -> Optional[bytes]:
        """
        Return the content of member `name`.

        Returns None when the member does not exist or has no readable
        content (`extractfile` returns None for members that are neither a
        regular file nor a link, e.g. directories).
        """
        try:
            extracted = archive.extractfile(archive.getmember(name))
        except KeyError:
            return None
        if extracted is None:
            return None
        with extracted:
            return extracted.read()

    @classmethod
    def cls_extract_members(
        cls,
        archive: tarfile.TarFile,
        members: Optional[List[str]] = None,
        dest_dir: str = ".",
    ) -> None:
        """
        Extract `members` (all members when None) of `archive` into `dest_dir`.

        Raises
        ------
        KeyError
            If a requested member name is not present in the archive.
        """
        resolved = None
        if members is not None:
            # extractall() is documented to take TarInfo objects; passing
            # plain names fails on interpreters that do not resolve them.
            resolved = [
                archive.getmember(m) if isinstance(m, str) else m for m in members
            ]
        # NOTE(review): no extraction filter is applied; confirm the archives
        # handled here always come from a trusted source.
        archive.extractall(path=dest_dir, members=resolved)

    @classmethod
    def cls_list_members(cls, archive: tarfile.TarFile) -> Optional[List[str]]:
        """Return the member names of `archive`, or None if it has none."""
        return archive.getnames() or None
@@ -0,0 +1,91 @@
1
+ import os
2
+ from contextlib import contextmanager
3
+ from typing import Callable, Generator, List, Optional, Tuple
4
+
5
+ from ..util import to_unicode
6
+
7
+
8
+ # this is os.walk(follow_symlinks=True) with cycle detection
9
def walk_without_cycles(
    top_root: str,
    exclude_dirs: Optional[List[str]] = None,
) -> Generator[Tuple[str, List[str]], None, None]:
    """
    Walk `top_root` like `os.walk`, following directory symlinks without
    looping forever.

    Directories named `__pycache__` are always skipped. Directories whose
    name is in `exclude_dirs` are skipped at every depth of the tree reached
    directly from `top_root`, but not inside the targets of followed symlinks
    (those are walked with only the default skip list).

    Parameters
    ----------
    top_root : str
        Directory to start walking from.
    exclude_dirs : List[str], optional
        Additional directory names to skip.

    Yields
    ------
    Tuple[str, List[str]]
        A directory path and the names of the files directly inside it.
    """
    # Resolved targets of the symlinks already followed.
    seen = set()

    default_skip_dirs = ["__pycache__"]

    def _recurse(root, skip_dirs):
        for parent, dirs, files in os.walk(root):
            # Prune in place so os.walk does not descend into skipped dirs.
            dirs[:] = [d for d in dirs if d not in skip_dirs]
            for d in dirs:
                path = os.path.join(parent, d)
                if os.path.islink(path):
                    # Breaking loops: never follow the same symlink twice
                    #
                    # NOTE: this also means that links to sibling links are
                    # not followed. In this case:
                    #
                    #   x -> y
                    #   y -> oo
                    #   oo/real_file
                    #
                    # real_file is only included twice, not three times
                    reallink = os.path.realpath(path)
                    if reallink not in seen:
                        seen.add(reallink)
                        yield from _recurse(path, default_skip_dirs)
            yield parent, files

    # NOTE(review): the original loop rebound `skip_dirs` after the first
    # item, which had no effect on the already-running generator. If the
    # exclusion was meant to apply to the top level only, that needs a real
    # implementation -- confirm the intent with callers.
    yield from _recurse(top_root, set(default_skip_dirs + (exclude_dirs or [])))
44
+
45
+
46
def walk(
    root: str,
    exclude_hidden: bool = True,
    file_filter: Optional[Callable[[str], bool]] = None,
    exclude_tl_dirs: Optional[List[str]] = None,
) -> Generator[Tuple[str, str], None, None]:
    """
    Yield the files found under `root`, following symlinks without cycles.

    Parameters
    ----------
    root : str
        Directory to walk.
    exclude_hidden : bool, default True
        If True, skip every directory whose path contains "/." (the check is
        made on the full path, so it also matches components of `root`).
    file_filter : Callable[[str], bool], optional
        Called with each file name (not the full path); only files for which
        it returns True are yielded. All files are yielded when None.
    exclude_tl_dirs : List[str], optional
        Directory names passed to `walk_without_cycles` as `exclude_dirs`.

    Yields
    ------
    Tuple[str, str]
        The path of the file and that same path with the parent directory of
        `root` stripped (so it starts with the last component of `root`).
        When `root` has no directory component, the path is left unchanged.
    """
    root = to_unicode(root)  # handle files/folder with non ascii chars
    # Length of the "<parent of root>/" prefix to strip. Computed carefully so
    # that nothing is stripped when there is no parent ("pkg") and only one
    # separator is stripped when the parent already ends with one ("/").
    parent_dir = os.path.dirname(root)
    if not parent_dir:
        prefixlen = 0
    elif parent_dir.endswith("/"):
        prefixlen = len(parent_dir)
    else:
        prefixlen = len(parent_dir) + 1
    for path, files in walk_without_cycles(root, exclude_tl_dirs):
        if exclude_hidden and "/." in path:
            continue
        for fname in files:
            if file_filter is None or file_filter(fname):
                p = os.path.join(path, fname)
                yield p, p[prefixlen:]
67
+
68
+
69
def suffix_filter(suffixes: List[str]) -> Callable[[str], bool]:
    """
    Returns a filter function that checks if a file ends with any of the given suffixes.

    Matching is case-insensitive. A file name starting with "." is accepted
    only if the whole name is one of the suffixes; any other name is accepted
    if it ends with one of them.

    Parameters
    ----------
    suffixes : List[str]
        Accepted suffixes. If None, the returned filter accepts every name.

    Returns
    -------
    Callable[[str], bool]
        Predicate taking a file name.
    """
    # Normalize once; a tuple can be handed to str.endswith directly.
    lowered = None if suffixes is None else tuple(s.lower() for s in suffixes)

    def _filter(fname: str) -> bool:
        if lowered is None:
            return True
        fname = fname.lower()
        # startswith (rather than fname[0]) keeps an empty name from raising.
        if fname.startswith("."):
            return fname in lowered
        return fname.endswith(lowered)

    return _filter
84
+
85
+
86
@contextmanager
def with_dir(new_dir):
    """
    Context manager that changes the working directory to `new_dir` for the
    duration of the `with` block and yields `new_dir`.

    The previous working directory is restored when the block exits, including
    when it exits with an exception.
    """
    current_dir = os.getcwd()
    os.chdir(new_dir)
    try:
        yield new_dir
    finally:
        # Without the finally, an exception in the body would leave the
        # process in new_dir.
        os.chdir(current_dir)