metaflow 2.16.5__py2.py3-none-any.whl → 2.16.7__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metaflow/_vendor/click/core.py +3 -4
- metaflow/_vendor/imghdr/__init__.py +11 -0
- metaflow/_vendor/yaml/__init__.py +427 -0
- metaflow/_vendor/yaml/composer.py +139 -0
- metaflow/_vendor/yaml/constructor.py +748 -0
- metaflow/_vendor/yaml/cyaml.py +101 -0
- metaflow/_vendor/yaml/dumper.py +62 -0
- metaflow/_vendor/yaml/emitter.py +1137 -0
- metaflow/_vendor/yaml/error.py +75 -0
- metaflow/_vendor/yaml/events.py +86 -0
- metaflow/_vendor/yaml/loader.py +63 -0
- metaflow/_vendor/yaml/nodes.py +49 -0
- metaflow/_vendor/yaml/parser.py +589 -0
- metaflow/_vendor/yaml/reader.py +185 -0
- metaflow/_vendor/yaml/representer.py +389 -0
- metaflow/_vendor/yaml/resolver.py +227 -0
- metaflow/_vendor/yaml/scanner.py +1435 -0
- metaflow/_vendor/yaml/serializer.py +111 -0
- metaflow/_vendor/yaml/tokens.py +104 -0
- metaflow/cli.py +11 -2
- metaflow/client/core.py +6 -1
- metaflow/extension_support/__init__.py +4 -3
- metaflow/metaflow_environment.py +14 -6
- metaflow/package/__init__.py +18 -9
- metaflow/packaging_sys/__init__.py +53 -43
- metaflow/packaging_sys/backend.py +21 -6
- metaflow/packaging_sys/tar_backend.py +16 -3
- metaflow/packaging_sys/v1.py +21 -21
- metaflow/plugins/argo/argo_workflows_deployer_objects.py +37 -0
- metaflow/plugins/aws/step_functions/step_functions_deployer_objects.py +16 -0
- metaflow/plugins/cards/card_modules/convert_to_native_type.py +7 -1
- metaflow/plugins/pypi/conda_decorator.py +4 -2
- metaflow/runner/click_api.py +14 -7
- metaflow/runner/deployer.py +80 -1
- metaflow/runner/subprocess_manager.py +20 -12
- metaflow/user_decorators/mutable_flow.py +3 -1
- metaflow/vendor.py +23 -6
- metaflow/version.py +1 -1
- {metaflow-2.16.5.dist-info → metaflow-2.16.7.dist-info}/METADATA +2 -2
- {metaflow-2.16.5.dist-info → metaflow-2.16.7.dist-info}/RECORD +47 -30
- {metaflow-2.16.5.data → metaflow-2.16.7.data}/data/share/metaflow/devtools/Makefile +0 -0
- {metaflow-2.16.5.data → metaflow-2.16.7.data}/data/share/metaflow/devtools/Tiltfile +0 -0
- {metaflow-2.16.5.data → metaflow-2.16.7.data}/data/share/metaflow/devtools/pick_services.sh +0 -0
- {metaflow-2.16.5.dist-info → metaflow-2.16.7.dist-info}/WHEEL +0 -0
- {metaflow-2.16.5.dist-info → metaflow-2.16.7.dist-info}/entry_points.txt +0 -0
- {metaflow-2.16.5.dist-info → metaflow-2.16.7.dist-info}/licenses/LICENSE +0 -0
- {metaflow-2.16.5.dist-info → metaflow-2.16.7.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,111 @@
|
|
1
|
+
|
2
|
+
__all__ = ['Serializer', 'SerializerError']
|
3
|
+
|
4
|
+
from .error import YAMLError
|
5
|
+
from .events import *
|
6
|
+
from .nodes import *
|
7
|
+
|
8
|
+
class SerializerError(YAMLError):
|
9
|
+
pass
|
10
|
+
|
11
|
+
class Serializer:
|
12
|
+
|
13
|
+
ANCHOR_TEMPLATE = 'id%03d'
|
14
|
+
|
15
|
+
def __init__(self, encoding=None,
|
16
|
+
explicit_start=None, explicit_end=None, version=None, tags=None):
|
17
|
+
self.use_encoding = encoding
|
18
|
+
self.use_explicit_start = explicit_start
|
19
|
+
self.use_explicit_end = explicit_end
|
20
|
+
self.use_version = version
|
21
|
+
self.use_tags = tags
|
22
|
+
self.serialized_nodes = {}
|
23
|
+
self.anchors = {}
|
24
|
+
self.last_anchor_id = 0
|
25
|
+
self.closed = None
|
26
|
+
|
27
|
+
def open(self):
|
28
|
+
if self.closed is None:
|
29
|
+
self.emit(StreamStartEvent(encoding=self.use_encoding))
|
30
|
+
self.closed = False
|
31
|
+
elif self.closed:
|
32
|
+
raise SerializerError("serializer is closed")
|
33
|
+
else:
|
34
|
+
raise SerializerError("serializer is already opened")
|
35
|
+
|
36
|
+
def close(self):
|
37
|
+
if self.closed is None:
|
38
|
+
raise SerializerError("serializer is not opened")
|
39
|
+
elif not self.closed:
|
40
|
+
self.emit(StreamEndEvent())
|
41
|
+
self.closed = True
|
42
|
+
|
43
|
+
#def __del__(self):
|
44
|
+
# self.close()
|
45
|
+
|
46
|
+
def serialize(self, node):
|
47
|
+
if self.closed is None:
|
48
|
+
raise SerializerError("serializer is not opened")
|
49
|
+
elif self.closed:
|
50
|
+
raise SerializerError("serializer is closed")
|
51
|
+
self.emit(DocumentStartEvent(explicit=self.use_explicit_start,
|
52
|
+
version=self.use_version, tags=self.use_tags))
|
53
|
+
self.anchor_node(node)
|
54
|
+
self.serialize_node(node, None, None)
|
55
|
+
self.emit(DocumentEndEvent(explicit=self.use_explicit_end))
|
56
|
+
self.serialized_nodes = {}
|
57
|
+
self.anchors = {}
|
58
|
+
self.last_anchor_id = 0
|
59
|
+
|
60
|
+
def anchor_node(self, node):
|
61
|
+
if node in self.anchors:
|
62
|
+
if self.anchors[node] is None:
|
63
|
+
self.anchors[node] = self.generate_anchor(node)
|
64
|
+
else:
|
65
|
+
self.anchors[node] = None
|
66
|
+
if isinstance(node, SequenceNode):
|
67
|
+
for item in node.value:
|
68
|
+
self.anchor_node(item)
|
69
|
+
elif isinstance(node, MappingNode):
|
70
|
+
for key, value in node.value:
|
71
|
+
self.anchor_node(key)
|
72
|
+
self.anchor_node(value)
|
73
|
+
|
74
|
+
def generate_anchor(self, node):
|
75
|
+
self.last_anchor_id += 1
|
76
|
+
return self.ANCHOR_TEMPLATE % self.last_anchor_id
|
77
|
+
|
78
|
+
def serialize_node(self, node, parent, index):
|
79
|
+
alias = self.anchors[node]
|
80
|
+
if node in self.serialized_nodes:
|
81
|
+
self.emit(AliasEvent(alias))
|
82
|
+
else:
|
83
|
+
self.serialized_nodes[node] = True
|
84
|
+
self.descend_resolver(parent, index)
|
85
|
+
if isinstance(node, ScalarNode):
|
86
|
+
detected_tag = self.resolve(ScalarNode, node.value, (True, False))
|
87
|
+
default_tag = self.resolve(ScalarNode, node.value, (False, True))
|
88
|
+
implicit = (node.tag == detected_tag), (node.tag == default_tag)
|
89
|
+
self.emit(ScalarEvent(alias, node.tag, implicit, node.value,
|
90
|
+
style=node.style))
|
91
|
+
elif isinstance(node, SequenceNode):
|
92
|
+
implicit = (node.tag
|
93
|
+
== self.resolve(SequenceNode, node.value, True))
|
94
|
+
self.emit(SequenceStartEvent(alias, node.tag, implicit,
|
95
|
+
flow_style=node.flow_style))
|
96
|
+
index = 0
|
97
|
+
for item in node.value:
|
98
|
+
self.serialize_node(item, node, index)
|
99
|
+
index += 1
|
100
|
+
self.emit(SequenceEndEvent())
|
101
|
+
elif isinstance(node, MappingNode):
|
102
|
+
implicit = (node.tag
|
103
|
+
== self.resolve(MappingNode, node.value, True))
|
104
|
+
self.emit(MappingStartEvent(alias, node.tag, implicit,
|
105
|
+
flow_style=node.flow_style))
|
106
|
+
for key, value in node.value:
|
107
|
+
self.serialize_node(key, node, None)
|
108
|
+
self.serialize_node(value, node, key)
|
109
|
+
self.emit(MappingEndEvent())
|
110
|
+
self.ascend_resolver()
|
111
|
+
|
@@ -0,0 +1,104 @@
|
|
1
|
+
|
2
|
+
class Token(object):
|
3
|
+
def __init__(self, start_mark, end_mark):
|
4
|
+
self.start_mark = start_mark
|
5
|
+
self.end_mark = end_mark
|
6
|
+
def __repr__(self):
|
7
|
+
attributes = [key for key in self.__dict__
|
8
|
+
if not key.endswith('_mark')]
|
9
|
+
attributes.sort()
|
10
|
+
arguments = ', '.join(['%s=%r' % (key, getattr(self, key))
|
11
|
+
for key in attributes])
|
12
|
+
return '%s(%s)' % (self.__class__.__name__, arguments)
|
13
|
+
|
14
|
+
#class BOMToken(Token):
|
15
|
+
# id = '<byte order mark>'
|
16
|
+
|
17
|
+
class DirectiveToken(Token):
|
18
|
+
id = '<directive>'
|
19
|
+
def __init__(self, name, value, start_mark, end_mark):
|
20
|
+
self.name = name
|
21
|
+
self.value = value
|
22
|
+
self.start_mark = start_mark
|
23
|
+
self.end_mark = end_mark
|
24
|
+
|
25
|
+
class DocumentStartToken(Token):
|
26
|
+
id = '<document start>'
|
27
|
+
|
28
|
+
class DocumentEndToken(Token):
|
29
|
+
id = '<document end>'
|
30
|
+
|
31
|
+
class StreamStartToken(Token):
|
32
|
+
id = '<stream start>'
|
33
|
+
def __init__(self, start_mark=None, end_mark=None,
|
34
|
+
encoding=None):
|
35
|
+
self.start_mark = start_mark
|
36
|
+
self.end_mark = end_mark
|
37
|
+
self.encoding = encoding
|
38
|
+
|
39
|
+
class StreamEndToken(Token):
|
40
|
+
id = '<stream end>'
|
41
|
+
|
42
|
+
class BlockSequenceStartToken(Token):
|
43
|
+
id = '<block sequence start>'
|
44
|
+
|
45
|
+
class BlockMappingStartToken(Token):
|
46
|
+
id = '<block mapping start>'
|
47
|
+
|
48
|
+
class BlockEndToken(Token):
|
49
|
+
id = '<block end>'
|
50
|
+
|
51
|
+
class FlowSequenceStartToken(Token):
|
52
|
+
id = '['
|
53
|
+
|
54
|
+
class FlowMappingStartToken(Token):
|
55
|
+
id = '{'
|
56
|
+
|
57
|
+
class FlowSequenceEndToken(Token):
|
58
|
+
id = ']'
|
59
|
+
|
60
|
+
class FlowMappingEndToken(Token):
|
61
|
+
id = '}'
|
62
|
+
|
63
|
+
class KeyToken(Token):
|
64
|
+
id = '?'
|
65
|
+
|
66
|
+
class ValueToken(Token):
|
67
|
+
id = ':'
|
68
|
+
|
69
|
+
class BlockEntryToken(Token):
|
70
|
+
id = '-'
|
71
|
+
|
72
|
+
class FlowEntryToken(Token):
|
73
|
+
id = ','
|
74
|
+
|
75
|
+
class AliasToken(Token):
|
76
|
+
id = '<alias>'
|
77
|
+
def __init__(self, value, start_mark, end_mark):
|
78
|
+
self.value = value
|
79
|
+
self.start_mark = start_mark
|
80
|
+
self.end_mark = end_mark
|
81
|
+
|
82
|
+
class AnchorToken(Token):
|
83
|
+
id = '<anchor>'
|
84
|
+
def __init__(self, value, start_mark, end_mark):
|
85
|
+
self.value = value
|
86
|
+
self.start_mark = start_mark
|
87
|
+
self.end_mark = end_mark
|
88
|
+
|
89
|
+
class TagToken(Token):
|
90
|
+
id = '<tag>'
|
91
|
+
def __init__(self, value, start_mark, end_mark):
|
92
|
+
self.value = value
|
93
|
+
self.start_mark = start_mark
|
94
|
+
self.end_mark = end_mark
|
95
|
+
|
96
|
+
class ScalarToken(Token):
|
97
|
+
id = '<scalar>'
|
98
|
+
def __init__(self, value, plain, start_mark, end_mark, style=None):
|
99
|
+
self.value = value
|
100
|
+
self.plain = plain
|
101
|
+
self.start_mark = start_mark
|
102
|
+
self.end_mark = end_mark
|
103
|
+
self.style = style
|
104
|
+
|
metaflow/cli.py
CHANGED
@@ -7,6 +7,7 @@ from datetime import datetime
|
|
7
7
|
|
8
8
|
import metaflow.tracing as tracing
|
9
9
|
from metaflow._vendor import click
|
10
|
+
from metaflow.system import _system_logger, _system_monitor
|
10
11
|
|
11
12
|
from . import decorators, lint, metaflow_version, parameters, plugins
|
12
13
|
from .cli_args import cli_args
|
@@ -26,7 +27,6 @@ from .metaflow_config import (
|
|
26
27
|
DEFAULT_PACKAGE_SUFFIXES,
|
27
28
|
)
|
28
29
|
from .metaflow_current import current
|
29
|
-
from metaflow.system import _system_monitor, _system_logger
|
30
30
|
from .metaflow_environment import MetaflowEnvironment
|
31
31
|
from .packaging_sys import MetaflowCodeContent
|
32
32
|
from .plugins import (
|
@@ -38,9 +38,9 @@ from .plugins import (
|
|
38
38
|
)
|
39
39
|
from .pylint_wrapper import PyLint
|
40
40
|
from .R import metaflow_r_version, use_r
|
41
|
-
from .util import get_latest_run_id, resolve_identity
|
42
41
|
from .user_configs.config_options import LocalFileInput, config_options
|
43
42
|
from .user_configs.config_parameters import ConfigValue
|
43
|
+
from .util import get_latest_run_id, resolve_identity
|
44
44
|
|
45
45
|
ERASE_TO_EOL = "\033[K"
|
46
46
|
HIGHLIGHT = "red"
|
@@ -56,6 +56,15 @@ def echo_dev_null(*args, **kwargs):
|
|
56
56
|
|
57
57
|
|
58
58
|
def echo_always(line, **kwargs):
|
59
|
+
if kwargs.pop("wrap", False):
|
60
|
+
import textwrap
|
61
|
+
|
62
|
+
indent_str = INDENT if kwargs.get("indent", None) else ""
|
63
|
+
effective_width = 80 - len(indent_str)
|
64
|
+
wrapped = textwrap.wrap(line, width=effective_width, break_long_words=False)
|
65
|
+
line = "\n".join(indent_str + l for l in wrapped)
|
66
|
+
kwargs["indent"] = False
|
67
|
+
|
59
68
|
kwargs["err"] = kwargs.get("err", True)
|
60
69
|
if kwargs.pop("indent", None):
|
61
70
|
line = "\n".join(INDENT + x for x in line.splitlines())
|
metaflow/client/core.py
CHANGED
@@ -831,10 +831,12 @@ class MetaflowCode(object):
|
|
831
831
|
)
|
832
832
|
self._code_obj = BytesIO(blobdata)
|
833
833
|
self._info = MetaflowPackage.cls_get_info(self._code_metadata, self._code_obj)
|
834
|
+
self._code_obj.seek(0)
|
834
835
|
if self._info:
|
835
836
|
self._flowspec = MetaflowPackage.cls_get_content(
|
836
837
|
self._code_metadata, self._code_obj, self._info["script"]
|
837
838
|
)
|
839
|
+
self._code_obj.seek(0)
|
838
840
|
else:
|
839
841
|
raise MetaflowInternalError("Code package metadata is invalid.")
|
840
842
|
|
@@ -885,7 +887,9 @@ class MetaflowCode(object):
|
|
885
887
|
TarFile for everything in this code package
|
886
888
|
"""
|
887
889
|
if self._backend.type == "tgz":
|
888
|
-
|
890
|
+
to_return = self._backend.cls_open(self._code_obj)
|
891
|
+
self._code_obj.seek(0)
|
892
|
+
return to_return
|
889
893
|
raise RuntimeError("Archive is not a tarball")
|
890
894
|
|
891
895
|
def extract(self) -> TemporaryDirectory:
|
@@ -921,6 +925,7 @@ class MetaflowCode(object):
|
|
921
925
|
MetaflowPackage.cls_extract_into(
|
922
926
|
self._code_metadata, self._code_obj, tmp.name, ContentType.USER_CONTENT
|
923
927
|
)
|
928
|
+
self._code_obj.seek(0)
|
924
929
|
return tmp
|
925
930
|
|
926
931
|
@property
|
@@ -205,9 +205,10 @@ def package_mfext_all():
|
|
205
205
|
# the packaged metaflow_extensions directory "self-contained" so that
|
206
206
|
# python doesn't go and search other parts of the system for more
|
207
207
|
# metaflow_extensions.
|
208
|
-
|
209
|
-
os.path.
|
210
|
-
|
208
|
+
if _all_packages:
|
209
|
+
yield os.path.join(
|
210
|
+
os.path.dirname(os.path.abspath(__file__)), "_empty_file.py"
|
211
|
+
), os.path.join(EXT_PKG, "__init__.py")
|
211
212
|
|
212
213
|
for p in _all_packages:
|
213
214
|
for path_tuple in package_mfext_package(p):
|
metaflow/metaflow_environment.py
CHANGED
@@ -203,6 +203,19 @@ class MetaflowEnvironment(object):
|
|
203
203
|
"mfcontent_version": 1,
|
204
204
|
}
|
205
205
|
)
|
206
|
+
|
207
|
+
extra_exports = []
|
208
|
+
for k, v in MetaflowPackage.get_post_extract_env_vars(
|
209
|
+
code_package_metadata, dest_dir="$(pwd)"
|
210
|
+
).items():
|
211
|
+
if k.endswith(":"):
|
212
|
+
# If the key ends with a colon, we override the existing value
|
213
|
+
extra_exports.append("export %s=%s" % (k[:-1], v))
|
214
|
+
else:
|
215
|
+
extra_exports.append(
|
216
|
+
"export %s=%s:$(printenv %s)" % (k, v.replace('"', '\\"'), k)
|
217
|
+
)
|
218
|
+
|
206
219
|
cmds = (
|
207
220
|
[
|
208
221
|
BASH_MFLOG,
|
@@ -226,12 +239,7 @@ class MetaflowEnvironment(object):
|
|
226
239
|
+ MetaflowPackage.get_extract_commands(
|
227
240
|
code_package_metadata, "job.tar", dest_dir="."
|
228
241
|
)
|
229
|
-
+
|
230
|
-
"export %s=%s:$(printenv %s)" % (k, v.replace('"', '\\"'), k)
|
231
|
-
for k, v in MetaflowPackage.get_post_extract_env_vars(
|
232
|
-
code_package_metadata, dest_dir="."
|
233
|
-
).items()
|
234
|
-
]
|
242
|
+
+ extra_exports
|
235
243
|
+ [
|
236
244
|
"mflog 'Task is starting.'",
|
237
245
|
"flush_mflogs",
|
metaflow/package/__init__.py
CHANGED
@@ -17,7 +17,6 @@ from ..packaging_sys.utils import suffix_filter, walk
|
|
17
17
|
from ..metaflow_config import DEFAULT_PACKAGE_SUFFIXES
|
18
18
|
from ..exception import MetaflowException
|
19
19
|
from ..user_configs.config_parameters import dump_config_values
|
20
|
-
from ..util import get_metaflow_root
|
21
20
|
from .. import R
|
22
21
|
|
23
22
|
DEFAULT_SUFFIXES_LIST = DEFAULT_PACKAGE_SUFFIXES.split(",")
|
@@ -76,12 +75,22 @@ class MetaflowPackage(object):
|
|
76
75
|
from ..user_decorators.user_flow_decorator import FlowMutatorMeta
|
77
76
|
from ..user_decorators.user_step_decorator import UserStepDecoratorMeta
|
78
77
|
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
78
|
+
# Be very defensive here to filter modules in case there are
|
79
|
+
# some badly behaved modules that have weird values for
|
80
|
+
# METAFLOW_PACKAGE_POLICY for example.
|
81
|
+
try:
|
82
|
+
if (
|
83
|
+
m.__name__ in FlowMutatorMeta._import_modules
|
84
|
+
or m.__name__ in UserStepDecoratorMeta._import_modules
|
85
|
+
or (
|
86
|
+
hasattr(m, "METAFLOW_PACKAGE_POLICY")
|
87
|
+
and m.METAFLOW_PACKAGE_POLICY == "include"
|
88
|
+
)
|
89
|
+
):
|
90
|
+
return True
|
91
|
+
return False
|
92
|
+
except:
|
93
|
+
return False
|
85
94
|
|
86
95
|
if mfcontent is None:
|
87
96
|
self._mfcontent = MetaflowCodeContentV1(criteria=_module_selector)
|
@@ -350,10 +359,10 @@ class MetaflowPackage(object):
|
|
350
359
|
"""
|
351
360
|
backend = cls.get_backend(pkg_metadata)
|
352
361
|
with backend.cls_open(archive) as opened_archive:
|
353
|
-
|
362
|
+
include_members = MetaflowCodeContent.get_archive_content_members(
|
354
363
|
opened_archive, content_types, backend
|
355
364
|
)
|
356
|
-
backend.
|
365
|
+
backend.cls_extract_members(opened_archive, include_members, dest_dir)
|
357
366
|
|
358
367
|
def user_tuples(self, timeout: Optional[float] = None):
|
359
368
|
# Wait for at least the blob to be formed
|
@@ -118,9 +118,7 @@ class MetaflowCodeContent:
|
|
118
118
|
return handling_cls.get_filename_impl(mfcontent_info, filename, content_type)
|
119
119
|
|
120
120
|
@classmethod
|
121
|
-
def get_env_vars_for_packaged_metaflow(
|
122
|
-
cls, dest_dir: str
|
123
|
-
) -> Optional[Dict[str, str]]:
|
121
|
+
def get_env_vars_for_packaged_metaflow(cls, dest_dir: str) -> Dict[str, str]:
|
124
122
|
"""
|
125
123
|
Get the environment variables that are needed to run Metaflow when it is
|
126
124
|
packaged. This is typically used to set the PYTHONPATH to include the
|
@@ -128,17 +126,19 @@ class MetaflowCodeContent:
|
|
128
126
|
|
129
127
|
Returns
|
130
128
|
-------
|
131
|
-
|
129
|
+
Dict[str, str]
|
132
130
|
The environment variables that are needed to run Metaflow when it is
|
133
|
-
packaged
|
131
|
+
packaged; empty if no packaged Metaflow is found.
|
134
132
|
"""
|
135
|
-
mfcontent_info = cls._extract_mfcontent_info()
|
133
|
+
mfcontent_info = cls._extract_mfcontent_info(dest_dir)
|
136
134
|
if mfcontent_info is None:
|
137
135
|
# No MFCONTENT_MARKER file found -- this is not a packaged Metaflow code
|
138
136
|
# package so no environment variables to set.
|
139
|
-
return
|
137
|
+
return {}
|
140
138
|
handling_cls = cls._get_mfcontent_class(mfcontent_info)
|
141
|
-
|
139
|
+
v = handling_cls.get_post_extract_env_vars_impl(dest_dir)
|
140
|
+
v["METAFLOW_EXTRACTED_ROOT:"] = dest_dir
|
141
|
+
return v
|
142
142
|
|
143
143
|
@classmethod
|
144
144
|
def get_archive_info(
|
@@ -216,15 +216,15 @@ class MetaflowCodeContent:
|
|
216
216
|
)
|
217
217
|
|
218
218
|
@classmethod
|
219
|
-
def
|
219
|
+
def get_archive_content_members(
|
220
220
|
cls,
|
221
221
|
archive: Any,
|
222
222
|
content_types: Optional[int] = None,
|
223
223
|
packaging_backend: Type[PackagingBackend] = TarPackagingBackend,
|
224
|
-
) -> List[
|
224
|
+
) -> List[Any]:
|
225
225
|
mfcontent_info = cls._extract_archive_mfcontent_info(archive, packaging_backend)
|
226
226
|
handling_cls = cls._get_mfcontent_class(mfcontent_info)
|
227
|
-
return handling_cls.
|
227
|
+
return handling_cls.get_archive_content_members_impl(
|
228
228
|
mfcontent_info, archive, content_types, packaging_backend
|
229
229
|
)
|
230
230
|
|
@@ -276,7 +276,9 @@ class MetaflowCodeContent:
|
|
276
276
|
"Invalid package -- unknown version %s in info: %s"
|
277
277
|
% (version_id, cls._mappings)
|
278
278
|
)
|
279
|
-
|
279
|
+
v = cls._mappings[version_id].get_post_extract_env_vars_impl(dest_dir)
|
280
|
+
v["METAFLOW_EXTRACTED_ROOT:"] = dest_dir
|
281
|
+
return v
|
280
282
|
|
281
283
|
# Implement the _impl methods in the base subclass (in this file). These need to
|
282
284
|
# happen with as few imports as possible to prevent circular dependencies.
|
@@ -337,14 +339,14 @@ class MetaflowCodeContent:
|
|
337
339
|
raise NotImplementedError("get_archive_filename_impl not implemented")
|
338
340
|
|
339
341
|
@classmethod
|
340
|
-
def
|
342
|
+
def get_archive_content_members_impl(
|
341
343
|
cls,
|
342
344
|
mfcontent_info: Optional[Dict[str, Any]],
|
343
345
|
archive: Any,
|
344
346
|
content_types: Optional[int] = None,
|
345
347
|
packaging_backend: Type[PackagingBackend] = TarPackagingBackend,
|
346
|
-
) -> List[
|
347
|
-
raise NotImplementedError("
|
348
|
+
) -> List[Any]:
|
349
|
+
raise NotImplementedError("get_archive_content_members_impl not implemented")
|
348
350
|
|
349
351
|
@classmethod
|
350
352
|
def get_post_extract_env_vars_impl(cls, dest_dir: str) -> Dict[str, str]:
|
@@ -523,19 +525,22 @@ class MetaflowCodeContent:
|
|
523
525
|
return mfcontent_info
|
524
526
|
|
525
527
|
@classmethod
|
526
|
-
def _extract_mfcontent_info(
|
527
|
-
|
528
|
-
|
528
|
+
def _extract_mfcontent_info(
|
529
|
+
cls, target_dir: Optional[str] = None
|
530
|
+
) -> Optional[Dict[str, Any]]:
|
531
|
+
target_dir = target_dir or "_local"
|
532
|
+
if target_dir in cls._cached_mfcontent_info:
|
533
|
+
return cls._cached_mfcontent_info[target_dir]
|
529
534
|
|
530
535
|
mfcontent_info = None # type: Optional[Dict[str, Any]]
|
531
|
-
if
|
532
|
-
|
533
|
-
|
534
|
-
|
535
|
-
|
536
|
-
) as f:
|
536
|
+
if target_dir == "_local":
|
537
|
+
root = os.environ.get("METAFLOW_EXTRACTED_ROOT", get_metaflow_root())
|
538
|
+
else:
|
539
|
+
root = target_dir
|
540
|
+
if os.path.exists(os.path.join(root, MFCONTENT_MARKER)):
|
541
|
+
with open(os.path.join(root, MFCONTENT_MARKER), "r", encoding="utf-8") as f:
|
537
542
|
mfcontent_info = json.load(f)
|
538
|
-
cls._cached_mfcontent_info[
|
543
|
+
cls._cached_mfcontent_info[target_dir] = mfcontent_info
|
539
544
|
return mfcontent_info
|
540
545
|
|
541
546
|
def get_package_version(self) -> int:
|
@@ -627,13 +632,13 @@ class MetaflowCodeContentV0(MetaflowCodeContent, version_id=0):
|
|
627
632
|
return None
|
628
633
|
|
629
634
|
@classmethod
|
630
|
-
def
|
635
|
+
def get_archive_content_members_impl(
|
631
636
|
cls,
|
632
637
|
mfcontent_info: Optional[Dict[str, Any]],
|
633
638
|
archive: Any,
|
634
639
|
content_types: Optional[int] = None,
|
635
640
|
packaging_backend: Type[PackagingBackend] = TarPackagingBackend,
|
636
|
-
) -> List[
|
641
|
+
) -> List[Any]:
|
637
642
|
"""
|
638
643
|
For V0, we use a static list of known files to classify the content
|
639
644
|
"""
|
@@ -649,16 +654,20 @@ class MetaflowCodeContentV0(MetaflowCodeContent, version_id=0):
|
|
649
654
|
"condav2-1.cnd": ContentType.OTHER_CONTENT.value,
|
650
655
|
}
|
651
656
|
to_return = []
|
652
|
-
for
|
657
|
+
for member in packaging_backend.cls_list_members(archive):
|
658
|
+
filename = packaging_backend.cls_member_name(member)
|
659
|
+
added = False
|
653
660
|
for prefix, classification in known_prefixes.items():
|
654
661
|
if (
|
655
662
|
prefix[-1] == "/" and filename.startswith(prefix)
|
656
663
|
) or prefix == filename:
|
657
664
|
if content_types & classification:
|
658
|
-
to_return.append(
|
659
|
-
|
660
|
-
|
661
|
-
|
665
|
+
to_return.append(member)
|
666
|
+
added = True
|
667
|
+
break
|
668
|
+
if not added and content_types & ContentType.USER_CONTENT.value:
|
669
|
+
# Everything else is user content
|
670
|
+
to_return.append(member)
|
662
671
|
return to_return
|
663
672
|
|
664
673
|
@classmethod
|
@@ -705,7 +714,7 @@ class MetaflowCodeContentV1Base(MetaflowCodeContent, version_id=1):
|
|
705
714
|
cls, mfcontent_info: Optional[Dict[str, Any]], filename: str, in_archive: bool
|
706
715
|
) -> str:
|
707
716
|
if in_archive:
|
708
|
-
return filename
|
717
|
+
return os.path.join(cls._other_dir, filename)
|
709
718
|
return os.path.join(get_metaflow_root(), "..", cls._other_dir, filename)
|
710
719
|
|
711
720
|
@classmethod
|
@@ -713,7 +722,7 @@ class MetaflowCodeContentV1Base(MetaflowCodeContent, version_id=1):
|
|
713
722
|
cls, mfcontent_info: Optional[Dict[str, Any]], filename: str, in_archive: bool
|
714
723
|
) -> str:
|
715
724
|
if in_archive:
|
716
|
-
return filename
|
725
|
+
return os.path.join(cls._code_dir, filename)
|
717
726
|
return os.path.join(get_metaflow_root(), filename)
|
718
727
|
|
719
728
|
@classmethod
|
@@ -832,37 +841,38 @@ class MetaflowCodeContentV1Base(MetaflowCodeContent, version_id=1):
|
|
832
841
|
return None
|
833
842
|
|
834
843
|
@classmethod
|
835
|
-
def
|
844
|
+
def get_archive_content_members_impl(
|
836
845
|
cls,
|
837
846
|
mfcontent_info: Optional[Dict[str, Any]],
|
838
847
|
archive: Any,
|
839
848
|
content_types: Optional[int] = None,
|
840
849
|
packaging_backend: Type[PackagingBackend] = TarPackagingBackend,
|
841
|
-
) -> List[
|
850
|
+
) -> List[Any]:
|
842
851
|
to_return = []
|
843
852
|
module_content = set(mfcontent_info.get("module_files", []))
|
844
|
-
for
|
853
|
+
for member in packaging_backend.cls_list_members(archive):
|
854
|
+
filename = packaging_backend.cls_member_name(member)
|
845
855
|
if filename.startswith(cls._other_dir) and (
|
846
856
|
content_types & ContentType.OTHER_CONTENT.value
|
847
857
|
):
|
848
|
-
to_return.append(
|
858
|
+
to_return.append(member)
|
849
859
|
elif filename.startswith(cls._code_dir):
|
850
860
|
# Special case for the marker, which is other content even if in code.
|
851
|
-
if filename ==
|
861
|
+
if filename == MFCONTENT_MARKER:
|
852
862
|
if content_types & ContentType.OTHER_CONTENT.value:
|
853
|
-
to_return.append(
|
863
|
+
to_return.append(member)
|
854
864
|
else:
|
855
865
|
continue
|
856
866
|
# Here it is either module or code
|
857
867
|
if os.path.join(cls._code_dir, filename) in module_content:
|
858
868
|
if content_types & ContentType.MODULE_CONTENT.value:
|
859
|
-
to_return.append(
|
869
|
+
to_return.append(member)
|
860
870
|
elif content_types & ContentType.CODE_CONTENT.value:
|
861
|
-
to_return.append(
|
871
|
+
to_return.append(member)
|
862
872
|
else:
|
863
873
|
if content_types & ContentType.USER_CONTENT.value:
|
864
874
|
# Everything else is user content
|
865
|
-
to_return.append(
|
875
|
+
to_return.append(member)
|
866
876
|
return to_return
|
867
877
|
|
868
878
|
@classmethod
|
@@ -57,6 +57,15 @@ class PackagingBackend(ABC):
|
|
57
57
|
"""Open the archive from the given content."""
|
58
58
|
pass
|
59
59
|
|
60
|
+
@classmethod
|
61
|
+
@abstractmethod
|
62
|
+
def cls_member_name(cls, member: Union[Any, str]) -> str:
|
63
|
+
"""
|
64
|
+
Returns the name of the member as a string.
|
65
|
+
This is used to ensure consistent naming across different archive formats.
|
66
|
+
"""
|
67
|
+
pass
|
68
|
+
|
60
69
|
@classmethod
|
61
70
|
@abstractmethod
|
62
71
|
def cls_has_member(cls, archive: Any, name: str) -> bool:
|
@@ -72,14 +81,20 @@ class PackagingBackend(ABC):
|
|
72
81
|
def cls_extract_members(
|
73
82
|
cls,
|
74
83
|
archive: Any,
|
75
|
-
members: Optional[List[
|
84
|
+
members: Optional[List[Any]] = None,
|
76
85
|
dest_dir: str = ".",
|
77
86
|
) -> None:
|
78
87
|
pass
|
79
88
|
|
80
89
|
@classmethod
|
81
90
|
@abstractmethod
|
82
|
-
def
|
91
|
+
def cls_list_names(cls, archive: Any) -> Optional[List[str]]:
|
92
|
+
pass
|
93
|
+
|
94
|
+
@classmethod
|
95
|
+
@abstractmethod
|
96
|
+
def cls_list_members(cls, archive: Any) -> Optional[List[Any]]:
|
97
|
+
"""List all members in the archive."""
|
83
98
|
pass
|
84
99
|
|
85
100
|
def has_member(self, name: str) -> bool:
|
@@ -93,17 +108,17 @@ class PackagingBackend(ABC):
|
|
93
108
|
raise ValueError("Cannot get member from an uncreated archive")
|
94
109
|
|
95
110
|
def extract_members(
|
96
|
-
self, members: Optional[List[
|
111
|
+
self, members: Optional[List[Any]] = None, dest_dir: str = "."
|
97
112
|
) -> None:
|
98
113
|
if self._archive:
|
99
114
|
self.cls_extract_members(self._archive, members, dest_dir)
|
100
115
|
else:
|
101
116
|
raise ValueError("Cannot extract from an uncreated archive")
|
102
117
|
|
103
|
-
def
|
118
|
+
def list_names(self) -> Optional[List[str]]:
|
104
119
|
if self._archive:
|
105
|
-
return self.
|
106
|
-
raise ValueError("Cannot list
|
120
|
+
return self.cls_list_names(self._archive)
|
121
|
+
raise ValueError("Cannot list names from an uncreated archive")
|
107
122
|
|
108
123
|
def __enter__(self):
|
109
124
|
self.create()
|