metaflow 2.16.6__py2.py3-none-any.whl → 2.16.7__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. metaflow/_vendor/click/core.py +3 -4
  2. metaflow/_vendor/imghdr/__init__.py +11 -0
  3. metaflow/_vendor/yaml/__init__.py +427 -0
  4. metaflow/_vendor/yaml/composer.py +139 -0
  5. metaflow/_vendor/yaml/constructor.py +748 -0
  6. metaflow/_vendor/yaml/cyaml.py +101 -0
  7. metaflow/_vendor/yaml/dumper.py +62 -0
  8. metaflow/_vendor/yaml/emitter.py +1137 -0
  9. metaflow/_vendor/yaml/error.py +75 -0
  10. metaflow/_vendor/yaml/events.py +86 -0
  11. metaflow/_vendor/yaml/loader.py +63 -0
  12. metaflow/_vendor/yaml/nodes.py +49 -0
  13. metaflow/_vendor/yaml/parser.py +589 -0
  14. metaflow/_vendor/yaml/reader.py +185 -0
  15. metaflow/_vendor/yaml/representer.py +389 -0
  16. metaflow/_vendor/yaml/resolver.py +227 -0
  17. metaflow/_vendor/yaml/scanner.py +1435 -0
  18. metaflow/_vendor/yaml/serializer.py +111 -0
  19. metaflow/_vendor/yaml/tokens.py +104 -0
  20. metaflow/cli.py +11 -2
  21. metaflow/client/core.py +6 -1
  22. metaflow/extension_support/__init__.py +4 -3
  23. metaflow/metaflow_environment.py +14 -6
  24. metaflow/package/__init__.py +18 -9
  25. metaflow/packaging_sys/__init__.py +53 -43
  26. metaflow/packaging_sys/backend.py +21 -6
  27. metaflow/packaging_sys/tar_backend.py +16 -3
  28. metaflow/packaging_sys/v1.py +21 -21
  29. metaflow/plugins/argo/argo_workflows_deployer_objects.py +37 -0
  30. metaflow/plugins/aws/step_functions/step_functions_deployer_objects.py +16 -0
  31. metaflow/plugins/cards/card_modules/convert_to_native_type.py +7 -1
  32. metaflow/plugins/pypi/conda_decorator.py +4 -2
  33. metaflow/runner/click_api.py +14 -7
  34. metaflow/runner/deployer.py +77 -0
  35. metaflow/runner/subprocess_manager.py +20 -12
  36. metaflow/vendor.py +23 -6
  37. metaflow/version.py +1 -1
  38. {metaflow-2.16.6.dist-info → metaflow-2.16.7.dist-info}/METADATA +2 -2
  39. {metaflow-2.16.6.dist-info → metaflow-2.16.7.dist-info}/RECORD +46 -29
  40. {metaflow-2.16.6.data → metaflow-2.16.7.data}/data/share/metaflow/devtools/Makefile +0 -0
  41. {metaflow-2.16.6.data → metaflow-2.16.7.data}/data/share/metaflow/devtools/Tiltfile +0 -0
  42. {metaflow-2.16.6.data → metaflow-2.16.7.data}/data/share/metaflow/devtools/pick_services.sh +0 -0
  43. {metaflow-2.16.6.dist-info → metaflow-2.16.7.dist-info}/WHEEL +0 -0
  44. {metaflow-2.16.6.dist-info → metaflow-2.16.7.dist-info}/entry_points.txt +0 -0
  45. {metaflow-2.16.6.dist-info → metaflow-2.16.7.dist-info}/licenses/LICENSE +0 -0
  46. {metaflow-2.16.6.dist-info → metaflow-2.16.7.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,111 @@
1
+
2
+ __all__ = ['Serializer', 'SerializerError']
3
+
4
+ from .error import YAMLError
5
+ from .events import *
6
+ from .nodes import *
7
+
8
class SerializerError(YAMLError):
    """Error raised by ``Serializer`` when it is used in the wrong state."""
10
+
11
class Serializer:
    """Walk a node graph and hand the matching events to ``self.emit``.

    This class calls ``emit``, ``resolve``, ``descend_resolver`` and
    ``ascend_resolver`` on ``self`` without defining them, so they must be
    supplied by the class it is combined with.

    ``closed`` records the stream state: ``None`` before ``open()``,
    ``False`` while the stream is open and ``True`` after ``close()``.
    """

    ANCHOR_TEMPLATE = 'id%03d'

    def __init__(self, encoding=None,
            explicit_start=None, explicit_end=None, version=None, tags=None):
        """Store the stream/document options and reset per-document state."""
        self.use_encoding = encoding
        self.use_explicit_start = explicit_start
        self.use_explicit_end = explicit_end
        self.use_version = version
        self.use_tags = tags
        # Per-document bookkeeping, cleared again at the end of serialize().
        self.serialized_nodes = {}
        self.anchors = {}
        self.last_anchor_id = 0
        self.closed = None

    def open(self):
        """Emit the stream-start event; only valid on a never-opened serializer.

        Raises SerializerError if the serializer is already open or closed.
        """
        if self.closed is None:
            self.emit(StreamStartEvent(encoding=self.use_encoding))
            self.closed = False
            return
        if self.closed:
            raise SerializerError("serializer is closed")
        raise SerializerError("serializer is already opened")

    def close(self):
        """Emit the stream-end event once; closing again is a no-op.

        Raises SerializerError if the serializer was never opened.
        """
        if self.closed is None:
            raise SerializerError("serializer is not opened")
        if self.closed:
            return
        self.emit(StreamEndEvent())
        self.closed = True

    #def __del__(self):
    #    self.close()

    def serialize(self, node):
        """Emit one complete document for the graph rooted at ``node``.

        Raises SerializerError unless the serializer is currently open.
        """
        if self.closed is None:
            raise SerializerError("serializer is not opened")
        if self.closed:
            raise SerializerError("serializer is closed")
        document_start = DocumentStartEvent(
            explicit=self.use_explicit_start,
            version=self.use_version,
            tags=self.use_tags,
        )
        self.emit(document_start)
        # First pass assigns anchors, second pass emits the node events.
        self.anchor_node(node)
        self.serialize_node(node, None, None)
        self.emit(DocumentEndEvent(explicit=self.use_explicit_end))
        # Anchors and the visited set are scoped to a single document.
        self.serialized_nodes = {}
        self.anchors = {}
        self.last_anchor_id = 0

    def anchor_node(self, node):
        """Record ``node`` and its descendants, naming nodes seen more than once."""
        if node in self.anchors:
            # Seen before: give it a generated anchor the first time this
            # happens, and do not descend into its children again.
            if self.anchors[node] is None:
                self.anchors[node] = self.generate_anchor(node)
            return
        self.anchors[node] = None
        if isinstance(node, SequenceNode):
            for child in node.value:
                self.anchor_node(child)
        elif isinstance(node, MappingNode):
            for entry_key, entry_value in node.value:
                self.anchor_node(entry_key)
                self.anchor_node(entry_value)

    def generate_anchor(self, node):
        """Return the next anchor name built from ``ANCHOR_TEMPLATE``."""
        self.last_anchor_id += 1
        return self.ANCHOR_TEMPLATE % self.last_anchor_id

    def serialize_node(self, node, parent, index):
        """Emit the events for ``node``, or an alias if it was already emitted."""
        alias = self.anchors[node]
        if node in self.serialized_nodes:
            self.emit(AliasEvent(alias))
            return
        self.serialized_nodes[node] = True
        self.descend_resolver(parent, index)
        if isinstance(node, ScalarNode):
            detected_tag = self.resolve(ScalarNode, node.value, (True, False))
            default_tag = self.resolve(ScalarNode, node.value, (False, True))
            # Pair of flags: does the node's tag match each resolved tag?
            implicit = (node.tag == detected_tag), (node.tag == default_tag)
            self.emit(ScalarEvent(alias, node.tag, implicit, node.value,
                style=node.style))
        elif isinstance(node, SequenceNode):
            resolved_tag = self.resolve(SequenceNode, node.value, True)
            implicit = (node.tag == resolved_tag)
            self.emit(SequenceStartEvent(alias, node.tag, implicit,
                flow_style=node.flow_style))
            for position, item in enumerate(node.value):
                self.serialize_node(item, node, position)
            self.emit(SequenceEndEvent())
        elif isinstance(node, MappingNode):
            resolved_tag = self.resolve(MappingNode, node.value, True)
            implicit = (node.tag == resolved_tag)
            self.emit(MappingStartEvent(alias, node.tag, implicit,
                flow_style=node.flow_style))
            for key, value in node.value:
                # Keys are serialized with index None; values use their key.
                self.serialize_node(key, node, None)
                self.serialize_node(value, node, key)
            self.emit(MappingEndEvent())
        self.ascend_resolver()
111
+
@@ -0,0 +1,104 @@
1
+
2
class Token(object):
    """Base token that records a ``start_mark`` and an ``end_mark``."""

    def __init__(self, start_mark, end_mark):
        self.start_mark, self.end_mark = start_mark, end_mark

    def __repr__(self):
        """Return ``ClassName(attr=value, ...)`` for the instance attributes.

        Attributes whose name ends in ``_mark`` are left out; the remaining
        ones are listed in sorted name order.
        """
        shown = sorted(name for name in vars(self)
                       if not name.endswith('_mark'))
        rendered = ', '.join('%s=%r' % (name, getattr(self, name))
                             for name in shown)
        return '%s(%s)' % (self.__class__.__name__, rendered)
13
+
14
+ #class BOMToken(Token):
15
+ # id = '<byte order mark>'
16
+
17
class DirectiveToken(Token):
    """Token with id ``'<directive>'`` carrying a ``name`` and a ``value``."""

    id = '<directive>'

    def __init__(self, name, value, start_mark, end_mark):
        # Token.__init__ is not called; every attribute is assigned here.
        self.name, self.value = name, value
        self.start_mark, self.end_mark = start_mark, end_mark
24
+
25
class DocumentStartToken(Token):
    """Token with id ``'<document start>'``; adds no attributes of its own."""

    id = '<document start>'
27
+
28
class DocumentEndToken(Token):
    """Token with id ``'<document end>'``; adds no attributes of its own."""

    id = '<document end>'
30
+
31
class StreamStartToken(Token):
    """Token with id ``'<stream start>'`` that also records an ``encoding``.

    Unlike the base class, both marks are optional and default to ``None``.
    """

    id = '<stream start>'

    def __init__(self, start_mark=None, end_mark=None,
            encoding=None):
        # Token.__init__ is not called; every attribute is assigned here.
        self.start_mark, self.end_mark = start_mark, end_mark
        self.encoding = encoding
38
+
39
class StreamEndToken(Token):
    """Token with id ``'<stream end>'``; adds no attributes of its own."""

    id = '<stream end>'
41
+
42
class BlockSequenceStartToken(Token):
    """Token with id ``'<block sequence start>'``; adds no attributes."""

    id = '<block sequence start>'
44
+
45
class BlockMappingStartToken(Token):
    """Token with id ``'<block mapping start>'``; adds no attributes."""

    id = '<block mapping start>'
47
+
48
class BlockEndToken(Token):
    """Token with id ``'<block end>'``; adds no attributes of its own."""

    id = '<block end>'
50
+
51
class FlowSequenceStartToken(Token):
    """Token with id ``'['``; adds no attributes of its own."""

    id = '['
53
+
54
class FlowMappingStartToken(Token):
    """Token with id ``'{'``; adds no attributes of its own."""

    id = '{'
56
+
57
class FlowSequenceEndToken(Token):
    """Token with id ``']'``; adds no attributes of its own."""

    id = ']'
59
+
60
class FlowMappingEndToken(Token):
    """Token with id ``'}'``; adds no attributes of its own."""

    id = '}'
62
+
63
class KeyToken(Token):
    """Token with id ``'?'``; adds no attributes of its own."""

    id = '?'
65
+
66
class ValueToken(Token):
    """Token with id ``':'``; adds no attributes of its own."""

    id = ':'
68
+
69
class BlockEntryToken(Token):
    """Token with id ``'-'``; adds no attributes of its own."""

    id = '-'
71
+
72
class FlowEntryToken(Token):
    """Token with id ``','``; adds no attributes of its own."""

    id = ','
74
+
75
class AliasToken(Token):
    """Token with id ``'<alias>'`` carrying a ``value``."""

    id = '<alias>'

    def __init__(self, value, start_mark, end_mark):
        # Token.__init__ is not called; every attribute is assigned here.
        self.value = value
        self.start_mark, self.end_mark = start_mark, end_mark
81
+
82
class AnchorToken(Token):
    """Token with id ``'<anchor>'`` carrying a ``value``."""

    id = '<anchor>'

    def __init__(self, value, start_mark, end_mark):
        # Token.__init__ is not called; every attribute is assigned here.
        self.value = value
        self.start_mark, self.end_mark = start_mark, end_mark
88
+
89
class TagToken(Token):
    """Token with id ``'<tag>'`` carrying a ``value``."""

    id = '<tag>'

    def __init__(self, value, start_mark, end_mark):
        # Token.__init__ is not called; every attribute is assigned here.
        self.value = value
        self.start_mark, self.end_mark = start_mark, end_mark
95
+
96
class ScalarToken(Token):
    """Token with id ``'<scalar>'`` carrying ``value``, ``plain`` and ``style``.

    ``style`` is optional and defaults to ``None``.
    """

    id = '<scalar>'

    def __init__(self, value, plain, start_mark, end_mark, style=None):
        # Token.__init__ is not called; every attribute is assigned here.
        self.value, self.plain = value, plain
        self.start_mark, self.end_mark = start_mark, end_mark
        self.style = style
104
+
metaflow/cli.py CHANGED
@@ -7,6 +7,7 @@ from datetime import datetime
7
7
 
8
8
  import metaflow.tracing as tracing
9
9
  from metaflow._vendor import click
10
+ from metaflow.system import _system_logger, _system_monitor
10
11
 
11
12
  from . import decorators, lint, metaflow_version, parameters, plugins
12
13
  from .cli_args import cli_args
@@ -26,7 +27,6 @@ from .metaflow_config import (
26
27
  DEFAULT_PACKAGE_SUFFIXES,
27
28
  )
28
29
  from .metaflow_current import current
29
- from metaflow.system import _system_monitor, _system_logger
30
30
  from .metaflow_environment import MetaflowEnvironment
31
31
  from .packaging_sys import MetaflowCodeContent
32
32
  from .plugins import (
@@ -38,9 +38,9 @@ from .plugins import (
38
38
  )
39
39
  from .pylint_wrapper import PyLint
40
40
  from .R import metaflow_r_version, use_r
41
- from .util import get_latest_run_id, resolve_identity
42
41
  from .user_configs.config_options import LocalFileInput, config_options
43
42
  from .user_configs.config_parameters import ConfigValue
43
+ from .util import get_latest_run_id, resolve_identity
44
44
 
45
45
  ERASE_TO_EOL = "\033[K"
46
46
  HIGHLIGHT = "red"
@@ -56,6 +56,15 @@ def echo_dev_null(*args, **kwargs):
56
56
 
57
57
 
58
58
  def echo_always(line, **kwargs):
59
+ if kwargs.pop("wrap", False):
60
+ import textwrap
61
+
62
+ indent_str = INDENT if kwargs.get("indent", None) else ""
63
+ effective_width = 80 - len(indent_str)
64
+ wrapped = textwrap.wrap(line, width=effective_width, break_long_words=False)
65
+ line = "\n".join(indent_str + l for l in wrapped)
66
+ kwargs["indent"] = False
67
+
59
68
  kwargs["err"] = kwargs.get("err", True)
60
69
  if kwargs.pop("indent", None):
61
70
  line = "\n".join(INDENT + x for x in line.splitlines())
metaflow/client/core.py CHANGED
@@ -831,10 +831,12 @@ class MetaflowCode(object):
831
831
  )
832
832
  self._code_obj = BytesIO(blobdata)
833
833
  self._info = MetaflowPackage.cls_get_info(self._code_metadata, self._code_obj)
834
+ self._code_obj.seek(0)
834
835
  if self._info:
835
836
  self._flowspec = MetaflowPackage.cls_get_content(
836
837
  self._code_metadata, self._code_obj, self._info["script"]
837
838
  )
839
+ self._code_obj.seek(0)
838
840
  else:
839
841
  raise MetaflowInternalError("Code package metadata is invalid.")
840
842
 
@@ -885,7 +887,9 @@ class MetaflowCode(object):
885
887
  TarFile for everything in this code package
886
888
  """
887
889
  if self._backend.type == "tgz":
888
- return self._backend.cls_open(self._code_obj)
890
+ to_return = self._backend.cls_open(self._code_obj)
891
+ self._code_obj.seek(0)
892
+ return to_return
889
893
  raise RuntimeError("Archive is not a tarball")
890
894
 
891
895
  def extract(self) -> TemporaryDirectory:
@@ -921,6 +925,7 @@ class MetaflowCode(object):
921
925
  MetaflowPackage.cls_extract_into(
922
926
  self._code_metadata, self._code_obj, tmp.name, ContentType.USER_CONTENT
923
927
  )
928
+ self._code_obj.seek(0)
924
929
  return tmp
925
930
 
926
931
  @property
@@ -205,9 +205,10 @@ def package_mfext_all():
205
205
  # the packaged metaflow_extensions directory "self-contained" so that
206
206
  # python doesn't go and search other parts of the system for more
207
207
  # metaflow_extensions.
208
- yield os.path.join(
209
- os.path.dirname(os.path.abspath(__file__)), "_empty_file.py"
210
- ), os.path.join(EXT_PKG, "__init__.py")
208
+ if _all_packages:
209
+ yield os.path.join(
210
+ os.path.dirname(os.path.abspath(__file__)), "_empty_file.py"
211
+ ), os.path.join(EXT_PKG, "__init__.py")
211
212
 
212
213
  for p in _all_packages:
213
214
  for path_tuple in package_mfext_package(p):
@@ -203,6 +203,19 @@ class MetaflowEnvironment(object):
203
203
  "mfcontent_version": 1,
204
204
  }
205
205
  )
206
+
207
+ extra_exports = []
208
+ for k, v in MetaflowPackage.get_post_extract_env_vars(
209
+ code_package_metadata, dest_dir="$(pwd)"
210
+ ).items():
211
+ if k.endswith(":"):
212
+ # If the key ends with a colon, we override the existing value
213
+ extra_exports.append("export %s=%s" % (k[:-1], v))
214
+ else:
215
+ extra_exports.append(
216
+ "export %s=%s:$(printenv %s)" % (k, v.replace('"', '\\"'), k)
217
+ )
218
+
206
219
  cmds = (
207
220
  [
208
221
  BASH_MFLOG,
@@ -226,12 +239,7 @@ class MetaflowEnvironment(object):
226
239
  + MetaflowPackage.get_extract_commands(
227
240
  code_package_metadata, "job.tar", dest_dir="."
228
241
  )
229
- + [
230
- "export %s=%s:$(printenv %s)" % (k, v.replace('"', '\\"'), k)
231
- for k, v in MetaflowPackage.get_post_extract_env_vars(
232
- code_package_metadata, dest_dir="."
233
- ).items()
234
- ]
242
+ + extra_exports
235
243
  + [
236
244
  "mflog 'Task is starting.'",
237
245
  "flush_mflogs",
@@ -17,7 +17,6 @@ from ..packaging_sys.utils import suffix_filter, walk
17
17
  from ..metaflow_config import DEFAULT_PACKAGE_SUFFIXES
18
18
  from ..exception import MetaflowException
19
19
  from ..user_configs.config_parameters import dump_config_values
20
- from ..util import get_metaflow_root
21
20
  from .. import R
22
21
 
23
22
  DEFAULT_SUFFIXES_LIST = DEFAULT_PACKAGE_SUFFIXES.split(",")
@@ -76,12 +75,22 @@ class MetaflowPackage(object):
76
75
  from ..user_decorators.user_flow_decorator import FlowMutatorMeta
77
76
  from ..user_decorators.user_step_decorator import UserStepDecoratorMeta
78
77
 
79
- if (
80
- m.__name__ in FlowMutatorMeta._import_modules
81
- or m.__name__ in UserStepDecoratorMeta._import_modules
82
- or hasattr(m, "METAFLOW_PACKAGE")
83
- ):
84
- return True
78
+ # Be very defensive here to filter modules in case there are
79
+ # some badly behaved modules that have weird values for
80
+ # METAFLOW_PACKAGE_POLICY for example.
81
+ try:
82
+ if (
83
+ m.__name__ in FlowMutatorMeta._import_modules
84
+ or m.__name__ in UserStepDecoratorMeta._import_modules
85
+ or (
86
+ hasattr(m, "METAFLOW_PACKAGE_POLICY")
87
+ and m.METAFLOW_PACKAGE_POLICY == "include"
88
+ )
89
+ ):
90
+ return True
91
+ return False
92
+ except:
93
+ return False
85
94
 
86
95
  if mfcontent is None:
87
96
  self._mfcontent = MetaflowCodeContentV1(criteria=_module_selector)
@@ -350,10 +359,10 @@ class MetaflowPackage(object):
350
359
  """
351
360
  backend = cls.get_backend(pkg_metadata)
352
361
  with backend.cls_open(archive) as opened_archive:
353
- include_names = MetaflowCodeContent.get_archive_content_names(
362
+ include_members = MetaflowCodeContent.get_archive_content_members(
354
363
  opened_archive, content_types, backend
355
364
  )
356
- backend.extract_members(include_names, dest_dir)
365
+ backend.cls_extract_members(opened_archive, include_members, dest_dir)
357
366
 
358
367
  def user_tuples(self, timeout: Optional[float] = None):
359
368
  # Wait for at least the blob to be formed
@@ -118,9 +118,7 @@ class MetaflowCodeContent:
118
118
  return handling_cls.get_filename_impl(mfcontent_info, filename, content_type)
119
119
 
120
120
  @classmethod
121
- def get_env_vars_for_packaged_metaflow(
122
- cls, dest_dir: str
123
- ) -> Optional[Dict[str, str]]:
121
+ def get_env_vars_for_packaged_metaflow(cls, dest_dir: str) -> Dict[str, str]:
124
122
  """
125
123
  Get the environment variables that are needed to run Metaflow when it is
126
124
  packaged. This is typically used to set the PYTHONPATH to include the
@@ -128,17 +126,19 @@ class MetaflowCodeContent:
128
126
 
129
127
  Returns
130
128
  -------
131
- Optional[Dict[str, str]]
129
+ Dict[str, str]
132
130
  The environment variables that are needed to run Metaflow when it is
133
- packaged -- None if there are no such variables (not packaged for example)
131
+ packaged, if present (an empty dictionary otherwise).
134
132
  """
135
- mfcontent_info = cls._extract_mfcontent_info()
133
+ mfcontent_info = cls._extract_mfcontent_info(dest_dir)
136
134
  if mfcontent_info is None:
137
135
  # No MFCONTENT_MARKER file found -- this is not a packaged Metaflow code
138
136
  # package so no environment variables to set.
139
- return None
137
+ return {}
140
138
  handling_cls = cls._get_mfcontent_class(mfcontent_info)
141
- return handling_cls.get_post_extract_env_vars_impl(dest_dir)
139
+ v = handling_cls.get_post_extract_env_vars_impl(dest_dir)
140
+ v["METAFLOW_EXTRACTED_ROOT:"] = dest_dir
141
+ return v
142
142
 
143
143
  @classmethod
144
144
  def get_archive_info(
@@ -216,15 +216,15 @@ class MetaflowCodeContent:
216
216
  )
217
217
 
218
218
  @classmethod
219
- def get_archive_content_names(
219
+ def get_archive_content_members(
220
220
  cls,
221
221
  archive: Any,
222
222
  content_types: Optional[int] = None,
223
223
  packaging_backend: Type[PackagingBackend] = TarPackagingBackend,
224
- ) -> List[str]:
224
+ ) -> List[Any]:
225
225
  mfcontent_info = cls._extract_archive_mfcontent_info(archive, packaging_backend)
226
226
  handling_cls = cls._get_mfcontent_class(mfcontent_info)
227
- return handling_cls.get_archive_content_names_impl(
227
+ return handling_cls.get_archive_content_members_impl(
228
228
  mfcontent_info, archive, content_types, packaging_backend
229
229
  )
230
230
 
@@ -276,7 +276,9 @@ class MetaflowCodeContent:
276
276
  "Invalid package -- unknown version %s in info: %s"
277
277
  % (version_id, cls._mappings)
278
278
  )
279
- return cls._mappings[version_id].get_post_extract_env_vars_impl(dest_dir)
279
+ v = cls._mappings[version_id].get_post_extract_env_vars_impl(dest_dir)
280
+ v["METAFLOW_EXTRACTED_ROOT:"] = dest_dir
281
+ return v
280
282
 
281
283
  # Implement the _impl methods in the base subclass (in this file). These need to
282
284
  # happen with as few imports as possible to prevent circular dependencies.
@@ -337,14 +339,14 @@ class MetaflowCodeContent:
337
339
  raise NotImplementedError("get_archive_filename_impl not implemented")
338
340
 
339
341
  @classmethod
340
- def get_archive_content_names_impl(
342
+ def get_archive_content_members_impl(
341
343
  cls,
342
344
  mfcontent_info: Optional[Dict[str, Any]],
343
345
  archive: Any,
344
346
  content_types: Optional[int] = None,
345
347
  packaging_backend: Type[PackagingBackend] = TarPackagingBackend,
346
- ) -> List[str]:
347
- raise NotImplementedError("get_archive_content_names_impl not implemented")
348
+ ) -> List[Any]:
349
+ raise NotImplementedError("get_archive_content_members_impl not implemented")
348
350
 
349
351
  @classmethod
350
352
  def get_post_extract_env_vars_impl(cls, dest_dir: str) -> Dict[str, str]:
@@ -523,19 +525,22 @@ class MetaflowCodeContent:
523
525
  return mfcontent_info
524
526
 
525
527
  @classmethod
526
- def _extract_mfcontent_info(cls) -> Optional[Dict[str, Any]]:
527
- if "_local" in cls._cached_mfcontent_info:
528
- return cls._cached_mfcontent_info["_local"]
528
+ def _extract_mfcontent_info(
529
+ cls, target_dir: Optional[str] = None
530
+ ) -> Optional[Dict[str, Any]]:
531
+ target_dir = target_dir or "_local"
532
+ if target_dir in cls._cached_mfcontent_info:
533
+ return cls._cached_mfcontent_info[target_dir]
529
534
 
530
535
  mfcontent_info = None # type: Optional[Dict[str, Any]]
531
- if os.path.exists(os.path.join(get_metaflow_root(), MFCONTENT_MARKER)):
532
- with open(
533
- os.path.join(get_metaflow_root(), MFCONTENT_MARKER),
534
- "r",
535
- encoding="utf-8",
536
- ) as f:
536
+ if target_dir == "_local":
537
+ root = os.environ.get("METAFLOW_EXTRACTED_ROOT", get_metaflow_root())
538
+ else:
539
+ root = target_dir
540
+ if os.path.exists(os.path.join(root, MFCONTENT_MARKER)):
541
+ with open(os.path.join(root, MFCONTENT_MARKER), "r", encoding="utf-8") as f:
537
542
  mfcontent_info = json.load(f)
538
- cls._cached_mfcontent_info["_local"] = mfcontent_info
543
+ cls._cached_mfcontent_info[target_dir] = mfcontent_info
539
544
  return mfcontent_info
540
545
 
541
546
  def get_package_version(self) -> int:
@@ -627,13 +632,13 @@ class MetaflowCodeContentV0(MetaflowCodeContent, version_id=0):
627
632
  return None
628
633
 
629
634
  @classmethod
630
- def get_archive_content_names_impl(
635
+ def get_archive_content_members_impl(
631
636
  cls,
632
637
  mfcontent_info: Optional[Dict[str, Any]],
633
638
  archive: Any,
634
639
  content_types: Optional[int] = None,
635
640
  packaging_backend: Type[PackagingBackend] = TarPackagingBackend,
636
- ) -> List[str]:
641
+ ) -> List[Any]:
637
642
  """
638
643
  For V0, we use a static list of known files to classify the content
639
644
  """
@@ -649,16 +654,20 @@ class MetaflowCodeContentV0(MetaflowCodeContent, version_id=0):
649
654
  "condav2-1.cnd": ContentType.OTHER_CONTENT.value,
650
655
  }
651
656
  to_return = []
652
- for filename in packaging_backend.cls_list_members(archive):
657
+ for member in packaging_backend.cls_list_members(archive):
658
+ filename = packaging_backend.cls_member_name(member)
659
+ added = False
653
660
  for prefix, classification in known_prefixes.items():
654
661
  if (
655
662
  prefix[-1] == "/" and filename.startswith(prefix)
656
663
  ) or prefix == filename:
657
664
  if content_types & classification:
658
- to_return.append(filename)
659
- elif content_types & ContentType.USER_CONTENT.value:
660
- # Everything else is user content
661
- to_return.append(filename)
665
+ to_return.append(member)
666
+ added = True
667
+ break
668
+ if not added and content_types & ContentType.USER_CONTENT.value:
669
+ # Everything else is user content
670
+ to_return.append(member)
662
671
  return to_return
663
672
 
664
673
  @classmethod
@@ -705,7 +714,7 @@ class MetaflowCodeContentV1Base(MetaflowCodeContent, version_id=1):
705
714
  cls, mfcontent_info: Optional[Dict[str, Any]], filename: str, in_archive: bool
706
715
  ) -> str:
707
716
  if in_archive:
708
- return filename
717
+ return os.path.join(cls._other_dir, filename)
709
718
  return os.path.join(get_metaflow_root(), "..", cls._other_dir, filename)
710
719
 
711
720
  @classmethod
@@ -713,7 +722,7 @@ class MetaflowCodeContentV1Base(MetaflowCodeContent, version_id=1):
713
722
  cls, mfcontent_info: Optional[Dict[str, Any]], filename: str, in_archive: bool
714
723
  ) -> str:
715
724
  if in_archive:
716
- return filename
725
+ return os.path.join(cls._code_dir, filename)
717
726
  return os.path.join(get_metaflow_root(), filename)
718
727
 
719
728
  @classmethod
@@ -832,37 +841,38 @@ class MetaflowCodeContentV1Base(MetaflowCodeContent, version_id=1):
832
841
  return None
833
842
 
834
843
  @classmethod
835
- def get_archive_content_names_impl(
844
+ def get_archive_content_members_impl(
836
845
  cls,
837
846
  mfcontent_info: Optional[Dict[str, Any]],
838
847
  archive: Any,
839
848
  content_types: Optional[int] = None,
840
849
  packaging_backend: Type[PackagingBackend] = TarPackagingBackend,
841
- ) -> List[str]:
850
+ ) -> List[Any]:
842
851
  to_return = []
843
852
  module_content = set(mfcontent_info.get("module_files", []))
844
- for filename in packaging_backend.cls_list_members(archive):
853
+ for member in packaging_backend.cls_list_members(archive):
854
+ filename = packaging_backend.cls_member_name(member)
845
855
  if filename.startswith(cls._other_dir) and (
846
856
  content_types & ContentType.OTHER_CONTENT.value
847
857
  ):
848
- to_return.append(filename)
858
+ to_return.append(member)
849
859
  elif filename.startswith(cls._code_dir):
850
860
  # Special case for marker which is a other content even if in code.
851
- if filename == f"{cls._code_dir}/{MFCONTENT_MARKER}":
861
+ if filename == MFCONTENT_MARKER:
852
862
  if content_types & ContentType.OTHER_CONTENT.value:
853
- to_return.append(filename)
863
+ to_return.append(member)
854
864
  else:
855
865
  continue
856
866
  # Here it is either module or code
857
867
  if os.path.join(cls._code_dir, filename) in module_content:
858
868
  if content_types & ContentType.MODULE_CONTENT.value:
859
- to_return.append(filename)
869
+ to_return.append(member)
860
870
  elif content_types & ContentType.CODE_CONTENT.value:
861
- to_return.append(filename)
871
+ to_return.append(member)
862
872
  else:
863
873
  if content_types & ContentType.USER_CONTENT.value:
864
874
  # Everything else is user content
865
- to_return.append(filename)
875
+ to_return.append(member)
866
876
  return to_return
867
877
 
868
878
  @classmethod
@@ -57,6 +57,15 @@ class PackagingBackend(ABC):
57
57
  """Open the archive from the given content."""
58
58
  pass
59
59
 
60
@classmethod
@abstractmethod
def cls_member_name(cls, member: Union[Any, str]) -> str:
    """
    Return the name of ``member`` as a string.

    Intended to give consistent member naming across different archive
    formats. This declaration is abstract and has no behavior of its own.
    """
68
+
60
69
  @classmethod
61
70
  @abstractmethod
62
71
  def cls_has_member(cls, archive: Any, name: str) -> bool:
@@ -72,14 +81,20 @@ class PackagingBackend(ABC):
72
81
  def cls_extract_members(
73
82
  cls,
74
83
  archive: Any,
75
- members: Optional[List[str]] = None,
84
+ members: Optional[List[Any]] = None,
76
85
  dest_dir: str = ".",
77
86
  ) -> None:
78
87
  pass
79
88
 
80
89
  @classmethod
81
90
  @abstractmethod
82
- def cls_list_members(cls, archive: Any) -> Optional[List[str]]:
91
+ def cls_list_names(cls, archive: Any) -> Optional[List[str]]:
92
+ pass
93
+
94
+ @classmethod
95
+ @abstractmethod
96
+ def cls_list_members(cls, archive: Any) -> Optional[List[Any]]:
97
+ """List all members in the archive."""
83
98
  pass
84
99
 
85
100
  def has_member(self, name: str) -> bool:
@@ -93,17 +108,17 @@ class PackagingBackend(ABC):
93
108
  raise ValueError("Cannot get member from an uncreated archive")
94
109
 
95
110
  def extract_members(
96
- self, members: Optional[List[str]] = None, dest_dir: str = "."
111
+ self, members: Optional[List[Any]] = None, dest_dir: str = "."
97
112
  ) -> None:
98
113
  if self._archive:
99
114
  self.cls_extract_members(self._archive, members, dest_dir)
100
115
  else:
101
116
  raise ValueError("Cannot extract from an uncreated archive")
102
117
 
103
- def list_members(self) -> Optional[List[str]]:
118
+ def list_names(self) -> Optional[List[str]]:
104
119
  if self._archive:
105
- return self.cls_list_members(self._archive)
106
- raise ValueError("Cannot list members from an uncreated archive")
120
+ return self.cls_list_names(self._archive)
121
+ raise ValueError("Cannot list names from an uncreated archive")
107
122
 
108
123
  def __enter__(self):
109
124
  self.create()