ob-metaflow 2.16.8.2rc2__py2.py3-none-any.whl → 2.17.0.1__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ob-metaflow might be problematic.

Files changed (62)
  1. metaflow/_vendor/click/core.py +3 -4
  2. metaflow/_vendor/imghdr/__init__.py +7 -1
  3. metaflow/_vendor/yaml/__init__.py +427 -0
  4. metaflow/_vendor/yaml/composer.py +139 -0
  5. metaflow/_vendor/yaml/constructor.py +748 -0
  6. metaflow/_vendor/yaml/cyaml.py +101 -0
  7. metaflow/_vendor/yaml/dumper.py +62 -0
  8. metaflow/_vendor/yaml/emitter.py +1137 -0
  9. metaflow/_vendor/yaml/error.py +75 -0
  10. metaflow/_vendor/yaml/events.py +86 -0
  11. metaflow/_vendor/yaml/loader.py +63 -0
  12. metaflow/_vendor/yaml/nodes.py +49 -0
  13. metaflow/_vendor/yaml/parser.py +589 -0
  14. metaflow/_vendor/yaml/reader.py +185 -0
  15. metaflow/_vendor/yaml/representer.py +389 -0
  16. metaflow/_vendor/yaml/resolver.py +227 -0
  17. metaflow/_vendor/yaml/scanner.py +1435 -0
  18. metaflow/_vendor/yaml/serializer.py +111 -0
  19. metaflow/_vendor/yaml/tokens.py +104 -0
  20. metaflow/cli.py +11 -2
  21. metaflow/cli_components/run_cmds.py +0 -15
  22. metaflow/client/core.py +6 -1
  23. metaflow/extension_support/__init__.py +4 -3
  24. metaflow/flowspec.py +1 -113
  25. metaflow/graph.py +10 -134
  26. metaflow/lint.py +3 -70
  27. metaflow/metaflow_environment.py +14 -6
  28. metaflow/package/__init__.py +18 -9
  29. metaflow/packaging_sys/__init__.py +53 -43
  30. metaflow/packaging_sys/backend.py +21 -6
  31. metaflow/packaging_sys/tar_backend.py +16 -3
  32. metaflow/packaging_sys/v1.py +21 -21
  33. metaflow/plugins/argo/argo_client.py +31 -14
  34. metaflow/plugins/argo/argo_workflows.py +67 -22
  35. metaflow/plugins/argo/argo_workflows_cli.py +348 -85
  36. metaflow/plugins/argo/argo_workflows_deployer_objects.py +69 -0
  37. metaflow/plugins/aws/step_functions/step_functions.py +0 -6
  38. metaflow/plugins/aws/step_functions/step_functions_deployer_objects.py +30 -0
  39. metaflow/plugins/cards/card_modules/basic.py +3 -14
  40. metaflow/plugins/cards/card_modules/convert_to_native_type.py +7 -1
  41. metaflow/plugins/kubernetes/kubernetes_decorator.py +1 -1
  42. metaflow/plugins/kubernetes/kubernetes_job.py +8 -2
  43. metaflow/plugins/kubernetes/kubernetes_jobsets.py +26 -28
  44. metaflow/plugins/pypi/conda_decorator.py +4 -2
  45. metaflow/runner/click_api.py +14 -7
  46. metaflow/runner/deployer.py +160 -7
  47. metaflow/runner/subprocess_manager.py +20 -12
  48. metaflow/runtime.py +27 -102
  49. metaflow/task.py +25 -46
  50. metaflow/user_decorators/mutable_flow.py +3 -1
  51. metaflow/util.py +0 -29
  52. metaflow/vendor.py +23 -6
  53. metaflow/version.py +1 -1
  54. {ob_metaflow-2.16.8.2rc2.dist-info → ob_metaflow-2.17.0.1.dist-info}/METADATA +2 -2
  55. {ob_metaflow-2.16.8.2rc2.dist-info → ob_metaflow-2.17.0.1.dist-info}/RECORD +62 -45
  56. {ob_metaflow-2.16.8.2rc2.data → ob_metaflow-2.17.0.1.data}/data/share/metaflow/devtools/Makefile +0 -0
  57. {ob_metaflow-2.16.8.2rc2.data → ob_metaflow-2.17.0.1.data}/data/share/metaflow/devtools/Tiltfile +0 -0
  58. {ob_metaflow-2.16.8.2rc2.data → ob_metaflow-2.17.0.1.data}/data/share/metaflow/devtools/pick_services.sh +0 -0
  59. {ob_metaflow-2.16.8.2rc2.dist-info → ob_metaflow-2.17.0.1.dist-info}/WHEEL +0 -0
  60. {ob_metaflow-2.16.8.2rc2.dist-info → ob_metaflow-2.17.0.1.dist-info}/entry_points.txt +0 -0
  61. {ob_metaflow-2.16.8.2rc2.dist-info → ob_metaflow-2.17.0.1.dist-info}/licenses/LICENSE +0 -0
  62. {ob_metaflow-2.16.8.2rc2.dist-info → ob_metaflow-2.17.0.1.dist-info}/top_level.txt +0 -0
metaflow/graph.py CHANGED
@@ -68,8 +68,6 @@ class DAGNode(object):
         self.has_tail_next = False
         self.invalid_tail_next = False
         self.num_args = 0
-        self.switch_cases = {}
-        self.condition = None
         self.foreach_param = None
         self.num_parallel = 0
         self.parallel_foreach = False
@@ -85,56 +83,6 @@ class DAGNode(object):
     def _expr_str(self, expr):
         return "%s.%s" % (expr.value.id, expr.attr)
 
-    def _parse_switch_dict(self, dict_node):
-        switch_cases = {}
-
-        if isinstance(dict_node, ast.Dict):
-            for key, value in zip(dict_node.keys, dict_node.values):
-                case_key = None
-
-                # handle string literals
-                if isinstance(key, ast.Str):
-                    case_key = key.s
-                elif isinstance(key, ast.Constant) and isinstance(key.value, str):
-                    case_key = key.value
-                elif isinstance(key, ast.Attribute):
-                    if isinstance(key.value, ast.Attribute) and isinstance(
-                        key.value.value, ast.Name
-                    ):
-                        # This handles self.config.some_key
-                        if key.value.value.id == "self":
-                            config_var = key.value.attr
-                            config_key = key.attr
-                            case_key = f"config:{config_var}.{config_key}"
-                        else:
-                            return None
-                    else:
-                        return None
-
-                # handle variables or other dynamic expressions - not allowed
-                elif isinstance(key, ast.Name):
-                    return None
-                else:
-                    # can't statically analyze this key
-                    return None
-
-                if case_key is None:
-                    return None
-
-                # extract the step name from the value
-                if isinstance(value, ast.Attribute) and isinstance(
-                    value.value, ast.Name
-                ):
-                    if value.value.id == "self":
-                        step_name = value.attr
-                        switch_cases[case_key] = step_name
-                    else:
-                        return None
-                else:
-                    return None
-
-        return switch_cases if switch_cases else None
-
     def _parse(self, func_ast, lineno):
         self.num_args = len(func_ast.args.args)
         tail = func_ast.body[-1]
@@ -156,38 +104,7 @@ class DAGNode(object):
             self.has_tail_next = True
             self.invalid_tail_next = True
             self.tail_next_lineno = lineno + tail.lineno - 1
-
-            # Check if first argument is a dictionary (switch case)
-            if (
-                len(tail.value.args) == 1
-                and isinstance(tail.value.args[0], ast.Dict)
-                and any(k.arg == "condition" for k in tail.value.keywords)
-            ):
-                # This is a switch statement
-                switch_cases = self._parse_switch_dict(tail.value.args[0])
-                condition_name = None
-
-                # Get condition parameter
-                for keyword in tail.value.keywords:
-                    if keyword.arg == "condition":
-                        if isinstance(keyword.value, ast.Str):
-                            condition_name = keyword.value.s
-                        elif isinstance(keyword.value, ast.Constant) and isinstance(
-                            keyword.value.value, str
-                        ):
-                            condition_name = keyword.value.value
-                        break
-
-                if switch_cases and condition_name:
-                    self.type = "split-switch"
-                    self.condition = condition_name
-                    self.switch_cases = switch_cases
-                    self.out_funcs = list(switch_cases.values())
-                    self.invalid_tail_next = False
-                    return
-
-            else:
-                self.out_funcs = [e.attr for e in tail.value.args]
+            self.out_funcs = [e.attr for e in tail.value.args]
 
             keywords = dict(
                 (k.arg, getattr(k.value, "s", None)) for k in tail.value.keywords
@@ -234,7 +151,6 @@ class DAGNode(object):
     has_tail_next={0.has_tail_next} (line {0.tail_next_lineno})
     invalid_tail_next={0.invalid_tail_next}
     foreach_param={0.foreach_param}
-    condition={0.condition}
     parallel_step={0.parallel_step}
     parallel_foreach={0.parallel_foreach}
     -> {out}""".format(
@@ -303,8 +219,6 @@ class FlowGraph(object):
             if node.type in ("split", "foreach"):
                 node.split_parents = split_parents
                 split_parents = split_parents + [node.name]
-            elif node.type == "split-switch":
-                node.split_parents = split_parents
             elif node.type == "join":
                 # ignore joins without splits
                 if split_parents:
@@ -345,37 +259,15 @@ class FlowGraph(object):
     def output_dot(self):
         def edge_specs():
             for node in self.nodes.values():
-                if node.type == "split-switch":
-                    # Label edges for switch cases
-                    for case_value, step_name in node.switch_cases.items():
-                        yield (
-                            '{0} -> {1} [label="{2}" color="blue" fontcolor="blue"];'.format(
-                                node.name, step_name, case_value
-                            )
-                        )
-                else:
-                    for edge in node.out_funcs:
-                        yield "%s -> %s;" % (node.name, edge)
+                for edge in node.out_funcs:
+                    yield "%s -> %s;" % (node.name, edge)
 
         def node_specs():
            for node in self.nodes.values():
-                if node.type == "split-switch":
-                    # Hexagon shape for switch nodes
-                    condition_label = (
-                        f"switch: {node.condition}" if node.condition else "switch"
-                    )
-                    yield (
-                        '"{0.name}" '
-                        '[ label = <<b>{0.name}</b><br/><font point-size="9">{condition}</font>> '
-                        ' fontname = "Helvetica" '
-                        ' shape = "hexagon" '
-                        ' style = "filled" fillcolor = "lightgreen" ];'
-                    ).format(node, condition=condition_label)
-                else:
-                    nodetype = "join" if node.num_args > 1 else node.type
-                    yield '"{0.name}"' '[ label = <<b>{0.name}</b> | <font point-size="10">{type}</font>> ' ' fontname = "Helvetica" ' ' shape = "record" ];'.format(
-                        node, type=nodetype
-                    )
+                nodetype = "join" if node.num_args > 1 else node.type
+                yield '"{0.name}"' '[ label = <<b>{0.name}</b> | <font point-size="10">{type}</font>> ' ' fontname = "Helvetica" ' ' shape = "record" ];'.format(
+                    node, type=nodetype
+                )
 
         return (
             "digraph {0.name} {{\n"
@@ -399,8 +291,6 @@ class FlowGraph(object):
                 if node.parallel_foreach:
                     return "split-parallel"
                 return "split-foreach"
-            elif node.type == "split-switch":
-                return "split-switch"
             return "unknown"  # Should never happen
 
         def node_to_dict(name, node):
@@ -435,9 +325,6 @@ class FlowGraph(object):
                d["foreach_artifact"] = node.foreach_param
            elif d["type"] == "split-parallel":
                d["num_parallel"] = node.num_parallel
-            elif d["type"] == "split-switch":
-                d["condition"] = node.condition
-                d["switch_cases"] = node.switch_cases
            if node.matching_join:
                d["matching_join"] = node.matching_join
            return d
@@ -452,8 +339,8 @@ class FlowGraph(object):
                 steps_info[cur_name] = node_dict
                 resulting_list.append(cur_name)
 
-                node_type = node_to_type(cur_node)
-                if node_type in ("split-static", "split-foreach"):
+                if cur_node.type not in ("start", "linear", "join"):
+                    # We need to look at the different branches for this
                     resulting_list.append(
                         [
                             populate_block(s, cur_node.matching_join)
@@ -461,19 +348,8 @@ class FlowGraph(object):
                         ]
                     )
                     cur_name = cur_node.matching_join
-                elif node_type == "split-switch":
-                    all_paths = [
-                        populate_block(s, end_name) for s in cur_node.out_funcs
-                    ]
-                    resulting_list.append(all_paths)
-                    cur_name = end_name
                 else:
-                    # handles only linear, start, and join steps.
-                    if cur_node.out_funcs:
-                        cur_name = cur_node.out_funcs[0]
-                    else:
-                        # handles terminal nodes or when we jump to 'end_name'.
-                        break
+                    cur_name = cur_node.out_funcs[0]
             return resulting_list
 
         graph_structure = populate_block("start", "end")
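
Note: with the experimental split-switch support removed above, DAGNode._parse once again recognizes only positional step references in self.next() plus the foreach/num_parallel keywords. As a reference, a minimal, hypothetical flow exercising the transition forms that remain supported (linear, static split with join, foreach) could look like the sketch below; the flow and step names are illustrative and not part of this package.

from metaflow import FlowSpec, step


class TransitionsFlow(FlowSpec):
    """Hypothetical flow showing the transition forms DAGNode._parse still accepts."""

    @step
    def start(self):
        self.items = ["a", "b"]
        # Linear transition: a single positional step reference.
        self.next(self.fan_out)

    @step
    def fan_out(self):
        # Static split: several positional step references, joined later.
        self.next(self.left, self.right)

    @step
    def left(self):
        self.next(self.join_branches)

    @step
    def right(self):
        self.next(self.join_branches)

    @step
    def join_branches(self, inputs):
        # Merge unchanged artifacts from both branches before continuing.
        self.merge_artifacts(inputs)
        # Foreach transition: one step reference plus the foreach keyword.
        self.next(self.process_item, foreach="items")

    @step
    def process_item(self):
        self.item = self.input
        self.next(self.join_items)

    @step
    def join_items(self, inputs):
        self.next(self.end)

    @step
    def end(self):
        pass


if __name__ == "__main__":
    TransitionsFlow()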
metaflow/lint.py CHANGED
@@ -1,6 +1,6 @@
 import re
 from .exception import MetaflowException
-from .util import all_equal, get_split_branch_for_node
+from .util import all_equal
 
 
 class LintWarn(MetaflowException):
@@ -134,13 +134,7 @@ def check_valid_transitions(graph):
     msg = (
         "Step *{0.name}* specifies an invalid self.next() transition. "
         "Make sure the self.next() expression matches with one of the "
-        "supported transition types:\n"
-        " • Linear: self.next(self.step_name)\n"
-        " • Fan-out: self.next(self.step1, self.step2, ...)\n"
-        " • Foreach: self.next(self.step, foreach='variable')\n"
-        " • Switch: self.next({{\"key\": self.step, ...}}, condition='variable')\n\n"
-        "For switch statements, keys must be string literals or config expressions "
-        "(self.config.key_name), not variables or numbers."
+        "supported transition types."
     )
     for node in graph:
         if node.type != "end" and node.has_tail_next and node.invalid_tail_next:
@@ -238,13 +232,7 @@ def check_split_join_balance(graph):
            new_stack = split_stack
        elif node.type in ("split", "foreach"):
            new_stack = split_stack + [("split", node.out_funcs)]
-        elif node.type == "split-switch":
-            # For a switch, continue traversal down each path with the same stack
-            for n in node.out_funcs:
-                traverse(graph[n], split_stack)
-            return
        elif node.type == "end":
-            new_stack = split_stack
            if split_stack:
                _, split_roots = split_stack.pop()
                roots = ", ".join(split_roots)
@@ -252,25 +240,10 @@ def check_split_join_balance(graph):
                    msg0.format(roots=roots), node.func_lineno, node.source_file
                )
        elif node.type == "join":
-            new_stack = split_stack
            if split_stack:
                _, split_roots = split_stack[-1]
                new_stack = split_stack[:-1]
-
-                # Identify the split this join corresponds to from its parentage.
-                split_node_name = node.split_parents[-1]
-
-                # Resolve each incoming function to its root branch from the split.
-                resolved_branches = set()
-                for in_node_name in node.in_funcs:
-                    branch = get_split_branch_for_node(
-                        graph, in_node_name, split_node_name
-                    )
-                    if branch:
-                        resolved_branches.add(branch)
-
-                # compares the set of resolved branches against the expected branches from the split.
-                if len(resolved_branches) != len(split_roots):
+                if len(node.in_funcs) != len(split_roots):
                    paths = ", ".join(node.in_funcs)
                    roots = ", ".join(split_roots)
                    raise LintWarn(
@@ -293,8 +266,6 @@ def check_split_join_balance(graph):
 
            if not all_equal(map(parents, node.in_funcs)):
                raise LintWarn(msg3.format(node), node.func_lineno, node.source_file)
-        else:
-            new_stack = split_stack
 
        for n in node.out_funcs:
            traverse(graph[n], new_stack)
@@ -302,44 +273,6 @@ def check_split_join_balance(graph):
    traverse(graph["start"], [])
 
 
-@linter.ensure_static_graph
-@linter.check
-def check_switch_splits(graph):
-    """Check conditional split constraints"""
-    msg0 = (
-        "Step *{0.name}* is a switch split but defines {num} transitions. "
-        "Switch splits must define at least 2 transitions."
-    )
-    msg1 = "Step *{0.name}* is a switch split but has no condition variable."
-    msg2 = "Step *{0.name}* is a switch split but has no switch cases defined."
-
-    for node in graph:
-        if node.type == "split-switch":
-            # Check at least 2 outputs
-            if len(node.out_funcs) < 2:
-                raise LintWarn(
-                    msg0.format(node, num=len(node.out_funcs)),
-                    node.func_lineno,
-                    node.source_file,
-                )
-
-            # Check condition exists
-            if not node.condition:
-                raise LintWarn(
-                    msg1.format(node),
-                    node.func_lineno,
-                    node.source_file,
-                )
-
-            # Check switch cases exist
-            if not node.switch_cases:
-                raise LintWarn(
-                    msg2.format(node),
-                    node.func_lineno,
-                    node.source_file,
-                )
-
-
 @linter.ensure_static_graph
 @linter.check
 def check_empty_foreaches(graph):
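
Note: check_split_join_balance reverts to the simpler invariant that the number of steps arriving at a join equals the number of branches declared by the matching split. A rough standalone sketch of that comparison follows; the names here are hypothetical stand-ins for the linter's DAGNode attributes, not code from the package.

# Hedged sketch of the balance condition the restored check enforces: every
# branch opened by a split must arrive at its join, no more and no fewer.
def is_balanced_join(split_out_funcs, join_in_funcs):
    # The real check raises LintWarn when these counts differ.
    return len(join_in_funcs) == len(split_out_funcs)


split_out = ["branch_a", "branch_b"]  # a split fanning out to two branches
assert is_balanced_join(split_out, ["branch_a", "branch_b"])
assert not is_balanced_join(split_out, ["branch_a"])  # would trigger LintWarn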
metaflow/metaflow_environment.py CHANGED
@@ -203,6 +203,19 @@ class MetaflowEnvironment(object):
                 "mfcontent_version": 1,
             }
         )
+
+        extra_exports = []
+        for k, v in MetaflowPackage.get_post_extract_env_vars(
+            code_package_metadata, dest_dir="$(pwd)"
+        ).items():
+            if k.endswith(":"):
+                # If the value ends with a colon, we override the existing value
+                extra_exports.append("export %s=%s" % (k[:-1], v))
+            else:
+                extra_exports.append(
+                    "export %s=%s:$(printenv %s)" % (k, v.replace('"', '\\"'), k)
+                )
+
         cmds = (
             [
                 BASH_MFLOG,
@@ -226,12 +239,7 @@
             + MetaflowPackage.get_extract_commands(
                 code_package_metadata, "job.tar", dest_dir="."
             )
-            + [
-                "export %s=%s:$(printenv %s)" % (k, v.replace('"', '\\"'), k)
-                for k, v in MetaflowPackage.get_post_extract_env_vars(
-                    code_package_metadata, dest_dir="."
-                ).items()
-            ]
+            + extra_exports
             + [
                 "mflog 'Task is starting.'",
                 "flush_mflogs",
metaflow/package/__init__.py CHANGED
@@ -17,7 +17,6 @@ from ..packaging_sys.utils import suffix_filter, walk
 from ..metaflow_config import DEFAULT_PACKAGE_SUFFIXES
 from ..exception import MetaflowException
 from ..user_configs.config_parameters import dump_config_values
-from ..util import get_metaflow_root
 from .. import R
 
 DEFAULT_SUFFIXES_LIST = DEFAULT_PACKAGE_SUFFIXES.split(",")
@@ -76,12 +75,22 @@ class MetaflowPackage(object):
             from ..user_decorators.user_flow_decorator import FlowMutatorMeta
             from ..user_decorators.user_step_decorator import UserStepDecoratorMeta
 
-            if (
-                m.__name__ in FlowMutatorMeta._import_modules
-                or m.__name__ in UserStepDecoratorMeta._import_modules
-                or hasattr(m, "METAFLOW_PACKAGE")
-            ):
-                return True
+            # Be very defensive here to filter modules in case there are
+            # some badly behaved modules that have weird values for
+            # METAFLOW_PACKAGE_POLICY for example.
+            try:
+                if (
+                    m.__name__ in FlowMutatorMeta._import_modules
+                    or m.__name__ in UserStepDecoratorMeta._import_modules
+                    or (
+                        hasattr(m, "METAFLOW_PACKAGE_POLICY")
+                        and m.METAFLOW_PACKAGE_POLICY == "include"
+                    )
+                ):
+                    return True
+                return False
+            except:
+                return False
 
         if mfcontent is None:
             self._mfcontent = MetaflowCodeContentV1(criteria=_module_selector)
@@ -350,10 +359,10 @@ class MetaflowPackage(object):
         """
         backend = cls.get_backend(pkg_metadata)
         with backend.cls_open(archive) as opened_archive:
-            include_names = MetaflowCodeContent.get_archive_content_names(
+            include_members = MetaflowCodeContent.get_archive_content_members(
                 opened_archive, content_types, backend
             )
-            backend.extract_members(include_names, dest_dir)
+            backend.cls_extract_members(opened_archive, include_members, dest_dir)
 
     def user_tuples(self, timeout: Optional[float] = None):
         # Wait for at least the blob to be formed
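
Note: _module_selector now packages a module either because flow/step decorators registered it or because the module itself opts in with METAFLOW_PACKAGE_POLICY = "include" (replacing the earlier METAFLOW_PACKAGE attribute check); any exception raised while inspecting a module simply excludes it. A hedged sketch of a module opting in; the module path is hypothetical.

# my_extension/__init__.py -- hypothetical module layout.
# The new selector includes this module because the attribute exists and its
# value is exactly "include"; any other value, or an attribute access that
# raises, leaves the module out of the code package.
METAFLOW_PACKAGE_POLICY = "include"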
metaflow/packaging_sys/__init__.py CHANGED
@@ -118,9 +118,7 @@ class MetaflowCodeContent:
         return handling_cls.get_filename_impl(mfcontent_info, filename, content_type)
 
     @classmethod
-    def get_env_vars_for_packaged_metaflow(
-        cls, dest_dir: str
-    ) -> Optional[Dict[str, str]]:
+    def get_env_vars_for_packaged_metaflow(cls, dest_dir: str) -> Dict[str, str]:
         """
         Get the environment variables that are needed to run Metaflow when it is
         packaged. This is typically used to set the PYTHONPATH to include the
@@ -128,17 +126,19 @@ class MetaflowCodeContent:
 
         Returns
         -------
-        Optional[Dict[str, str]]
+        Dict[str, str]
             The environment variables that are needed to run Metaflow when it is
-            packaged -- None if there are no such variables (not packaged for example)
+            packaged it present.
         """
-        mfcontent_info = cls._extract_mfcontent_info()
+        mfcontent_info = cls._extract_mfcontent_info(dest_dir)
         if mfcontent_info is None:
             # No MFCONTENT_MARKER file found -- this is not a packaged Metaflow code
             # package so no environment variables to set.
-            return None
+            return {}
         handling_cls = cls._get_mfcontent_class(mfcontent_info)
-        return handling_cls.get_post_extract_env_vars_impl(dest_dir)
+        v = handling_cls.get_post_extract_env_vars_impl(dest_dir)
+        v["METAFLOW_EXTRACTED_ROOT:"] = dest_dir
+        return v
 
     @classmethod
     def get_archive_info(
@@ -216,15 +216,15 @@ class MetaflowCodeContent:
         )
 
     @classmethod
-    def get_archive_content_names(
+    def get_archive_content_members(
         cls,
         archive: Any,
         content_types: Optional[int] = None,
         packaging_backend: Type[PackagingBackend] = TarPackagingBackend,
-    ) -> List[str]:
+    ) -> List[Any]:
         mfcontent_info = cls._extract_archive_mfcontent_info(archive, packaging_backend)
         handling_cls = cls._get_mfcontent_class(mfcontent_info)
-        return handling_cls.get_archive_content_names_impl(
+        return handling_cls.get_archive_content_members_impl(
             mfcontent_info, archive, content_types, packaging_backend
         )
 
@@ -276,7 +276,9 @@ class MetaflowCodeContent:
                 "Invalid package -- unknown version %s in info: %s"
                 % (version_id, cls._mappings)
             )
-        return cls._mappings[version_id].get_post_extract_env_vars_impl(dest_dir)
+        v = cls._mappings[version_id].get_post_extract_env_vars_impl(dest_dir)
+        v["METAFLOW_EXTRACTED_ROOT:"] = dest_dir
+        return v
 
     # Implement the _impl methods in the base subclass (in this file). These need to
     # happen with as few imports as possible to prevent circular dependencies.
@@ -337,14 +339,14 @@ class MetaflowCodeContent:
         raise NotImplementedError("get_archive_filename_impl not implemented")
 
     @classmethod
-    def get_archive_content_names_impl(
+    def get_archive_content_members_impl(
         cls,
         mfcontent_info: Optional[Dict[str, Any]],
         archive: Any,
         content_types: Optional[int] = None,
         packaging_backend: Type[PackagingBackend] = TarPackagingBackend,
-    ) -> List[str]:
-        raise NotImplementedError("get_archive_content_names_impl not implemented")
+    ) -> List[Any]:
+        raise NotImplementedError("get_archive_content_members_impl not implemented")
 
     @classmethod
     def get_post_extract_env_vars_impl(cls, dest_dir: str) -> Dict[str, str]:
@@ -523,19 +525,22 @@ class MetaflowCodeContent:
         return mfcontent_info
 
     @classmethod
-    def _extract_mfcontent_info(cls) -> Optional[Dict[str, Any]]:
-        if "_local" in cls._cached_mfcontent_info:
-            return cls._cached_mfcontent_info["_local"]
+    def _extract_mfcontent_info(
+        cls, target_dir: Optional[str] = None
+    ) -> Optional[Dict[str, Any]]:
+        target_dir = target_dir or "_local"
+        if target_dir in cls._cached_mfcontent_info:
+            return cls._cached_mfcontent_info[target_dir]
 
         mfcontent_info = None  # type: Optional[Dict[str, Any]]
-        if os.path.exists(os.path.join(get_metaflow_root(), MFCONTENT_MARKER)):
-            with open(
-                os.path.join(get_metaflow_root(), MFCONTENT_MARKER),
-                "r",
-                encoding="utf-8",
-            ) as f:
+        if target_dir == "_local":
+            root = os.environ.get("METAFLOW_EXTRACTED_ROOT", get_metaflow_root())
+        else:
+            root = target_dir
+        if os.path.exists(os.path.join(root, MFCONTENT_MARKER)):
+            with open(os.path.join(root, MFCONTENT_MARKER), "r", encoding="utf-8") as f:
                 mfcontent_info = json.load(f)
-        cls._cached_mfcontent_info["_local"] = mfcontent_info
+        cls._cached_mfcontent_info[target_dir] = mfcontent_info
         return mfcontent_info
 
     def get_package_version(self) -> int:
@@ -627,13 +632,13 @@ class MetaflowCodeContentV0(MetaflowCodeContent, version_id=0):
         return None
 
     @classmethod
-    def get_archive_content_names_impl(
+    def get_archive_content_members_impl(
         cls,
         mfcontent_info: Optional[Dict[str, Any]],
         archive: Any,
         content_types: Optional[int] = None,
         packaging_backend: Type[PackagingBackend] = TarPackagingBackend,
-    ) -> List[str]:
+    ) -> List[Any]:
         """
         For V0, we use a static list of known files to classify the content
         """
@@ -649,16 +654,20 @@ class MetaflowCodeContentV0(MetaflowCodeContent, version_id=0):
            "condav2-1.cnd": ContentType.OTHER_CONTENT.value,
        }
        to_return = []
-        for filename in packaging_backend.cls_list_members(archive):
+        for member in packaging_backend.cls_list_members(archive):
+            filename = packaging_backend.cls_member_name(member)
+            added = False
            for prefix, classification in known_prefixes.items():
                if (
                    prefix[-1] == "/" and filename.startswith(prefix)
                ) or prefix == filename:
                    if content_types & classification:
-                        to_return.append(filename)
-                elif content_types & ContentType.USER_CONTENT.value:
-                    # Everything else is user content
-                    to_return.append(filename)
+                        to_return.append(member)
+                        added = True
+                        break
+            if not added and content_types & ContentType.USER_CONTENT.value:
+                # Everything else is user content
+                to_return.append(member)
        return to_return
 
     @classmethod
@@ -705,7 +714,7 @@ class MetaflowCodeContentV1Base(MetaflowCodeContent, version_id=1):
         cls, mfcontent_info: Optional[Dict[str, Any]], filename: str, in_archive: bool
     ) -> str:
         if in_archive:
-            return filename
+            return os.path.join(cls._other_dir, filename)
         return os.path.join(get_metaflow_root(), "..", cls._other_dir, filename)
 
     @classmethod
@@ -713,7 +722,7 @@ class MetaflowCodeContentV1Base(MetaflowCodeContent, version_id=1):
         cls, mfcontent_info: Optional[Dict[str, Any]], filename: str, in_archive: bool
     ) -> str:
         if in_archive:
-            return filename
+            return os.path.join(cls._code_dir, filename)
         return os.path.join(get_metaflow_root(), filename)
 
     @classmethod
@@ -832,37 +841,38 @@ class MetaflowCodeContentV1Base(MetaflowCodeContent, version_id=1):
         return None
 
     @classmethod
-    def get_archive_content_names_impl(
+    def get_archive_content_members_impl(
         cls,
         mfcontent_info: Optional[Dict[str, Any]],
         archive: Any,
         content_types: Optional[int] = None,
         packaging_backend: Type[PackagingBackend] = TarPackagingBackend,
-    ) -> List[str]:
+    ) -> List[Any]:
         to_return = []
         module_content = set(mfcontent_info.get("module_files", []))
-        for filename in packaging_backend.cls_list_members(archive):
+        for member in packaging_backend.cls_list_members(archive):
+            filename = packaging_backend.cls_member_name(member)
            if filename.startswith(cls._other_dir) and (
                content_types & ContentType.OTHER_CONTENT.value
            ):
-                to_return.append(filename)
+                to_return.append(member)
            elif filename.startswith(cls._code_dir):
                # Special case for marker which is a other content even if in code.
-                if filename == f"{cls._code_dir}/{MFCONTENT_MARKER}":
+                if filename == MFCONTENT_MARKER:
                    if content_types & ContentType.OTHER_CONTENT.value:
-                        to_return.append(filename)
+                        to_return.append(member)
                    else:
                        continue
                # Here it is either module or code
                if os.path.join(cls._code_dir, filename) in module_content:
                    if content_types & ContentType.MODULE_CONTENT.value:
-                        to_return.append(filename)
+                        to_return.append(member)
                elif content_types & ContentType.CODE_CONTENT.value:
-                    to_return.append(filename)
+                    to_return.append(member)
            else:
                if content_types & ContentType.USER_CONTENT.value:
                    # Everything else is user content
-                    to_return.append(filename)
+                    to_return.append(member)
        return to_return
 
     @classmethod
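
Note: the content-listing hooks now return backend-specific member objects rather than bare filenames, and extraction goes through cls_extract_members(opened_archive, members, dest_dir). A hedged, standalone sketch of that member-based contract for a tar archive follows; these helpers mirror the cls_list_members / cls_member_name / cls_extract_members calls used above but are illustrative and are not metaflow's TarPackagingBackend.

import tarfile
from typing import List


def list_members(archive: tarfile.TarFile) -> List[tarfile.TarInfo]:
    # Corresponds to listing archive members as objects instead of names.
    return archive.getmembers()


def member_name(member: tarfile.TarInfo) -> str:
    # Corresponds to mapping a member object back to its path for filtering.
    return member.name


def extract_members(
    archive: tarfile.TarFile, members: List[tarfile.TarInfo], dest_dir: str
) -> None:
    # Extracting TarInfo objects (not bare names) avoids a second lookup
    # inside the archive and keeps per-member metadata intact.
    archive.extractall(path=dest_dir, members=members)


# Illustrative usage: pick only Python files out of an archive and extract them.
with tarfile.open("job.tar") as tf:
    wanted = [m for m in list_members(tf) if member_name(m).endswith(".py")]
    extract_members(tf, wanted, "./extracted")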