ob-metaflow 2.16.8.2rc1__py2.py3-none-any.whl → 2.17.0.1__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ob-metaflow might be problematic. Click here for more details.
- metaflow/_vendor/click/core.py +3 -4
- metaflow/_vendor/imghdr/__init__.py +7 -1
- metaflow/_vendor/yaml/__init__.py +427 -0
- metaflow/_vendor/yaml/composer.py +139 -0
- metaflow/_vendor/yaml/constructor.py +748 -0
- metaflow/_vendor/yaml/cyaml.py +101 -0
- metaflow/_vendor/yaml/dumper.py +62 -0
- metaflow/_vendor/yaml/emitter.py +1137 -0
- metaflow/_vendor/yaml/error.py +75 -0
- metaflow/_vendor/yaml/events.py +86 -0
- metaflow/_vendor/yaml/loader.py +63 -0
- metaflow/_vendor/yaml/nodes.py +49 -0
- metaflow/_vendor/yaml/parser.py +589 -0
- metaflow/_vendor/yaml/reader.py +185 -0
- metaflow/_vendor/yaml/representer.py +389 -0
- metaflow/_vendor/yaml/resolver.py +227 -0
- metaflow/_vendor/yaml/scanner.py +1435 -0
- metaflow/_vendor/yaml/serializer.py +111 -0
- metaflow/_vendor/yaml/tokens.py +104 -0
- metaflow/cli.py +11 -2
- metaflow/cli_components/run_cmds.py +0 -15
- metaflow/client/core.py +6 -1
- metaflow/extension_support/__init__.py +4 -3
- metaflow/flowspec.py +1 -113
- metaflow/graph.py +10 -134
- metaflow/lint.py +3 -70
- metaflow/metaflow_environment.py +14 -6
- metaflow/package/__init__.py +18 -9
- metaflow/packaging_sys/__init__.py +53 -43
- metaflow/packaging_sys/backend.py +21 -6
- metaflow/packaging_sys/tar_backend.py +16 -3
- metaflow/packaging_sys/v1.py +21 -21
- metaflow/plugins/argo/argo_client.py +31 -14
- metaflow/plugins/argo/argo_workflows.py +67 -22
- metaflow/plugins/argo/argo_workflows_cli.py +348 -85
- metaflow/plugins/argo/argo_workflows_deployer_objects.py +69 -0
- metaflow/plugins/aws/step_functions/step_functions.py +0 -6
- metaflow/plugins/aws/step_functions/step_functions_deployer_objects.py +30 -0
- metaflow/plugins/cards/card_modules/basic.py +3 -14
- metaflow/plugins/cards/card_modules/convert_to_native_type.py +7 -1
- metaflow/plugins/kubernetes/kubernetes_decorator.py +1 -1
- metaflow/plugins/kubernetes/kubernetes_job.py +8 -2
- metaflow/plugins/kubernetes/kubernetes_jobsets.py +26 -28
- metaflow/plugins/pypi/conda_decorator.py +4 -2
- metaflow/runner/click_api.py +14 -7
- metaflow/runner/deployer.py +160 -7
- metaflow/runner/subprocess_manager.py +20 -12
- metaflow/runtime.py +27 -102
- metaflow/task.py +25 -46
- metaflow/user_decorators/mutable_flow.py +3 -1
- metaflow/util.py +0 -29
- metaflow/vendor.py +23 -6
- metaflow/version.py +1 -1
- {ob_metaflow-2.16.8.2rc1.dist-info → ob_metaflow-2.17.0.1.dist-info}/METADATA +2 -2
- {ob_metaflow-2.16.8.2rc1.dist-info → ob_metaflow-2.17.0.1.dist-info}/RECORD +62 -45
- {ob_metaflow-2.16.8.2rc1.data → ob_metaflow-2.17.0.1.data}/data/share/metaflow/devtools/Makefile +0 -0
- {ob_metaflow-2.16.8.2rc1.data → ob_metaflow-2.17.0.1.data}/data/share/metaflow/devtools/Tiltfile +0 -0
- {ob_metaflow-2.16.8.2rc1.data → ob_metaflow-2.17.0.1.data}/data/share/metaflow/devtools/pick_services.sh +0 -0
- {ob_metaflow-2.16.8.2rc1.dist-info → ob_metaflow-2.17.0.1.dist-info}/WHEEL +0 -0
- {ob_metaflow-2.16.8.2rc1.dist-info → ob_metaflow-2.17.0.1.dist-info}/entry_points.txt +0 -0
- {ob_metaflow-2.16.8.2rc1.dist-info → ob_metaflow-2.17.0.1.dist-info}/licenses/LICENSE +0 -0
- {ob_metaflow-2.16.8.2rc1.dist-info → ob_metaflow-2.17.0.1.dist-info}/top_level.txt +0 -0
metaflow/graph.py
CHANGED
|
@@ -68,8 +68,6 @@ class DAGNode(object):
|
|
|
68
68
|
self.has_tail_next = False
|
|
69
69
|
self.invalid_tail_next = False
|
|
70
70
|
self.num_args = 0
|
|
71
|
-
self.switch_cases = {}
|
|
72
|
-
self.condition = None
|
|
73
71
|
self.foreach_param = None
|
|
74
72
|
self.num_parallel = 0
|
|
75
73
|
self.parallel_foreach = False
|
|
@@ -85,56 +83,6 @@ class DAGNode(object):
|
|
|
85
83
|
def _expr_str(self, expr):
|
|
86
84
|
return "%s.%s" % (expr.value.id, expr.attr)
|
|
87
85
|
|
|
88
|
-
def _parse_switch_dict(self, dict_node):
|
|
89
|
-
switch_cases = {}
|
|
90
|
-
|
|
91
|
-
if isinstance(dict_node, ast.Dict):
|
|
92
|
-
for key, value in zip(dict_node.keys, dict_node.values):
|
|
93
|
-
case_key = None
|
|
94
|
-
|
|
95
|
-
# handle string literals
|
|
96
|
-
if isinstance(key, ast.Str):
|
|
97
|
-
case_key = key.s
|
|
98
|
-
elif isinstance(key, ast.Constant) and isinstance(key.value, str):
|
|
99
|
-
case_key = key.value
|
|
100
|
-
elif isinstance(key, ast.Attribute):
|
|
101
|
-
if isinstance(key.value, ast.Attribute) and isinstance(
|
|
102
|
-
key.value.value, ast.Name
|
|
103
|
-
):
|
|
104
|
-
# This handles self.config.some_key
|
|
105
|
-
if key.value.value.id == "self":
|
|
106
|
-
config_var = key.value.attr
|
|
107
|
-
config_key = key.attr
|
|
108
|
-
case_key = f"config:{config_var}.{config_key}"
|
|
109
|
-
else:
|
|
110
|
-
return None
|
|
111
|
-
else:
|
|
112
|
-
return None
|
|
113
|
-
|
|
114
|
-
# handle variables or other dynamic expressions - not allowed
|
|
115
|
-
elif isinstance(key, ast.Name):
|
|
116
|
-
return None
|
|
117
|
-
else:
|
|
118
|
-
# can't statically analyze this key
|
|
119
|
-
return None
|
|
120
|
-
|
|
121
|
-
if case_key is None:
|
|
122
|
-
return None
|
|
123
|
-
|
|
124
|
-
# extract the step name from the value
|
|
125
|
-
if isinstance(value, ast.Attribute) and isinstance(
|
|
126
|
-
value.value, ast.Name
|
|
127
|
-
):
|
|
128
|
-
if value.value.id == "self":
|
|
129
|
-
step_name = value.attr
|
|
130
|
-
switch_cases[case_key] = step_name
|
|
131
|
-
else:
|
|
132
|
-
return None
|
|
133
|
-
else:
|
|
134
|
-
return None
|
|
135
|
-
|
|
136
|
-
return switch_cases if switch_cases else None
|
|
137
|
-
|
|
138
86
|
def _parse(self, func_ast, lineno):
|
|
139
87
|
self.num_args = len(func_ast.args.args)
|
|
140
88
|
tail = func_ast.body[-1]
|
|
@@ -156,38 +104,7 @@ class DAGNode(object):
|
|
|
156
104
|
self.has_tail_next = True
|
|
157
105
|
self.invalid_tail_next = True
|
|
158
106
|
self.tail_next_lineno = lineno + tail.lineno - 1
|
|
159
|
-
|
|
160
|
-
# Check if first argument is a dictionary (switch case)
|
|
161
|
-
if (
|
|
162
|
-
len(tail.value.args) == 1
|
|
163
|
-
and isinstance(tail.value.args[0], ast.Dict)
|
|
164
|
-
and any(k.arg == "condition" for k in tail.value.keywords)
|
|
165
|
-
):
|
|
166
|
-
# This is a switch statement
|
|
167
|
-
switch_cases = self._parse_switch_dict(tail.value.args[0])
|
|
168
|
-
condition_name = None
|
|
169
|
-
|
|
170
|
-
# Get condition parameter
|
|
171
|
-
for keyword in tail.value.keywords:
|
|
172
|
-
if keyword.arg == "condition":
|
|
173
|
-
if isinstance(keyword.value, ast.Str):
|
|
174
|
-
condition_name = keyword.value.s
|
|
175
|
-
elif isinstance(keyword.value, ast.Constant) and isinstance(
|
|
176
|
-
keyword.value.value, str
|
|
177
|
-
):
|
|
178
|
-
condition_name = keyword.value.value
|
|
179
|
-
break
|
|
180
|
-
|
|
181
|
-
if switch_cases and condition_name:
|
|
182
|
-
self.type = "split-switch"
|
|
183
|
-
self.condition = condition_name
|
|
184
|
-
self.switch_cases = switch_cases
|
|
185
|
-
self.out_funcs = list(switch_cases.values())
|
|
186
|
-
self.invalid_tail_next = False
|
|
187
|
-
return
|
|
188
|
-
|
|
189
|
-
else:
|
|
190
|
-
self.out_funcs = [e.attr for e in tail.value.args]
|
|
107
|
+
self.out_funcs = [e.attr for e in tail.value.args]
|
|
191
108
|
|
|
192
109
|
keywords = dict(
|
|
193
110
|
(k.arg, getattr(k.value, "s", None)) for k in tail.value.keywords
|
|
@@ -234,7 +151,6 @@ class DAGNode(object):
|
|
|
234
151
|
has_tail_next={0.has_tail_next} (line {0.tail_next_lineno})
|
|
235
152
|
invalid_tail_next={0.invalid_tail_next}
|
|
236
153
|
foreach_param={0.foreach_param}
|
|
237
|
-
condition={0.condition}
|
|
238
154
|
parallel_step={0.parallel_step}
|
|
239
155
|
parallel_foreach={0.parallel_foreach}
|
|
240
156
|
-> {out}""".format(
|
|
@@ -303,8 +219,6 @@ class FlowGraph(object):
|
|
|
303
219
|
if node.type in ("split", "foreach"):
|
|
304
220
|
node.split_parents = split_parents
|
|
305
221
|
split_parents = split_parents + [node.name]
|
|
306
|
-
elif node.type == "split-switch":
|
|
307
|
-
node.split_parents = split_parents
|
|
308
222
|
elif node.type == "join":
|
|
309
223
|
# ignore joins without splits
|
|
310
224
|
if split_parents:
|
|
@@ -345,37 +259,15 @@ class FlowGraph(object):
|
|
|
345
259
|
def output_dot(self):
|
|
346
260
|
def edge_specs():
|
|
347
261
|
for node in self.nodes.values():
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
for case_value, step_name in node.switch_cases.items():
|
|
351
|
-
yield (
|
|
352
|
-
'{0} -> {1} [label="{2}" color="blue" fontcolor="blue"];'.format(
|
|
353
|
-
node.name, step_name, case_value
|
|
354
|
-
)
|
|
355
|
-
)
|
|
356
|
-
else:
|
|
357
|
-
for edge in node.out_funcs:
|
|
358
|
-
yield "%s -> %s;" % (node.name, edge)
|
|
262
|
+
for edge in node.out_funcs:
|
|
263
|
+
yield "%s -> %s;" % (node.name, edge)
|
|
359
264
|
|
|
360
265
|
def node_specs():
|
|
361
266
|
for node in self.nodes.values():
|
|
362
|
-
if node.
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
)
|
|
367
|
-
yield (
|
|
368
|
-
'"{0.name}" '
|
|
369
|
-
'[ label = <<b>{0.name}</b><br/><font point-size="9">{condition}</font>> '
|
|
370
|
-
' fontname = "Helvetica" '
|
|
371
|
-
' shape = "hexagon" '
|
|
372
|
-
' style = "filled" fillcolor = "lightgreen" ];'
|
|
373
|
-
).format(node, condition=condition_label)
|
|
374
|
-
else:
|
|
375
|
-
nodetype = "join" if node.num_args > 1 else node.type
|
|
376
|
-
yield '"{0.name}"' '[ label = <<b>{0.name}</b> | <font point-size="10">{type}</font>> ' ' fontname = "Helvetica" ' ' shape = "record" ];'.format(
|
|
377
|
-
node, type=nodetype
|
|
378
|
-
)
|
|
267
|
+
nodetype = "join" if node.num_args > 1 else node.type
|
|
268
|
+
yield '"{0.name}"' '[ label = <<b>{0.name}</b> | <font point-size="10">{type}</font>> ' ' fontname = "Helvetica" ' ' shape = "record" ];'.format(
|
|
269
|
+
node, type=nodetype
|
|
270
|
+
)
|
|
379
271
|
|
|
380
272
|
return (
|
|
381
273
|
"digraph {0.name} {{\n"
|
|
@@ -399,8 +291,6 @@ class FlowGraph(object):
|
|
|
399
291
|
if node.parallel_foreach:
|
|
400
292
|
return "split-parallel"
|
|
401
293
|
return "split-foreach"
|
|
402
|
-
elif node.type == "split-switch":
|
|
403
|
-
return "split-switch"
|
|
404
294
|
return "unknown" # Should never happen
|
|
405
295
|
|
|
406
296
|
def node_to_dict(name, node):
|
|
@@ -435,9 +325,6 @@ class FlowGraph(object):
|
|
|
435
325
|
d["foreach_artifact"] = node.foreach_param
|
|
436
326
|
elif d["type"] == "split-parallel":
|
|
437
327
|
d["num_parallel"] = node.num_parallel
|
|
438
|
-
elif d["type"] == "split-switch":
|
|
439
|
-
d["condition"] = node.condition
|
|
440
|
-
d["switch_cases"] = node.switch_cases
|
|
441
328
|
if node.matching_join:
|
|
442
329
|
d["matching_join"] = node.matching_join
|
|
443
330
|
return d
|
|
@@ -452,8 +339,8 @@ class FlowGraph(object):
|
|
|
452
339
|
steps_info[cur_name] = node_dict
|
|
453
340
|
resulting_list.append(cur_name)
|
|
454
341
|
|
|
455
|
-
|
|
456
|
-
|
|
342
|
+
if cur_node.type not in ("start", "linear", "join"):
|
|
343
|
+
# We need to look at the different branches for this
|
|
457
344
|
resulting_list.append(
|
|
458
345
|
[
|
|
459
346
|
populate_block(s, cur_node.matching_join)
|
|
@@ -461,19 +348,8 @@ class FlowGraph(object):
|
|
|
461
348
|
]
|
|
462
349
|
)
|
|
463
350
|
cur_name = cur_node.matching_join
|
|
464
|
-
elif node_type == "split-switch":
|
|
465
|
-
all_paths = [
|
|
466
|
-
populate_block(s, end_name) for s in cur_node.out_funcs
|
|
467
|
-
]
|
|
468
|
-
resulting_list.append(all_paths)
|
|
469
|
-
cur_name = end_name
|
|
470
351
|
else:
|
|
471
|
-
|
|
472
|
-
if cur_node.out_funcs:
|
|
473
|
-
cur_name = cur_node.out_funcs[0]
|
|
474
|
-
else:
|
|
475
|
-
# handles terminal nodes or when we jump to 'end_name'.
|
|
476
|
-
break
|
|
352
|
+
cur_name = cur_node.out_funcs[0]
|
|
477
353
|
return resulting_list
|
|
478
354
|
|
|
479
355
|
graph_structure = populate_block("start", "end")
|
metaflow/lint.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import re
|
|
2
2
|
from .exception import MetaflowException
|
|
3
|
-
from .util import all_equal
|
|
3
|
+
from .util import all_equal
|
|
4
4
|
|
|
5
5
|
|
|
6
6
|
class LintWarn(MetaflowException):
|
|
@@ -134,13 +134,7 @@ def check_valid_transitions(graph):
|
|
|
134
134
|
msg = (
|
|
135
135
|
"Step *{0.name}* specifies an invalid self.next() transition. "
|
|
136
136
|
"Make sure the self.next() expression matches with one of the "
|
|
137
|
-
"supported transition types
|
|
138
|
-
" • Linear: self.next(self.step_name)\n"
|
|
139
|
-
" • Fan-out: self.next(self.step1, self.step2, ...)\n"
|
|
140
|
-
" • Foreach: self.next(self.step, foreach='variable')\n"
|
|
141
|
-
" • Switch: self.next({{\"key\": self.step, ...}}, condition='variable')\n\n"
|
|
142
|
-
"For switch statements, keys must be string literals or config expressions "
|
|
143
|
-
"(self.config.key_name), not variables or numbers."
|
|
137
|
+
"supported transition types."
|
|
144
138
|
)
|
|
145
139
|
for node in graph:
|
|
146
140
|
if node.type != "end" and node.has_tail_next and node.invalid_tail_next:
|
|
@@ -238,13 +232,7 @@ def check_split_join_balance(graph):
|
|
|
238
232
|
new_stack = split_stack
|
|
239
233
|
elif node.type in ("split", "foreach"):
|
|
240
234
|
new_stack = split_stack + [("split", node.out_funcs)]
|
|
241
|
-
elif node.type == "split-switch":
|
|
242
|
-
# For a switch, continue traversal down each path with the same stack
|
|
243
|
-
for n in node.out_funcs:
|
|
244
|
-
traverse(graph[n], split_stack)
|
|
245
|
-
return
|
|
246
235
|
elif node.type == "end":
|
|
247
|
-
new_stack = split_stack
|
|
248
236
|
if split_stack:
|
|
249
237
|
_, split_roots = split_stack.pop()
|
|
250
238
|
roots = ", ".join(split_roots)
|
|
@@ -252,25 +240,10 @@ def check_split_join_balance(graph):
|
|
|
252
240
|
msg0.format(roots=roots), node.func_lineno, node.source_file
|
|
253
241
|
)
|
|
254
242
|
elif node.type == "join":
|
|
255
|
-
new_stack = split_stack
|
|
256
243
|
if split_stack:
|
|
257
244
|
_, split_roots = split_stack[-1]
|
|
258
245
|
new_stack = split_stack[:-1]
|
|
259
|
-
|
|
260
|
-
# Identify the split this join corresponds to from its parentage.
|
|
261
|
-
split_node_name = node.split_parents[-1]
|
|
262
|
-
|
|
263
|
-
# Resolve each incoming function to its root branch from the split.
|
|
264
|
-
resolved_branches = set()
|
|
265
|
-
for in_node_name in node.in_funcs:
|
|
266
|
-
branch = get_split_branch_for_node(
|
|
267
|
-
graph, in_node_name, split_node_name
|
|
268
|
-
)
|
|
269
|
-
if branch:
|
|
270
|
-
resolved_branches.add(branch)
|
|
271
|
-
|
|
272
|
-
# compares the set of resolved branches against the expected branches from the split.
|
|
273
|
-
if len(resolved_branches) != len(split_roots):
|
|
246
|
+
if len(node.in_funcs) != len(split_roots):
|
|
274
247
|
paths = ", ".join(node.in_funcs)
|
|
275
248
|
roots = ", ".join(split_roots)
|
|
276
249
|
raise LintWarn(
|
|
@@ -293,8 +266,6 @@ def check_split_join_balance(graph):
|
|
|
293
266
|
|
|
294
267
|
if not all_equal(map(parents, node.in_funcs)):
|
|
295
268
|
raise LintWarn(msg3.format(node), node.func_lineno, node.source_file)
|
|
296
|
-
else:
|
|
297
|
-
new_stack = split_stack
|
|
298
269
|
|
|
299
270
|
for n in node.out_funcs:
|
|
300
271
|
traverse(graph[n], new_stack)
|
|
@@ -302,44 +273,6 @@ def check_split_join_balance(graph):
|
|
|
302
273
|
traverse(graph["start"], [])
|
|
303
274
|
|
|
304
275
|
|
|
305
|
-
@linter.ensure_static_graph
|
|
306
|
-
@linter.check
|
|
307
|
-
def check_switch_splits(graph):
|
|
308
|
-
"""Check conditional split constraints"""
|
|
309
|
-
msg0 = (
|
|
310
|
-
"Step *{0.name}* is a switch split but defines {num} transitions. "
|
|
311
|
-
"Switch splits must define at least 2 transitions."
|
|
312
|
-
)
|
|
313
|
-
msg1 = "Step *{0.name}* is a switch split but has no condition variable."
|
|
314
|
-
msg2 = "Step *{0.name}* is a switch split but has no switch cases defined."
|
|
315
|
-
|
|
316
|
-
for node in graph:
|
|
317
|
-
if node.type == "split-switch":
|
|
318
|
-
# Check at least 2 outputs
|
|
319
|
-
if len(node.out_funcs) < 2:
|
|
320
|
-
raise LintWarn(
|
|
321
|
-
msg0.format(node, num=len(node.out_funcs)),
|
|
322
|
-
node.func_lineno,
|
|
323
|
-
node.source_file,
|
|
324
|
-
)
|
|
325
|
-
|
|
326
|
-
# Check condition exists
|
|
327
|
-
if not node.condition:
|
|
328
|
-
raise LintWarn(
|
|
329
|
-
msg1.format(node),
|
|
330
|
-
node.func_lineno,
|
|
331
|
-
node.source_file,
|
|
332
|
-
)
|
|
333
|
-
|
|
334
|
-
# Check switch cases exist
|
|
335
|
-
if not node.switch_cases:
|
|
336
|
-
raise LintWarn(
|
|
337
|
-
msg2.format(node),
|
|
338
|
-
node.func_lineno,
|
|
339
|
-
node.source_file,
|
|
340
|
-
)
|
|
341
|
-
|
|
342
|
-
|
|
343
276
|
@linter.ensure_static_graph
|
|
344
277
|
@linter.check
|
|
345
278
|
def check_empty_foreaches(graph):
|
metaflow/metaflow_environment.py
CHANGED
|
@@ -203,6 +203,19 @@ class MetaflowEnvironment(object):
|
|
|
203
203
|
"mfcontent_version": 1,
|
|
204
204
|
}
|
|
205
205
|
)
|
|
206
|
+
|
|
207
|
+
extra_exports = []
|
|
208
|
+
for k, v in MetaflowPackage.get_post_extract_env_vars(
|
|
209
|
+
code_package_metadata, dest_dir="$(pwd)"
|
|
210
|
+
).items():
|
|
211
|
+
if k.endswith(":"):
|
|
212
|
+
# If the key ends with a colon, we override the existing value
|
|
213
|
+
extra_exports.append("export %s=%s" % (k[:-1], v))
|
|
214
|
+
else:
|
|
215
|
+
extra_exports.append(
|
|
216
|
+
"export %s=%s:$(printenv %s)" % (k, v.replace('"', '\\"'), k)
|
|
217
|
+
)
|
|
218
|
+
|
|
206
219
|
cmds = (
|
|
207
220
|
[
|
|
208
221
|
BASH_MFLOG,
|
|
@@ -226,12 +239,7 @@ class MetaflowEnvironment(object):
|
|
|
226
239
|
+ MetaflowPackage.get_extract_commands(
|
|
227
240
|
code_package_metadata, "job.tar", dest_dir="."
|
|
228
241
|
)
|
|
229
|
-
+
|
|
230
|
-
"export %s=%s:$(printenv %s)" % (k, v.replace('"', '\\"'), k)
|
|
231
|
-
for k, v in MetaflowPackage.get_post_extract_env_vars(
|
|
232
|
-
code_package_metadata, dest_dir="."
|
|
233
|
-
).items()
|
|
234
|
-
]
|
|
242
|
+
+ extra_exports
|
|
235
243
|
+ [
|
|
236
244
|
"mflog 'Task is starting.'",
|
|
237
245
|
"flush_mflogs",
|
metaflow/package/__init__.py
CHANGED
|
@@ -17,7 +17,6 @@ from ..packaging_sys.utils import suffix_filter, walk
|
|
|
17
17
|
from ..metaflow_config import DEFAULT_PACKAGE_SUFFIXES
|
|
18
18
|
from ..exception import MetaflowException
|
|
19
19
|
from ..user_configs.config_parameters import dump_config_values
|
|
20
|
-
from ..util import get_metaflow_root
|
|
21
20
|
from .. import R
|
|
22
21
|
|
|
23
22
|
DEFAULT_SUFFIXES_LIST = DEFAULT_PACKAGE_SUFFIXES.split(",")
|
|
@@ -76,12 +75,22 @@ class MetaflowPackage(object):
|
|
|
76
75
|
from ..user_decorators.user_flow_decorator import FlowMutatorMeta
|
|
77
76
|
from ..user_decorators.user_step_decorator import UserStepDecoratorMeta
|
|
78
77
|
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
78
|
+
# Be very defensive here to filter modules in case there are
|
|
79
|
+
# some badly behaved modules that have weird values for
|
|
80
|
+
# METAFLOW_PACKAGE_POLICY for example.
|
|
81
|
+
try:
|
|
82
|
+
if (
|
|
83
|
+
m.__name__ in FlowMutatorMeta._import_modules
|
|
84
|
+
or m.__name__ in UserStepDecoratorMeta._import_modules
|
|
85
|
+
or (
|
|
86
|
+
hasattr(m, "METAFLOW_PACKAGE_POLICY")
|
|
87
|
+
and m.METAFLOW_PACKAGE_POLICY == "include"
|
|
88
|
+
)
|
|
89
|
+
):
|
|
90
|
+
return True
|
|
91
|
+
return False
|
|
92
|
+
except:
|
|
93
|
+
return False
|
|
85
94
|
|
|
86
95
|
if mfcontent is None:
|
|
87
96
|
self._mfcontent = MetaflowCodeContentV1(criteria=_module_selector)
|
|
@@ -350,10 +359,10 @@ class MetaflowPackage(object):
|
|
|
350
359
|
"""
|
|
351
360
|
backend = cls.get_backend(pkg_metadata)
|
|
352
361
|
with backend.cls_open(archive) as opened_archive:
|
|
353
|
-
|
|
362
|
+
include_members = MetaflowCodeContent.get_archive_content_members(
|
|
354
363
|
opened_archive, content_types, backend
|
|
355
364
|
)
|
|
356
|
-
backend.
|
|
365
|
+
backend.cls_extract_members(opened_archive, include_members, dest_dir)
|
|
357
366
|
|
|
358
367
|
def user_tuples(self, timeout: Optional[float] = None):
|
|
359
368
|
# Wait for at least the blob to be formed
|
|
@@ -118,9 +118,7 @@ class MetaflowCodeContent:
|
|
|
118
118
|
return handling_cls.get_filename_impl(mfcontent_info, filename, content_type)
|
|
119
119
|
|
|
120
120
|
@classmethod
|
|
121
|
-
def get_env_vars_for_packaged_metaflow(
|
|
122
|
-
cls, dest_dir: str
|
|
123
|
-
) -> Optional[Dict[str, str]]:
|
|
121
|
+
def get_env_vars_for_packaged_metaflow(cls, dest_dir: str) -> Dict[str, str]:
|
|
124
122
|
"""
|
|
125
123
|
Get the environment variables that are needed to run Metaflow when it is
|
|
126
124
|
packaged. This is typically used to set the PYTHONPATH to include the
|
|
@@ -128,17 +126,19 @@ class MetaflowCodeContent:
|
|
|
128
126
|
|
|
129
127
|
Returns
|
|
130
128
|
-------
|
|
131
|
-
|
|
129
|
+
Dict[str, str]
|
|
132
130
|
The environment variables that are needed to run Metaflow when it is
|
|
133
|
-
packaged
|
|
131
|
+
packaged, if present.
|
|
134
132
|
"""
|
|
135
|
-
mfcontent_info = cls._extract_mfcontent_info()
|
|
133
|
+
mfcontent_info = cls._extract_mfcontent_info(dest_dir)
|
|
136
134
|
if mfcontent_info is None:
|
|
137
135
|
# No MFCONTENT_MARKER file found -- this is not a packaged Metaflow code
|
|
138
136
|
# package so no environment variables to set.
|
|
139
|
-
return
|
|
137
|
+
return {}
|
|
140
138
|
handling_cls = cls._get_mfcontent_class(mfcontent_info)
|
|
141
|
-
|
|
139
|
+
v = handling_cls.get_post_extract_env_vars_impl(dest_dir)
|
|
140
|
+
v["METAFLOW_EXTRACTED_ROOT:"] = dest_dir
|
|
141
|
+
return v
|
|
142
142
|
|
|
143
143
|
@classmethod
|
|
144
144
|
def get_archive_info(
|
|
@@ -216,15 +216,15 @@ class MetaflowCodeContent:
|
|
|
216
216
|
)
|
|
217
217
|
|
|
218
218
|
@classmethod
|
|
219
|
-
def
|
|
219
|
+
def get_archive_content_members(
|
|
220
220
|
cls,
|
|
221
221
|
archive: Any,
|
|
222
222
|
content_types: Optional[int] = None,
|
|
223
223
|
packaging_backend: Type[PackagingBackend] = TarPackagingBackend,
|
|
224
|
-
) -> List[
|
|
224
|
+
) -> List[Any]:
|
|
225
225
|
mfcontent_info = cls._extract_archive_mfcontent_info(archive, packaging_backend)
|
|
226
226
|
handling_cls = cls._get_mfcontent_class(mfcontent_info)
|
|
227
|
-
return handling_cls.
|
|
227
|
+
return handling_cls.get_archive_content_members_impl(
|
|
228
228
|
mfcontent_info, archive, content_types, packaging_backend
|
|
229
229
|
)
|
|
230
230
|
|
|
@@ -276,7 +276,9 @@ class MetaflowCodeContent:
|
|
|
276
276
|
"Invalid package -- unknown version %s in info: %s"
|
|
277
277
|
% (version_id, cls._mappings)
|
|
278
278
|
)
|
|
279
|
-
|
|
279
|
+
v = cls._mappings[version_id].get_post_extract_env_vars_impl(dest_dir)
|
|
280
|
+
v["METAFLOW_EXTRACTED_ROOT:"] = dest_dir
|
|
281
|
+
return v
|
|
280
282
|
|
|
281
283
|
# Implement the _impl methods in the base subclass (in this file). These need to
|
|
282
284
|
# happen with as few imports as possible to prevent circular dependencies.
|
|
@@ -337,14 +339,14 @@ class MetaflowCodeContent:
|
|
|
337
339
|
raise NotImplementedError("get_archive_filename_impl not implemented")
|
|
338
340
|
|
|
339
341
|
@classmethod
|
|
340
|
-
def
|
|
342
|
+
def get_archive_content_members_impl(
|
|
341
343
|
cls,
|
|
342
344
|
mfcontent_info: Optional[Dict[str, Any]],
|
|
343
345
|
archive: Any,
|
|
344
346
|
content_types: Optional[int] = None,
|
|
345
347
|
packaging_backend: Type[PackagingBackend] = TarPackagingBackend,
|
|
346
|
-
) -> List[
|
|
347
|
-
raise NotImplementedError("
|
|
348
|
+
) -> List[Any]:
|
|
349
|
+
raise NotImplementedError("get_archive_content_members_impl not implemented")
|
|
348
350
|
|
|
349
351
|
@classmethod
|
|
350
352
|
def get_post_extract_env_vars_impl(cls, dest_dir: str) -> Dict[str, str]:
|
|
@@ -523,19 +525,22 @@ class MetaflowCodeContent:
|
|
|
523
525
|
return mfcontent_info
|
|
524
526
|
|
|
525
527
|
@classmethod
|
|
526
|
-
def _extract_mfcontent_info(
|
|
527
|
-
|
|
528
|
-
|
|
528
|
+
def _extract_mfcontent_info(
|
|
529
|
+
cls, target_dir: Optional[str] = None
|
|
530
|
+
) -> Optional[Dict[str, Any]]:
|
|
531
|
+
target_dir = target_dir or "_local"
|
|
532
|
+
if target_dir in cls._cached_mfcontent_info:
|
|
533
|
+
return cls._cached_mfcontent_info[target_dir]
|
|
529
534
|
|
|
530
535
|
mfcontent_info = None # type: Optional[Dict[str, Any]]
|
|
531
|
-
if
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
) as f:
|
|
536
|
+
if target_dir == "_local":
|
|
537
|
+
root = os.environ.get("METAFLOW_EXTRACTED_ROOT", get_metaflow_root())
|
|
538
|
+
else:
|
|
539
|
+
root = target_dir
|
|
540
|
+
if os.path.exists(os.path.join(root, MFCONTENT_MARKER)):
|
|
541
|
+
with open(os.path.join(root, MFCONTENT_MARKER), "r", encoding="utf-8") as f:
|
|
537
542
|
mfcontent_info = json.load(f)
|
|
538
|
-
cls._cached_mfcontent_info[
|
|
543
|
+
cls._cached_mfcontent_info[target_dir] = mfcontent_info
|
|
539
544
|
return mfcontent_info
|
|
540
545
|
|
|
541
546
|
def get_package_version(self) -> int:
|
|
@@ -627,13 +632,13 @@ class MetaflowCodeContentV0(MetaflowCodeContent, version_id=0):
|
|
|
627
632
|
return None
|
|
628
633
|
|
|
629
634
|
@classmethod
|
|
630
|
-
def
|
|
635
|
+
def get_archive_content_members_impl(
|
|
631
636
|
cls,
|
|
632
637
|
mfcontent_info: Optional[Dict[str, Any]],
|
|
633
638
|
archive: Any,
|
|
634
639
|
content_types: Optional[int] = None,
|
|
635
640
|
packaging_backend: Type[PackagingBackend] = TarPackagingBackend,
|
|
636
|
-
) -> List[
|
|
641
|
+
) -> List[Any]:
|
|
637
642
|
"""
|
|
638
643
|
For V0, we use a static list of known files to classify the content
|
|
639
644
|
"""
|
|
@@ -649,16 +654,20 @@ class MetaflowCodeContentV0(MetaflowCodeContent, version_id=0):
|
|
|
649
654
|
"condav2-1.cnd": ContentType.OTHER_CONTENT.value,
|
|
650
655
|
}
|
|
651
656
|
to_return = []
|
|
652
|
-
for
|
|
657
|
+
for member in packaging_backend.cls_list_members(archive):
|
|
658
|
+
filename = packaging_backend.cls_member_name(member)
|
|
659
|
+
added = False
|
|
653
660
|
for prefix, classification in known_prefixes.items():
|
|
654
661
|
if (
|
|
655
662
|
prefix[-1] == "/" and filename.startswith(prefix)
|
|
656
663
|
) or prefix == filename:
|
|
657
664
|
if content_types & classification:
|
|
658
|
-
to_return.append(
|
|
659
|
-
|
|
660
|
-
|
|
661
|
-
|
|
665
|
+
to_return.append(member)
|
|
666
|
+
added = True
|
|
667
|
+
break
|
|
668
|
+
if not added and content_types & ContentType.USER_CONTENT.value:
|
|
669
|
+
# Everything else is user content
|
|
670
|
+
to_return.append(member)
|
|
662
671
|
return to_return
|
|
663
672
|
|
|
664
673
|
@classmethod
|
|
@@ -705,7 +714,7 @@ class MetaflowCodeContentV1Base(MetaflowCodeContent, version_id=1):
|
|
|
705
714
|
cls, mfcontent_info: Optional[Dict[str, Any]], filename: str, in_archive: bool
|
|
706
715
|
) -> str:
|
|
707
716
|
if in_archive:
|
|
708
|
-
return filename
|
|
717
|
+
return os.path.join(cls._other_dir, filename)
|
|
709
718
|
return os.path.join(get_metaflow_root(), "..", cls._other_dir, filename)
|
|
710
719
|
|
|
711
720
|
@classmethod
|
|
@@ -713,7 +722,7 @@ class MetaflowCodeContentV1Base(MetaflowCodeContent, version_id=1):
|
|
|
713
722
|
cls, mfcontent_info: Optional[Dict[str, Any]], filename: str, in_archive: bool
|
|
714
723
|
) -> str:
|
|
715
724
|
if in_archive:
|
|
716
|
-
return filename
|
|
725
|
+
return os.path.join(cls._code_dir, filename)
|
|
717
726
|
return os.path.join(get_metaflow_root(), filename)
|
|
718
727
|
|
|
719
728
|
@classmethod
|
|
@@ -832,37 +841,38 @@ class MetaflowCodeContentV1Base(MetaflowCodeContent, version_id=1):
|
|
|
832
841
|
return None
|
|
833
842
|
|
|
834
843
|
@classmethod
|
|
835
|
-
def
|
|
844
|
+
def get_archive_content_members_impl(
|
|
836
845
|
cls,
|
|
837
846
|
mfcontent_info: Optional[Dict[str, Any]],
|
|
838
847
|
archive: Any,
|
|
839
848
|
content_types: Optional[int] = None,
|
|
840
849
|
packaging_backend: Type[PackagingBackend] = TarPackagingBackend,
|
|
841
|
-
) -> List[
|
|
850
|
+
) -> List[Any]:
|
|
842
851
|
to_return = []
|
|
843
852
|
module_content = set(mfcontent_info.get("module_files", []))
|
|
844
|
-
for
|
|
853
|
+
for member in packaging_backend.cls_list_members(archive):
|
|
854
|
+
filename = packaging_backend.cls_member_name(member)
|
|
845
855
|
if filename.startswith(cls._other_dir) and (
|
|
846
856
|
content_types & ContentType.OTHER_CONTENT.value
|
|
847
857
|
):
|
|
848
|
-
to_return.append(
|
|
858
|
+
to_return.append(member)
|
|
849
859
|
elif filename.startswith(cls._code_dir):
|
|
850
860
|
# Special case for the marker, which is other content even if in code.
|
|
851
|
-
if filename ==
|
|
861
|
+
if filename == MFCONTENT_MARKER:
|
|
852
862
|
if content_types & ContentType.OTHER_CONTENT.value:
|
|
853
|
-
to_return.append(
|
|
863
|
+
to_return.append(member)
|
|
854
864
|
else:
|
|
855
865
|
continue
|
|
856
866
|
# Here it is either module or code
|
|
857
867
|
if os.path.join(cls._code_dir, filename) in module_content:
|
|
858
868
|
if content_types & ContentType.MODULE_CONTENT.value:
|
|
859
|
-
to_return.append(
|
|
869
|
+
to_return.append(member)
|
|
860
870
|
elif content_types & ContentType.CODE_CONTENT.value:
|
|
861
|
-
to_return.append(
|
|
871
|
+
to_return.append(member)
|
|
862
872
|
else:
|
|
863
873
|
if content_types & ContentType.USER_CONTENT.value:
|
|
864
874
|
# Everything else is user content
|
|
865
|
-
to_return.append(
|
|
875
|
+
to_return.append(member)
|
|
866
876
|
return to_return
|
|
867
877
|
|
|
868
878
|
@classmethod
|