ob-metaflow 2.15.13.1__py2.py3-none-any.whl → 2.19.7.1rc0__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (169) hide show
  1. metaflow/__init__.py +10 -3
  2. metaflow/_vendor/imghdr/__init__.py +186 -0
  3. metaflow/_vendor/yaml/__init__.py +427 -0
  4. metaflow/_vendor/yaml/composer.py +139 -0
  5. metaflow/_vendor/yaml/constructor.py +748 -0
  6. metaflow/_vendor/yaml/cyaml.py +101 -0
  7. metaflow/_vendor/yaml/dumper.py +62 -0
  8. metaflow/_vendor/yaml/emitter.py +1137 -0
  9. metaflow/_vendor/yaml/error.py +75 -0
  10. metaflow/_vendor/yaml/events.py +86 -0
  11. metaflow/_vendor/yaml/loader.py +63 -0
  12. metaflow/_vendor/yaml/nodes.py +49 -0
  13. metaflow/_vendor/yaml/parser.py +589 -0
  14. metaflow/_vendor/yaml/reader.py +185 -0
  15. metaflow/_vendor/yaml/representer.py +389 -0
  16. metaflow/_vendor/yaml/resolver.py +227 -0
  17. metaflow/_vendor/yaml/scanner.py +1435 -0
  18. metaflow/_vendor/yaml/serializer.py +111 -0
  19. metaflow/_vendor/yaml/tokens.py +104 -0
  20. metaflow/cards.py +4 -0
  21. metaflow/cli.py +125 -21
  22. metaflow/cli_components/init_cmd.py +1 -0
  23. metaflow/cli_components/run_cmds.py +204 -40
  24. metaflow/cli_components/step_cmd.py +160 -4
  25. metaflow/client/__init__.py +1 -0
  26. metaflow/client/core.py +198 -130
  27. metaflow/client/filecache.py +59 -32
  28. metaflow/cmd/code/__init__.py +2 -1
  29. metaflow/cmd/develop/stub_generator.py +49 -18
  30. metaflow/cmd/develop/stubs.py +9 -27
  31. metaflow/cmd/make_wrapper.py +30 -0
  32. metaflow/datastore/__init__.py +1 -0
  33. metaflow/datastore/content_addressed_store.py +40 -9
  34. metaflow/datastore/datastore_set.py +10 -1
  35. metaflow/datastore/flow_datastore.py +124 -4
  36. metaflow/datastore/spin_datastore.py +91 -0
  37. metaflow/datastore/task_datastore.py +92 -6
  38. metaflow/debug.py +5 -0
  39. metaflow/decorators.py +331 -82
  40. metaflow/extension_support/__init__.py +414 -356
  41. metaflow/extension_support/_empty_file.py +2 -2
  42. metaflow/flowspec.py +322 -82
  43. metaflow/graph.py +178 -15
  44. metaflow/includefile.py +25 -3
  45. metaflow/lint.py +94 -3
  46. metaflow/meta_files.py +13 -0
  47. metaflow/metadata_provider/metadata.py +13 -2
  48. metaflow/metaflow_config.py +66 -4
  49. metaflow/metaflow_environment.py +91 -25
  50. metaflow/metaflow_profile.py +18 -0
  51. metaflow/metaflow_version.py +16 -1
  52. metaflow/package/__init__.py +673 -0
  53. metaflow/packaging_sys/__init__.py +880 -0
  54. metaflow/packaging_sys/backend.py +128 -0
  55. metaflow/packaging_sys/distribution_support.py +153 -0
  56. metaflow/packaging_sys/tar_backend.py +99 -0
  57. metaflow/packaging_sys/utils.py +54 -0
  58. metaflow/packaging_sys/v1.py +527 -0
  59. metaflow/parameters.py +6 -2
  60. metaflow/plugins/__init__.py +6 -0
  61. metaflow/plugins/airflow/airflow.py +11 -1
  62. metaflow/plugins/airflow/airflow_cli.py +16 -5
  63. metaflow/plugins/argo/argo_client.py +42 -20
  64. metaflow/plugins/argo/argo_events.py +6 -6
  65. metaflow/plugins/argo/argo_workflows.py +1023 -344
  66. metaflow/plugins/argo/argo_workflows_cli.py +396 -94
  67. metaflow/plugins/argo/argo_workflows_decorator.py +9 -0
  68. metaflow/plugins/argo/argo_workflows_deployer_objects.py +75 -49
  69. metaflow/plugins/argo/capture_error.py +5 -2
  70. metaflow/plugins/argo/conditional_input_paths.py +35 -0
  71. metaflow/plugins/argo/exit_hooks.py +209 -0
  72. metaflow/plugins/argo/param_val.py +19 -0
  73. metaflow/plugins/aws/aws_client.py +6 -0
  74. metaflow/plugins/aws/aws_utils.py +33 -1
  75. metaflow/plugins/aws/batch/batch.py +72 -5
  76. metaflow/plugins/aws/batch/batch_cli.py +24 -3
  77. metaflow/plugins/aws/batch/batch_decorator.py +57 -6
  78. metaflow/plugins/aws/step_functions/step_functions.py +28 -3
  79. metaflow/plugins/aws/step_functions/step_functions_cli.py +49 -4
  80. metaflow/plugins/aws/step_functions/step_functions_deployer.py +3 -0
  81. metaflow/plugins/aws/step_functions/step_functions_deployer_objects.py +30 -0
  82. metaflow/plugins/cards/card_cli.py +20 -1
  83. metaflow/plugins/cards/card_creator.py +24 -1
  84. metaflow/plugins/cards/card_datastore.py +21 -49
  85. metaflow/plugins/cards/card_decorator.py +58 -6
  86. metaflow/plugins/cards/card_modules/basic.py +38 -9
  87. metaflow/plugins/cards/card_modules/bundle.css +1 -1
  88. metaflow/plugins/cards/card_modules/chevron/renderer.py +1 -1
  89. metaflow/plugins/cards/card_modules/components.py +592 -3
  90. metaflow/plugins/cards/card_modules/convert_to_native_type.py +34 -5
  91. metaflow/plugins/cards/card_modules/json_viewer.py +232 -0
  92. metaflow/plugins/cards/card_modules/main.css +1 -0
  93. metaflow/plugins/cards/card_modules/main.js +56 -41
  94. metaflow/plugins/cards/card_modules/test_cards.py +22 -6
  95. metaflow/plugins/cards/component_serializer.py +1 -8
  96. metaflow/plugins/cards/metadata.py +22 -0
  97. metaflow/plugins/catch_decorator.py +9 -0
  98. metaflow/plugins/datastores/local_storage.py +12 -6
  99. metaflow/plugins/datastores/spin_storage.py +12 -0
  100. metaflow/plugins/datatools/s3/s3.py +49 -17
  101. metaflow/plugins/datatools/s3/s3op.py +113 -66
  102. metaflow/plugins/env_escape/client_modules.py +102 -72
  103. metaflow/plugins/events_decorator.py +127 -121
  104. metaflow/plugins/exit_hook/__init__.py +0 -0
  105. metaflow/plugins/exit_hook/exit_hook_decorator.py +46 -0
  106. metaflow/plugins/exit_hook/exit_hook_script.py +52 -0
  107. metaflow/plugins/kubernetes/kubernetes.py +12 -1
  108. metaflow/plugins/kubernetes/kubernetes_cli.py +11 -0
  109. metaflow/plugins/kubernetes/kubernetes_decorator.py +25 -6
  110. metaflow/plugins/kubernetes/kubernetes_job.py +12 -4
  111. metaflow/plugins/kubernetes/kubernetes_jobsets.py +31 -30
  112. metaflow/plugins/metadata_providers/local.py +76 -82
  113. metaflow/plugins/metadata_providers/service.py +13 -9
  114. metaflow/plugins/metadata_providers/spin.py +16 -0
  115. metaflow/plugins/package_cli.py +36 -24
  116. metaflow/plugins/parallel_decorator.py +11 -2
  117. metaflow/plugins/parsers.py +16 -0
  118. metaflow/plugins/pypi/bootstrap.py +7 -1
  119. metaflow/plugins/pypi/conda_decorator.py +41 -82
  120. metaflow/plugins/pypi/conda_environment.py +14 -6
  121. metaflow/plugins/pypi/micromamba.py +9 -1
  122. metaflow/plugins/pypi/pip.py +41 -5
  123. metaflow/plugins/pypi/pypi_decorator.py +4 -4
  124. metaflow/plugins/pypi/utils.py +22 -0
  125. metaflow/plugins/secrets/__init__.py +3 -0
  126. metaflow/plugins/secrets/secrets_decorator.py +14 -178
  127. metaflow/plugins/secrets/secrets_func.py +49 -0
  128. metaflow/plugins/secrets/secrets_spec.py +101 -0
  129. metaflow/plugins/secrets/utils.py +74 -0
  130. metaflow/plugins/test_unbounded_foreach_decorator.py +2 -2
  131. metaflow/plugins/timeout_decorator.py +0 -1
  132. metaflow/plugins/uv/bootstrap.py +29 -1
  133. metaflow/plugins/uv/uv_environment.py +5 -3
  134. metaflow/pylint_wrapper.py +5 -1
  135. metaflow/runner/click_api.py +79 -26
  136. metaflow/runner/deployer.py +208 -6
  137. metaflow/runner/deployer_impl.py +32 -12
  138. metaflow/runner/metaflow_runner.py +266 -33
  139. metaflow/runner/subprocess_manager.py +21 -1
  140. metaflow/runner/utils.py +27 -16
  141. metaflow/runtime.py +660 -66
  142. metaflow/task.py +255 -26
  143. metaflow/user_configs/config_options.py +33 -21
  144. metaflow/user_configs/config_parameters.py +220 -58
  145. metaflow/user_decorators/__init__.py +0 -0
  146. metaflow/user_decorators/common.py +144 -0
  147. metaflow/user_decorators/mutable_flow.py +512 -0
  148. metaflow/user_decorators/mutable_step.py +424 -0
  149. metaflow/user_decorators/user_flow_decorator.py +264 -0
  150. metaflow/user_decorators/user_step_decorator.py +749 -0
  151. metaflow/util.py +197 -7
  152. metaflow/vendor.py +23 -7
  153. metaflow/version.py +1 -1
  154. {ob_metaflow-2.15.13.1.data → ob_metaflow-2.19.7.1rc0.data}/data/share/metaflow/devtools/Makefile +13 -2
  155. {ob_metaflow-2.15.13.1.data → ob_metaflow-2.19.7.1rc0.data}/data/share/metaflow/devtools/Tiltfile +107 -7
  156. {ob_metaflow-2.15.13.1.data → ob_metaflow-2.19.7.1rc0.data}/data/share/metaflow/devtools/pick_services.sh +1 -0
  157. {ob_metaflow-2.15.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/METADATA +2 -3
  158. {ob_metaflow-2.15.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/RECORD +162 -121
  159. {ob_metaflow-2.15.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/WHEEL +1 -1
  160. metaflow/_vendor/v3_5/__init__.py +0 -1
  161. metaflow/_vendor/v3_5/importlib_metadata/__init__.py +0 -644
  162. metaflow/_vendor/v3_5/importlib_metadata/_compat.py +0 -152
  163. metaflow/_vendor/v3_5/zipp.py +0 -329
  164. metaflow/info_file.py +0 -25
  165. metaflow/package.py +0 -203
  166. metaflow/user_configs/config_decorators.py +0 -568
  167. {ob_metaflow-2.15.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/entry_points.txt +0 -0
  168. {ob_metaflow-2.15.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/licenses/LICENSE +0 -0
  169. {ob_metaflow-2.15.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/top_level.txt +0 -0
metaflow/graph.py CHANGED
@@ -2,6 +2,8 @@ import inspect
2
2
  import ast
3
3
  import re
4
4
 
5
+ from itertools import chain
6
+
5
7
 
6
8
  from .util import to_pod
7
9
 
@@ -45,13 +47,17 @@ def deindent_docstring(doc):
45
47
 
46
48
 
47
49
  class DAGNode(object):
48
- def __init__(self, func_ast, decos, doc, source_file, lineno):
50
+ def __init__(
51
+ self, func_ast, decos, wrappers, config_decorators, doc, source_file, lineno
52
+ ):
49
53
  self.name = func_ast.name
50
54
  self.source_file = source_file
51
55
  # lineno is the start line of decorators in source_file
52
56
  # func_ast.lineno is lines from decorators start to def of function
53
57
  self.func_lineno = lineno + func_ast.lineno - 1
54
58
  self.decorators = decos
59
+ self.wrappers = wrappers
60
+ self.config_decorators = config_decorators
55
61
  self.doc = deindent_docstring(doc)
56
62
  self.parallel_step = any(getattr(deco, "IS_PARALLEL", False) for deco in decos)
57
63
 
@@ -62,6 +68,8 @@ class DAGNode(object):
62
68
  self.has_tail_next = False
63
69
  self.invalid_tail_next = False
64
70
  self.num_args = 0
71
+ self.switch_cases = {}
72
+ self.condition = None
65
73
  self.foreach_param = None
66
74
  self.num_parallel = 0
67
75
  self.parallel_foreach = False
@@ -70,6 +78,7 @@ class DAGNode(object):
70
78
  # these attributes are populated by _traverse_graph
71
79
  self.in_funcs = set()
72
80
  self.split_parents = []
81
+ self.split_branches = []
73
82
  self.matching_join = None
74
83
  # these attributes are populated by _postprocess
75
84
  self.is_inside_foreach = False
@@ -77,6 +86,56 @@ class DAGNode(object):
77
86
  def _expr_str(self, expr):
78
87
  return "%s.%s" % (expr.value.id, expr.attr)
79
88
 
89
+ def _parse_switch_dict(self, dict_node):
90
+ switch_cases = {}
91
+
92
+ if isinstance(dict_node, ast.Dict):
93
+ for key, value in zip(dict_node.keys, dict_node.values):
94
+ case_key = None
95
+
96
+ # handle string literals
97
+ if hasattr(ast, "Str") and isinstance(key, ast.Str):
98
+ case_key = key.s
99
+ elif isinstance(key, ast.Constant):
100
+ case_key = key.value
101
+ elif isinstance(key, ast.Attribute):
102
+ if isinstance(key.value, ast.Attribute) and isinstance(
103
+ key.value.value, ast.Name
104
+ ):
105
+ # This handles self.config.some_key
106
+ if key.value.value.id == "self":
107
+ config_var = key.value.attr
108
+ config_key = key.attr
109
+ case_key = f"config:{config_var}.{config_key}"
110
+ else:
111
+ return None
112
+ else:
113
+ return None
114
+
115
+ # handle variables or other dynamic expressions - not allowed
116
+ elif isinstance(key, ast.Name):
117
+ return None
118
+ else:
119
+ # can't statically analyze this key
120
+ return None
121
+
122
+ if case_key is None:
123
+ return None
124
+
125
+ # extract the step name from the value
126
+ if isinstance(value, ast.Attribute) and isinstance(
127
+ value.value, ast.Name
128
+ ):
129
+ if value.value.id == "self":
130
+ step_name = value.attr
131
+ switch_cases[case_key] = step_name
132
+ else:
133
+ return None
134
+ else:
135
+ return None
136
+
137
+ return switch_cases if switch_cases else None
138
+
80
139
  def _parse(self, func_ast, lineno):
81
140
  self.num_args = len(func_ast.args.args)
82
141
  tail = func_ast.body[-1]
@@ -98,7 +157,38 @@ class DAGNode(object):
98
157
  self.has_tail_next = True
99
158
  self.invalid_tail_next = True
100
159
  self.tail_next_lineno = lineno + tail.lineno - 1
101
- self.out_funcs = [e.attr for e in tail.value.args]
160
+
161
+ # Check if first argument is a dictionary (switch case)
162
+ if (
163
+ len(tail.value.args) == 1
164
+ and isinstance(tail.value.args[0], ast.Dict)
165
+ and any(k.arg == "condition" for k in tail.value.keywords)
166
+ ):
167
+ # This is a switch statement
168
+ switch_cases = self._parse_switch_dict(tail.value.args[0])
169
+ condition_name = None
170
+
171
+ # Get condition parameter
172
+ for keyword in tail.value.keywords:
173
+ if keyword.arg == "condition":
174
+ if hasattr(ast, "Str") and isinstance(keyword.value, ast.Str):
175
+ condition_name = keyword.value.s
176
+ elif isinstance(keyword.value, ast.Constant) and isinstance(
177
+ keyword.value.value, str
178
+ ):
179
+ condition_name = keyword.value.value
180
+ break
181
+
182
+ if switch_cases and condition_name:
183
+ self.type = "split-switch"
184
+ self.condition = condition_name
185
+ self.switch_cases = switch_cases
186
+ self.out_funcs = list(switch_cases.values())
187
+ self.invalid_tail_next = False
188
+ return
189
+
190
+ else:
191
+ self.out_funcs = [e.attr for e in tail.value.args]
102
192
 
103
193
  keywords = dict(
104
194
  (k.arg, getattr(k.value, "s", None)) for k in tail.value.keywords
@@ -138,6 +228,7 @@ class DAGNode(object):
138
228
  in_funcs={in_funcs}
139
229
  out_funcs={out_funcs}
140
230
  split_parents={parents}
231
+ split_branches={branches}
141
232
  matching_join={matching_join}
142
233
  is_inside_foreach={is_inside_foreach}
143
234
  decorators={decos}
@@ -145,6 +236,7 @@ class DAGNode(object):
145
236
  has_tail_next={0.has_tail_next} (line {0.tail_next_lineno})
146
237
  invalid_tail_next={0.invalid_tail_next}
147
238
  foreach_param={0.foreach_param}
239
+ condition={0.condition}
148
240
  parallel_step={0.parallel_step}
149
241
  parallel_foreach={0.parallel_foreach}
150
242
  -> {out}""".format(
@@ -154,6 +246,7 @@ class DAGNode(object):
154
246
  out_funcs=", ".join("[%s]" % x for x in self.out_funcs),
155
247
  in_funcs=", ".join("[%s]" % x for x in self.in_funcs),
156
248
  parents=", ".join("[%s]" % x for x in self.split_parents),
249
+ branches=", ".join("[%s]" % x for x in self.split_branches),
157
250
  decos=" | ".join(map(str, self.decorators)),
158
251
  out=", ".join("[%s]" % x for x in self.out_funcs),
159
252
  )
@@ -181,7 +274,13 @@ class FlowGraph(object):
181
274
  source_code = deindent_docstring("".join(source_lines))
182
275
  function_ast = ast.parse(source_code).body[0]
183
276
  node = DAGNode(
184
- function_ast, func.decorators, func.__doc__, source_file, lineno
277
+ function_ast,
278
+ func.decorators,
279
+ func.wrappers,
280
+ func.config_decorators,
281
+ func.__doc__,
282
+ source_file,
283
+ lineno,
185
284
  )
186
285
  nodes[element] = node
187
286
  return nodes
@@ -198,7 +297,8 @@ class FlowGraph(object):
198
297
  node.is_inside_foreach = True
199
298
 
200
299
  def _traverse_graph(self):
201
- def traverse(node, seen, split_parents):
300
+ def traverse(node, seen, split_parents, split_branches):
301
+ add_split_branch = False
202
302
  try:
203
303
  self.sorted_nodes.remove(node.name)
204
304
  except ValueError:
@@ -206,15 +306,23 @@ class FlowGraph(object):
206
306
  self.sorted_nodes.append(node.name)
207
307
  if node.type in ("split", "foreach"):
208
308
  node.split_parents = split_parents
309
+ node.split_branches = split_branches
310
+ add_split_branch = True
209
311
  split_parents = split_parents + [node.name]
312
+ elif node.type == "split-switch":
313
+ node.split_parents = split_parents
314
+ node.split_branches = split_branches
210
315
  elif node.type == "join":
211
316
  # ignore joins without splits
212
317
  if split_parents:
213
318
  self[split_parents[-1]].matching_join = node.name
214
319
  node.split_parents = split_parents
320
+ node.split_branches = split_branches[:-1]
215
321
  split_parents = split_parents[:-1]
322
+ split_branches = split_branches[:-1]
216
323
  else:
217
324
  node.split_parents = split_parents
325
+ node.split_branches = split_branches
218
326
 
219
327
  for n in node.out_funcs:
220
328
  # graph may contain loops - ignore them
@@ -223,10 +331,15 @@ class FlowGraph(object):
223
331
  if n in self:
224
332
  child = self[n]
225
333
  child.in_funcs.add(node.name)
226
- traverse(child, seen + [n], split_parents)
334
+ traverse(
335
+ child,
336
+ seen + [n],
337
+ split_parents,
338
+ split_branches + ([n] if add_split_branch else []),
339
+ )
227
340
 
228
341
  if "start" in self:
229
- traverse(self["start"], [], [])
342
+ traverse(self["start"], [], [], [])
230
343
 
231
344
  # fix the order of in_funcs
232
345
  for node in self.nodes.values():
@@ -247,15 +360,37 @@ class FlowGraph(object):
247
360
  def output_dot(self):
248
361
  def edge_specs():
249
362
  for node in self.nodes.values():
250
- for edge in node.out_funcs:
251
- yield "%s -> %s;" % (node.name, edge)
363
+ if node.type == "split-switch":
364
+ # Label edges for switch cases
365
+ for case_value, step_name in node.switch_cases.items():
366
+ yield (
367
+ '{0} -> {1} [label="{2}" color="blue" fontcolor="blue"];'.format(
368
+ node.name, step_name, case_value
369
+ )
370
+ )
371
+ else:
372
+ for edge in node.out_funcs:
373
+ yield "%s -> %s;" % (node.name, edge)
252
374
 
253
375
  def node_specs():
254
376
  for node in self.nodes.values():
255
- nodetype = "join" if node.num_args > 1 else node.type
256
- yield '"{0.name}"' '[ label = <<b>{0.name}</b> | <font point-size="10">{type}</font>> ' ' fontname = "Helvetica" ' ' shape = "record" ];'.format(
257
- node, type=nodetype
258
- )
377
+ if node.type == "split-switch":
378
+ # Hexagon shape for switch nodes
379
+ condition_label = (
380
+ f"switch: {node.condition}" if node.condition else "switch"
381
+ )
382
+ yield (
383
+ '"{0.name}" '
384
+ '[ label = <<b>{0.name}</b><br/><font point-size="9">{condition}</font>> '
385
+ ' fontname = "Helvetica" '
386
+ ' shape = "hexagon" '
387
+ ' style = "filled" fillcolor = "lightgreen" ];'
388
+ ).format(node, condition=condition_label)
389
+ else:
390
+ nodetype = "join" if node.num_args > 1 else node.type
391
+ yield '"{0.name}"' '[ label = <<b>{0.name}</b> | <font point-size="10">{type}</font>> ' ' fontname = "Helvetica" ' ' shape = "record" ];'.format(
392
+ node, type=nodetype
393
+ )
259
394
 
260
395
  return (
261
396
  "digraph {0.name} {{\n"
@@ -279,6 +414,8 @@ class FlowGraph(object):
279
414
  if node.parallel_foreach:
280
415
  return "split-parallel"
281
416
  return "split-foreach"
417
+ elif node.type == "split-switch":
418
+ return "split-switch"
282
419
  return "unknown" # Should never happen
283
420
 
284
421
  def node_to_dict(name, node):
@@ -293,9 +430,19 @@ class FlowGraph(object):
293
430
  "name": deco.name,
294
431
  "attributes": to_pod(deco.attributes),
295
432
  "statically_defined": deco.statically_defined,
433
+ "inserted_by": deco.inserted_by,
296
434
  }
297
435
  for deco in node.decorators
298
436
  if not deco.name.startswith("_")
437
+ ]
438
+ + [
439
+ {
440
+ "name": deco.decorator_name,
441
+ "attributes": {"_args": deco._args, **deco._kwargs},
442
+ "statically_defined": deco.statically_defined,
443
+ "inserted_by": deco.inserted_by,
444
+ }
445
+ for deco in chain(node.wrappers, node.config_decorators)
299
446
  ],
300
447
  "next": node.out_funcs,
301
448
  }
@@ -303,6 +450,9 @@ class FlowGraph(object):
303
450
  d["foreach_artifact"] = node.foreach_param
304
451
  elif d["type"] == "split-parallel":
305
452
  d["num_parallel"] = node.num_parallel
453
+ elif d["type"] == "split-switch":
454
+ d["condition"] = node.condition
455
+ d["switch_cases"] = node.switch_cases
306
456
  if node.matching_join:
307
457
  d["matching_join"] = node.matching_join
308
458
  return d
@@ -317,8 +467,8 @@ class FlowGraph(object):
317
467
  steps_info[cur_name] = node_dict
318
468
  resulting_list.append(cur_name)
319
469
 
320
- if cur_node.type not in ("start", "linear", "join"):
321
- # We need to look at the different branches for this
470
+ node_type = node_to_type(cur_node)
471
+ if node_type in ("split-static", "split-foreach"):
322
472
  resulting_list.append(
323
473
  [
324
474
  populate_block(s, cur_node.matching_join)
@@ -326,8 +476,21 @@ class FlowGraph(object):
326
476
  ]
327
477
  )
328
478
  cur_name = cur_node.matching_join
479
+ elif node_type == "split-switch":
480
+ all_paths = [
481
+ populate_block(s, end_name)
482
+ for s in cur_node.out_funcs
483
+ if s != cur_name
484
+ ]
485
+ resulting_list.append(all_paths)
486
+ cur_name = end_name
329
487
  else:
330
- cur_name = cur_node.out_funcs[0]
488
+ # handles only linear, start, and join steps.
489
+ if cur_node.out_funcs:
490
+ cur_name = cur_node.out_funcs[0]
491
+ else:
492
+ # handles terminal nodes or when we jump to 'end_name'.
493
+ break
331
494
  return resulting_list
332
495
 
333
496
  graph_structure = populate_block("start", "end")
metaflow/includefile.py CHANGED
@@ -7,9 +7,10 @@ import json
7
7
  import os
8
8
 
9
9
  from hashlib import sha1
10
- from typing import Any, Callable, Dict, Optional
10
+ from typing import Any, Callable, Dict, Optional, Union
11
11
 
12
12
  from metaflow._vendor import click
13
+ from metaflow._vendor import yaml
13
14
 
14
15
  from .exception import MetaflowException
15
16
  from .parameters import (
@@ -20,7 +21,7 @@ from .parameters import (
20
21
  )
21
22
 
22
23
  from .plugins import DATACLIENTS
23
- from .user_configs.config_parameters import ConfigValue
24
+ from .user_configs.config_options import ConfigInput
24
25
  from .util import get_username
25
26
 
26
27
  import functools
@@ -261,6 +262,12 @@ class IncludeFile(Parameter):
261
262
  show_default : bool, default True
262
263
  If True, show the default value in the help text. A value of None is equivalent
263
264
  to True.
265
+ parser : Union[str, Callable[[str], Any]], optional, default None
266
+ If a callable, it is a function that can parse the file contents
267
+ into any desired format. If a string, the string should refer to
268
+ a function (like "my_parser_package.my_parser.my_parser_function") which should
269
+ be able to parse the file contents. If the name starts with a ".", it is assumed
270
+ to be relative to "metaflow".
264
271
  """
265
272
 
266
273
  def __init__(
@@ -270,6 +277,7 @@ class IncludeFile(Parameter):
270
277
  is_text: Optional[bool] = None,
271
278
  encoding: Optional[str] = None,
272
279
  help: Optional[str] = None,
280
+ parser: Optional[Union[str, Callable[[str], Any]]] = None,
273
281
  **kwargs: Dict[str, str]
274
282
  ):
275
283
  self._includefile_overrides = {}
@@ -277,6 +285,7 @@ class IncludeFile(Parameter):
277
285
  self._includefile_overrides["is_text"] = is_text
278
286
  if encoding is not None:
279
287
  self._includefile_overrides["encoding"] = encoding
288
+ self._parser = parser
280
289
  # NOTA: Right now, there is an issue where these can't be overridden by config
281
290
  # in all circumstances. Ignoring for now.
282
291
  super(IncludeFile, self).__init__(
@@ -336,7 +345,20 @@ class IncludeFile(Parameter):
336
345
  def load_parameter(self, v):
337
346
  if v is None:
338
347
  return v
339
- return v.decode(self.name, var_type="Parameter")
348
+
349
+ # Get the raw content from the file
350
+ content = v.decode(self.name, var_type="Parameter")
351
+ # If a parser is specified, use it to parse the content
352
+ if self._parser is not None:
353
+ try:
354
+ return ConfigInput._call_parser(self._parser, content)
355
+ except Exception as e:
356
+ raise MetaflowException(
357
+ "Failed to parse content in parameter '%s' using parser: %s"
358
+ % (self.name, str(e))
359
+ ) from e
360
+
361
+ return content
340
362
 
341
363
  @staticmethod
342
364
  def _eval_default(is_text, encoding, default_path):
metaflow/lint.py CHANGED
@@ -134,7 +134,13 @@ def check_valid_transitions(graph):
134
134
  msg = (
135
135
  "Step *{0.name}* specifies an invalid self.next() transition. "
136
136
  "Make sure the self.next() expression matches with one of the "
137
- "supported transition types."
137
+ "supported transition types:\n"
138
+ " • Linear: self.next(self.step_name)\n"
139
+ " • Fan-out: self.next(self.step1, self.step2, ...)\n"
140
+ " • Foreach: self.next(self.step, foreach='variable')\n"
141
+ " • Switch: self.next({{\"key\": self.step, ...}}, condition='variable')\n\n"
142
+ "For switch statements, keys must be string literals, numbers or config expressions "
143
+ "(self.config.key_name), not variables."
138
144
  )
139
145
  for node in graph:
140
146
  if node.type != "end" and node.has_tail_next and node.invalid_tail_next:
@@ -169,6 +175,8 @@ def check_for_acyclicity(graph):
169
175
 
170
176
  def check_path(node, seen):
171
177
  for n in node.out_funcs:
178
+ if node.type == "split-switch" and n == node.name:
179
+ continue
172
180
  if n in seen:
173
181
  path = "->".join(seen + [n])
174
182
  raise LintWarn(
@@ -232,7 +240,15 @@ def check_split_join_balance(graph):
232
240
  new_stack = split_stack
233
241
  elif node.type in ("split", "foreach"):
234
242
  new_stack = split_stack + [("split", node.out_funcs)]
243
+ elif node.type == "split-switch":
244
+ # For a switch, continue traversal down each path with the same stack
245
+ for n in node.out_funcs:
246
+ if node.type == "split-switch" and n == node.name:
247
+ continue
248
+ traverse(graph[n], split_stack)
249
+ return
235
250
  elif node.type == "end":
251
+ new_stack = split_stack
236
252
  if split_stack:
237
253
  _, split_roots = split_stack.pop()
238
254
  roots = ", ".join(split_roots)
@@ -240,11 +256,22 @@ def check_split_join_balance(graph):
240
256
  msg0.format(roots=roots), node.func_lineno, node.source_file
241
257
  )
242
258
  elif node.type == "join":
259
+ new_stack = split_stack
243
260
  if split_stack:
244
261
  _, split_roots = split_stack[-1]
245
262
  new_stack = split_stack[:-1]
246
- if len(node.in_funcs) != len(split_roots):
247
- paths = ", ".join(node.in_funcs)
263
+
264
+ # Resolve each incoming function to its root branch from the split.
265
+ resolved_branches = set(
266
+ graph[n].split_branches[-1] for n in node.in_funcs
267
+ )
268
+
269
+ # compares the set of resolved branches against the expected branches
270
+ # from the split.
271
+ if len(resolved_branches) != len(
272
+ split_roots
273
+ ) or resolved_branches ^ set(split_roots):
274
+ paths = ", ".join(resolved_branches)
248
275
  roots = ", ".join(split_roots)
249
276
  raise LintWarn(
250
277
  msg1.format(
@@ -266,13 +293,55 @@ def check_split_join_balance(graph):
266
293
 
267
294
  if not all_equal(map(parents, node.in_funcs)):
268
295
  raise LintWarn(msg3.format(node), node.func_lineno, node.source_file)
296
+ else:
297
+ new_stack = split_stack
269
298
 
270
299
  for n in node.out_funcs:
300
+ if node.type == "split-switch" and n == node.name:
301
+ continue
271
302
  traverse(graph[n], new_stack)
272
303
 
273
304
  traverse(graph["start"], [])
274
305
 
275
306
 
307
+ @linter.ensure_static_graph
308
+ @linter.check
309
+ def check_switch_splits(graph):
310
+ """Check conditional split constraints"""
311
+ msg0 = (
312
+ "Step *{0.name}* is a switch split but defines {num} transitions. "
313
+ "Switch splits must define at least 2 transitions."
314
+ )
315
+ msg1 = "Step *{0.name}* is a switch split but has no condition variable."
316
+ msg2 = "Step *{0.name}* is a switch split but has no switch cases defined."
317
+
318
+ for node in graph:
319
+ if node.type == "split-switch":
320
+ # Check at least 2 outputs
321
+ if len(node.out_funcs) < 2:
322
+ raise LintWarn(
323
+ msg0.format(node, num=len(node.out_funcs)),
324
+ node.func_lineno,
325
+ node.source_file,
326
+ )
327
+
328
+ # Check condition exists
329
+ if not node.condition:
330
+ raise LintWarn(
331
+ msg1.format(node),
332
+ node.func_lineno,
333
+ node.source_file,
334
+ )
335
+
336
+ # Check switch cases exist
337
+ if not node.switch_cases:
338
+ raise LintWarn(
339
+ msg2.format(node),
340
+ node.func_lineno,
341
+ node.source_file,
342
+ )
343
+
344
+
276
345
  @linter.ensure_static_graph
277
346
  @linter.check
278
347
  def check_empty_foreaches(graph):
@@ -347,3 +416,25 @@ def check_nested_foreach(graph):
347
416
  if node.type == "foreach":
348
417
  if any(graph[p].type == "foreach" for p in node.split_parents):
349
418
  raise LintWarn(msg.format(node), node.func_lineno, node.source_file)
419
+
420
+
421
+ @linter.ensure_static_graph
422
+ @linter.check
423
+ def check_ambiguous_joins(graph):
424
+ for node in graph:
425
+ if node.type == "join":
426
+ problematic_parents = [
427
+ p_name
428
+ for p_name in node.in_funcs
429
+ if graph[p_name].type == "split-switch"
430
+ ]
431
+ if problematic_parents:
432
+ msg = (
433
+ "A conditional path cannot lead directly to a join step.\n"
434
+ "In your conditional step(s) {parents}, one or more of the possible paths transition directly to the join step {join_name}.\n"
435
+ "As a workaround, please introduce an intermediate, unconditional step on that specific path before joining."
436
+ ).format(
437
+ parents=", ".join("*%s*" % p for p in problematic_parents),
438
+ join_name="*%s*" % node.name,
439
+ )
440
+ raise LintWarn(msg, node.func_lineno, node.source_file)
metaflow/meta_files.py ADDED
@@ -0,0 +1,13 @@
1
+ _UNINITIALIZED = object()
2
+ _info_file_content = _UNINITIALIZED
3
+
4
+
5
+ def read_info_file():
6
+ # Prevent circular import
7
+ from .packaging_sys import MetaflowCodeContent
8
+
9
+ global _info_file_content
10
+
11
+ if id(_info_file_content) == id(_UNINITIALIZED):
12
+ _info_file_content = MetaflowCodeContent.get_info()
13
+ return _info_file_content
@@ -632,7 +632,12 @@ class MetadataProvider(object):
632
632
 
633
633
  def _get_git_info_as_dict(self):
634
634
  git_info = {}
635
- env = self._environment.get_environment_info()
635
+ # NOTE: For flows executing remotely, we want to read from the INFO file of the code package that contains
636
+ # information on the original environment that deployed the flow.
637
+ # Otherwise git related info will be missing, as the repository is not part of the codepackage.
638
+ from metaflow.packaging_sys import MetaflowCodeContent
639
+
640
+ env = MetaflowCodeContent.get_info() or self._environment.get_environment_info()
636
641
  for key in [
637
642
  "repo_url",
638
643
  "branch_name",
@@ -674,11 +679,17 @@ class MetadataProvider(object):
674
679
  if code_sha:
675
680
  code_url = os.environ.get("METAFLOW_CODE_URL")
676
681
  code_ds = os.environ.get("METAFLOW_CODE_DS")
682
+ code_metadata = os.environ.get("METAFLOW_CODE_METADATA")
677
683
  metadata.append(
678
684
  MetaDatum(
679
685
  field="code-package",
680
686
  value=json.dumps(
681
- {"ds_type": code_ds, "sha": code_sha, "location": code_url}
687
+ {
688
+ "ds_type": code_ds,
689
+ "sha": code_sha,
690
+ "location": code_url,
691
+ "metadata": code_metadata,
692
+ }
682
693
  ),
683
694
  type="code-package",
684
695
  tags=["attempt_id:{0}".format(attempt)],