ob-metaflow 2.11.13.1__py2.py3-none-any.whl → 2.19.7.1rc0__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (289)
  1. metaflow/R.py +10 -7
  2. metaflow/__init__.py +40 -25
  3. metaflow/_vendor/imghdr/__init__.py +186 -0
  4. metaflow/_vendor/importlib_metadata/__init__.py +1063 -0
  5. metaflow/_vendor/importlib_metadata/_adapters.py +68 -0
  6. metaflow/_vendor/importlib_metadata/_collections.py +30 -0
  7. metaflow/_vendor/importlib_metadata/_compat.py +71 -0
  8. metaflow/_vendor/importlib_metadata/_functools.py +104 -0
  9. metaflow/_vendor/importlib_metadata/_itertools.py +73 -0
  10. metaflow/_vendor/importlib_metadata/_meta.py +48 -0
  11. metaflow/_vendor/importlib_metadata/_text.py +99 -0
  12. metaflow/_vendor/importlib_metadata/py.typed +0 -0
  13. metaflow/_vendor/typeguard/__init__.py +48 -0
  14. metaflow/_vendor/typeguard/_checkers.py +1070 -0
  15. metaflow/_vendor/typeguard/_config.py +108 -0
  16. metaflow/_vendor/typeguard/_decorators.py +233 -0
  17. metaflow/_vendor/typeguard/_exceptions.py +42 -0
  18. metaflow/_vendor/typeguard/_functions.py +308 -0
  19. metaflow/_vendor/typeguard/_importhook.py +213 -0
  20. metaflow/_vendor/typeguard/_memo.py +48 -0
  21. metaflow/_vendor/typeguard/_pytest_plugin.py +127 -0
  22. metaflow/_vendor/typeguard/_suppression.py +86 -0
  23. metaflow/_vendor/typeguard/_transformer.py +1229 -0
  24. metaflow/_vendor/typeguard/_union_transformer.py +55 -0
  25. metaflow/_vendor/typeguard/_utils.py +173 -0
  26. metaflow/_vendor/typeguard/py.typed +0 -0
  27. metaflow/_vendor/typing_extensions.py +3641 -0
  28. metaflow/_vendor/v3_7/importlib_metadata/__init__.py +1063 -0
  29. metaflow/_vendor/v3_7/importlib_metadata/_adapters.py +68 -0
  30. metaflow/_vendor/v3_7/importlib_metadata/_collections.py +30 -0
  31. metaflow/_vendor/v3_7/importlib_metadata/_compat.py +71 -0
  32. metaflow/_vendor/v3_7/importlib_metadata/_functools.py +104 -0
  33. metaflow/_vendor/v3_7/importlib_metadata/_itertools.py +73 -0
  34. metaflow/_vendor/v3_7/importlib_metadata/_meta.py +48 -0
  35. metaflow/_vendor/v3_7/importlib_metadata/_text.py +99 -0
  36. metaflow/_vendor/v3_7/importlib_metadata/py.typed +0 -0
  37. metaflow/_vendor/v3_7/typeguard/__init__.py +48 -0
  38. metaflow/_vendor/v3_7/typeguard/_checkers.py +906 -0
  39. metaflow/_vendor/v3_7/typeguard/_config.py +108 -0
  40. metaflow/_vendor/v3_7/typeguard/_decorators.py +237 -0
  41. metaflow/_vendor/v3_7/typeguard/_exceptions.py +42 -0
  42. metaflow/_vendor/v3_7/typeguard/_functions.py +310 -0
  43. metaflow/_vendor/v3_7/typeguard/_importhook.py +213 -0
  44. metaflow/_vendor/v3_7/typeguard/_memo.py +48 -0
  45. metaflow/_vendor/v3_7/typeguard/_pytest_plugin.py +100 -0
  46. metaflow/_vendor/v3_7/typeguard/_suppression.py +88 -0
  47. metaflow/_vendor/v3_7/typeguard/_transformer.py +1207 -0
  48. metaflow/_vendor/v3_7/typeguard/_union_transformer.py +54 -0
  49. metaflow/_vendor/v3_7/typeguard/_utils.py +169 -0
  50. metaflow/_vendor/v3_7/typeguard/py.typed +0 -0
  51. metaflow/_vendor/v3_7/typing_extensions.py +3072 -0
  52. metaflow/_vendor/yaml/__init__.py +427 -0
  53. metaflow/_vendor/yaml/composer.py +139 -0
  54. metaflow/_vendor/yaml/constructor.py +748 -0
  55. metaflow/_vendor/yaml/cyaml.py +101 -0
  56. metaflow/_vendor/yaml/dumper.py +62 -0
  57. metaflow/_vendor/yaml/emitter.py +1137 -0
  58. metaflow/_vendor/yaml/error.py +75 -0
  59. metaflow/_vendor/yaml/events.py +86 -0
  60. metaflow/_vendor/yaml/loader.py +63 -0
  61. metaflow/_vendor/yaml/nodes.py +49 -0
  62. metaflow/_vendor/yaml/parser.py +589 -0
  63. metaflow/_vendor/yaml/reader.py +185 -0
  64. metaflow/_vendor/yaml/representer.py +389 -0
  65. metaflow/_vendor/yaml/resolver.py +227 -0
  66. metaflow/_vendor/yaml/scanner.py +1435 -0
  67. metaflow/_vendor/yaml/serializer.py +111 -0
  68. metaflow/_vendor/yaml/tokens.py +104 -0
  69. metaflow/cards.py +5 -0
  70. metaflow/cli.py +331 -785
  71. metaflow/cli_args.py +17 -0
  72. metaflow/cli_components/__init__.py +0 -0
  73. metaflow/cli_components/dump_cmd.py +96 -0
  74. metaflow/cli_components/init_cmd.py +52 -0
  75. metaflow/cli_components/run_cmds.py +546 -0
  76. metaflow/cli_components/step_cmd.py +334 -0
  77. metaflow/cli_components/utils.py +140 -0
  78. metaflow/client/__init__.py +1 -0
  79. metaflow/client/core.py +467 -73
  80. metaflow/client/filecache.py +75 -35
  81. metaflow/clone_util.py +7 -1
  82. metaflow/cmd/code/__init__.py +231 -0
  83. metaflow/cmd/develop/stub_generator.py +756 -288
  84. metaflow/cmd/develop/stubs.py +12 -28
  85. metaflow/cmd/main_cli.py +6 -4
  86. metaflow/cmd/make_wrapper.py +78 -0
  87. metaflow/datastore/__init__.py +1 -0
  88. metaflow/datastore/content_addressed_store.py +41 -10
  89. metaflow/datastore/datastore_set.py +11 -2
  90. metaflow/datastore/flow_datastore.py +156 -10
  91. metaflow/datastore/spin_datastore.py +91 -0
  92. metaflow/datastore/task_datastore.py +154 -39
  93. metaflow/debug.py +5 -0
  94. metaflow/decorators.py +404 -78
  95. metaflow/exception.py +8 -2
  96. metaflow/extension_support/__init__.py +527 -376
  97. metaflow/extension_support/_empty_file.py +2 -2
  98. metaflow/extension_support/plugins.py +49 -31
  99. metaflow/flowspec.py +482 -33
  100. metaflow/graph.py +210 -42
  101. metaflow/includefile.py +84 -40
  102. metaflow/lint.py +141 -22
  103. metaflow/meta_files.py +13 -0
  104. metaflow/{metadata → metadata_provider}/heartbeat.py +24 -8
  105. metaflow/{metadata → metadata_provider}/metadata.py +86 -1
  106. metaflow/metaflow_config.py +175 -28
  107. metaflow/metaflow_config_funcs.py +51 -3
  108. metaflow/metaflow_current.py +4 -10
  109. metaflow/metaflow_environment.py +139 -53
  110. metaflow/metaflow_git.py +115 -0
  111. metaflow/metaflow_profile.py +18 -0
  112. metaflow/metaflow_version.py +150 -66
  113. metaflow/mflog/__init__.py +4 -3
  114. metaflow/mflog/save_logs.py +2 -2
  115. metaflow/multicore_utils.py +31 -14
  116. metaflow/package/__init__.py +673 -0
  117. metaflow/packaging_sys/__init__.py +880 -0
  118. metaflow/packaging_sys/backend.py +128 -0
  119. metaflow/packaging_sys/distribution_support.py +153 -0
  120. metaflow/packaging_sys/tar_backend.py +99 -0
  121. metaflow/packaging_sys/utils.py +54 -0
  122. metaflow/packaging_sys/v1.py +527 -0
  123. metaflow/parameters.py +149 -28
  124. metaflow/plugins/__init__.py +74 -5
  125. metaflow/plugins/airflow/airflow.py +40 -25
  126. metaflow/plugins/airflow/airflow_cli.py +22 -5
  127. metaflow/plugins/airflow/airflow_decorator.py +1 -1
  128. metaflow/plugins/airflow/airflow_utils.py +5 -3
  129. metaflow/plugins/airflow/sensors/base_sensor.py +4 -4
  130. metaflow/plugins/airflow/sensors/external_task_sensor.py +2 -2
  131. metaflow/plugins/airflow/sensors/s3_sensor.py +2 -2
  132. metaflow/plugins/argo/argo_client.py +78 -33
  133. metaflow/plugins/argo/argo_events.py +6 -6
  134. metaflow/plugins/argo/argo_workflows.py +2410 -527
  135. metaflow/plugins/argo/argo_workflows_cli.py +571 -121
  136. metaflow/plugins/argo/argo_workflows_decorator.py +43 -12
  137. metaflow/plugins/argo/argo_workflows_deployer.py +106 -0
  138. metaflow/plugins/argo/argo_workflows_deployer_objects.py +453 -0
  139. metaflow/plugins/argo/capture_error.py +73 -0
  140. metaflow/plugins/argo/conditional_input_paths.py +35 -0
  141. metaflow/plugins/argo/exit_hooks.py +209 -0
  142. metaflow/plugins/argo/jobset_input_paths.py +15 -0
  143. metaflow/plugins/argo/param_val.py +19 -0
  144. metaflow/plugins/aws/aws_client.py +10 -3
  145. metaflow/plugins/aws/aws_utils.py +55 -2
  146. metaflow/plugins/aws/batch/batch.py +72 -5
  147. metaflow/plugins/aws/batch/batch_cli.py +33 -10
  148. metaflow/plugins/aws/batch/batch_client.py +4 -3
  149. metaflow/plugins/aws/batch/batch_decorator.py +102 -35
  150. metaflow/plugins/aws/secrets_manager/aws_secrets_manager_secrets_provider.py +13 -10
  151. metaflow/plugins/aws/step_functions/dynamo_db_client.py +0 -3
  152. metaflow/plugins/aws/step_functions/production_token.py +1 -1
  153. metaflow/plugins/aws/step_functions/step_functions.py +65 -8
  154. metaflow/plugins/aws/step_functions/step_functions_cli.py +101 -7
  155. metaflow/plugins/aws/step_functions/step_functions_decorator.py +1 -2
  156. metaflow/plugins/aws/step_functions/step_functions_deployer.py +97 -0
  157. metaflow/plugins/aws/step_functions/step_functions_deployer_objects.py +264 -0
  158. metaflow/plugins/azure/azure_exceptions.py +1 -1
  159. metaflow/plugins/azure/azure_secret_manager_secrets_provider.py +240 -0
  160. metaflow/plugins/azure/azure_tail.py +1 -1
  161. metaflow/plugins/azure/includefile_support.py +2 -0
  162. metaflow/plugins/cards/card_cli.py +66 -30
  163. metaflow/plugins/cards/card_creator.py +25 -1
  164. metaflow/plugins/cards/card_datastore.py +21 -49
  165. metaflow/plugins/cards/card_decorator.py +132 -8
  166. metaflow/plugins/cards/card_modules/basic.py +112 -17
  167. metaflow/plugins/cards/card_modules/bundle.css +1 -1
  168. metaflow/plugins/cards/card_modules/card.py +16 -1
  169. metaflow/plugins/cards/card_modules/chevron/renderer.py +1 -1
  170. metaflow/plugins/cards/card_modules/components.py +665 -28
  171. metaflow/plugins/cards/card_modules/convert_to_native_type.py +36 -7
  172. metaflow/plugins/cards/card_modules/json_viewer.py +232 -0
  173. metaflow/plugins/cards/card_modules/main.css +1 -0
  174. metaflow/plugins/cards/card_modules/main.js +68 -49
  175. metaflow/plugins/cards/card_modules/renderer_tools.py +1 -0
  176. metaflow/plugins/cards/card_modules/test_cards.py +26 -12
  177. metaflow/plugins/cards/card_server.py +39 -14
  178. metaflow/plugins/cards/component_serializer.py +2 -9
  179. metaflow/plugins/cards/metadata.py +22 -0
  180. metaflow/plugins/catch_decorator.py +9 -0
  181. metaflow/plugins/datastores/azure_storage.py +10 -1
  182. metaflow/plugins/datastores/gs_storage.py +6 -2
  183. metaflow/plugins/datastores/local_storage.py +12 -6
  184. metaflow/plugins/datastores/spin_storage.py +12 -0
  185. metaflow/plugins/datatools/local.py +2 -0
  186. metaflow/plugins/datatools/s3/s3.py +126 -75
  187. metaflow/plugins/datatools/s3/s3op.py +254 -121
  188. metaflow/plugins/env_escape/__init__.py +3 -3
  189. metaflow/plugins/env_escape/client_modules.py +102 -72
  190. metaflow/plugins/env_escape/server.py +7 -0
  191. metaflow/plugins/env_escape/stub.py +24 -5
  192. metaflow/plugins/events_decorator.py +343 -185
  193. metaflow/plugins/exit_hook/__init__.py +0 -0
  194. metaflow/plugins/exit_hook/exit_hook_decorator.py +46 -0
  195. metaflow/plugins/exit_hook/exit_hook_script.py +52 -0
  196. metaflow/plugins/gcp/__init__.py +1 -1
  197. metaflow/plugins/gcp/gcp_secret_manager_secrets_provider.py +11 -6
  198. metaflow/plugins/gcp/gs_tail.py +10 -6
  199. metaflow/plugins/gcp/includefile_support.py +3 -0
  200. metaflow/plugins/kubernetes/kube_utils.py +108 -0
  201. metaflow/plugins/kubernetes/kubernetes.py +411 -130
  202. metaflow/plugins/kubernetes/kubernetes_cli.py +168 -36
  203. metaflow/plugins/kubernetes/kubernetes_client.py +104 -2
  204. metaflow/plugins/kubernetes/kubernetes_decorator.py +246 -88
  205. metaflow/plugins/kubernetes/kubernetes_job.py +253 -581
  206. metaflow/plugins/kubernetes/kubernetes_jobsets.py +1071 -0
  207. metaflow/plugins/kubernetes/spot_metadata_cli.py +69 -0
  208. metaflow/plugins/kubernetes/spot_monitor_sidecar.py +109 -0
  209. metaflow/plugins/logs_cli.py +359 -0
  210. metaflow/plugins/{metadata → metadata_providers}/local.py +144 -84
  211. metaflow/plugins/{metadata → metadata_providers}/service.py +103 -26
  212. metaflow/plugins/metadata_providers/spin.py +16 -0
  213. metaflow/plugins/package_cli.py +36 -24
  214. metaflow/plugins/parallel_decorator.py +128 -11
  215. metaflow/plugins/parsers.py +16 -0
  216. metaflow/plugins/project_decorator.py +51 -5
  217. metaflow/plugins/pypi/bootstrap.py +357 -105
  218. metaflow/plugins/pypi/conda_decorator.py +82 -81
  219. metaflow/plugins/pypi/conda_environment.py +187 -52
  220. metaflow/plugins/pypi/micromamba.py +157 -47
  221. metaflow/plugins/pypi/parsers.py +268 -0
  222. metaflow/plugins/pypi/pip.py +88 -13
  223. metaflow/plugins/pypi/pypi_decorator.py +37 -1
  224. metaflow/plugins/pypi/utils.py +48 -2
  225. metaflow/plugins/resources_decorator.py +2 -2
  226. metaflow/plugins/secrets/__init__.py +3 -0
  227. metaflow/plugins/secrets/secrets_decorator.py +26 -181
  228. metaflow/plugins/secrets/secrets_func.py +49 -0
  229. metaflow/plugins/secrets/secrets_spec.py +101 -0
  230. metaflow/plugins/secrets/utils.py +74 -0
  231. metaflow/plugins/tag_cli.py +4 -7
  232. metaflow/plugins/test_unbounded_foreach_decorator.py +41 -6
  233. metaflow/plugins/timeout_decorator.py +3 -3
  234. metaflow/plugins/uv/__init__.py +0 -0
  235. metaflow/plugins/uv/bootstrap.py +128 -0
  236. metaflow/plugins/uv/uv_environment.py +72 -0
  237. metaflow/procpoll.py +1 -1
  238. metaflow/pylint_wrapper.py +5 -1
  239. metaflow/runner/__init__.py +0 -0
  240. metaflow/runner/click_api.py +717 -0
  241. metaflow/runner/deployer.py +470 -0
  242. metaflow/runner/deployer_impl.py +201 -0
  243. metaflow/runner/metaflow_runner.py +714 -0
  244. metaflow/runner/nbdeploy.py +132 -0
  245. metaflow/runner/nbrun.py +225 -0
  246. metaflow/runner/subprocess_manager.py +650 -0
  247. metaflow/runner/utils.py +335 -0
  248. metaflow/runtime.py +1078 -260
  249. metaflow/sidecar/sidecar_worker.py +1 -1
  250. metaflow/system/__init__.py +5 -0
  251. metaflow/system/system_logger.py +85 -0
  252. metaflow/system/system_monitor.py +108 -0
  253. metaflow/system/system_utils.py +19 -0
  254. metaflow/task.py +521 -225
  255. metaflow/tracing/__init__.py +7 -7
  256. metaflow/tracing/span_exporter.py +31 -38
  257. metaflow/tracing/tracing_modules.py +38 -43
  258. metaflow/tuple_util.py +27 -0
  259. metaflow/user_configs/__init__.py +0 -0
  260. metaflow/user_configs/config_options.py +563 -0
  261. metaflow/user_configs/config_parameters.py +598 -0
  262. metaflow/user_decorators/__init__.py +0 -0
  263. metaflow/user_decorators/common.py +144 -0
  264. metaflow/user_decorators/mutable_flow.py +512 -0
  265. metaflow/user_decorators/mutable_step.py +424 -0
  266. metaflow/user_decorators/user_flow_decorator.py +264 -0
  267. metaflow/user_decorators/user_step_decorator.py +749 -0
  268. metaflow/util.py +243 -27
  269. metaflow/vendor.py +23 -7
  270. metaflow/version.py +1 -1
  271. ob_metaflow-2.19.7.1rc0.data/data/share/metaflow/devtools/Makefile +355 -0
  272. ob_metaflow-2.19.7.1rc0.data/data/share/metaflow/devtools/Tiltfile +726 -0
  273. ob_metaflow-2.19.7.1rc0.data/data/share/metaflow/devtools/pick_services.sh +105 -0
  274. ob_metaflow-2.19.7.1rc0.dist-info/METADATA +87 -0
  275. ob_metaflow-2.19.7.1rc0.dist-info/RECORD +445 -0
  276. {ob_metaflow-2.11.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/WHEEL +1 -1
  277. {ob_metaflow-2.11.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/entry_points.txt +1 -0
  278. metaflow/_vendor/v3_5/__init__.py +0 -1
  279. metaflow/_vendor/v3_5/importlib_metadata/__init__.py +0 -644
  280. metaflow/_vendor/v3_5/importlib_metadata/_compat.py +0 -152
  281. metaflow/package.py +0 -188
  282. ob_metaflow-2.11.13.1.dist-info/METADATA +0 -85
  283. ob_metaflow-2.11.13.1.dist-info/RECORD +0 -308
  284. /metaflow/_vendor/{v3_5/zipp.py → zipp.py} +0 -0
  285. /metaflow/{metadata → metadata_provider}/__init__.py +0 -0
  286. /metaflow/{metadata → metadata_provider}/util.py +0 -0
  287. /metaflow/plugins/{metadata → metadata_providers}/__init__.py +0 -0
  288. {ob_metaflow-2.11.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info/licenses}/LICENSE +0 -0
  289. {ob_metaflow-2.11.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/top_level.txt +0 -0
metaflow/graph.py CHANGED
@@ -2,6 +2,11 @@ import inspect
2
2
  import ast
3
3
  import re
4
4
 
5
+ from itertools import chain
6
+
7
+
8
+ from .util import to_pod
9
+
5
10
 
6
11
  def deindent_docstring(doc):
7
12
  if doc:
@@ -42,10 +47,17 @@ def deindent_docstring(doc):
42
47
 
43
48
 
44
49
  class DAGNode(object):
45
- def __init__(self, func_ast, decos, doc):
50
+ def __init__(
51
+ self, func_ast, decos, wrappers, config_decorators, doc, source_file, lineno
52
+ ):
46
53
  self.name = func_ast.name
47
- self.func_lineno = func_ast.lineno
54
+ self.source_file = source_file
55
+ # lineno is the start line of decorators in source_file
56
+ # func_ast.lineno is lines from decorators start to def of function
57
+ self.func_lineno = lineno + func_ast.lineno - 1
48
58
  self.decorators = decos
59
+ self.wrappers = wrappers
60
+ self.config_decorators = config_decorators
49
61
  self.doc = deindent_docstring(doc)
50
62
  self.parallel_step = any(getattr(deco, "IS_PARALLEL", False) for deco in decos)
51
63
 
@@ -56,14 +68,17 @@ class DAGNode(object):
56
68
  self.has_tail_next = False
57
69
  self.invalid_tail_next = False
58
70
  self.num_args = 0
71
+ self.switch_cases = {}
72
+ self.condition = None
59
73
  self.foreach_param = None
60
74
  self.num_parallel = 0
61
75
  self.parallel_foreach = False
62
- self._parse(func_ast)
76
+ self._parse(func_ast, lineno)
63
77
 
64
78
  # these attributes are populated by _traverse_graph
65
79
  self.in_funcs = set()
66
80
  self.split_parents = []
81
+ self.split_branches = []
67
82
  self.matching_join = None
68
83
  # these attributes are populated by _postprocess
69
84
  self.is_inside_foreach = False
@@ -71,8 +86,57 @@ class DAGNode(object):
71
86
  def _expr_str(self, expr):
72
87
  return "%s.%s" % (expr.value.id, expr.attr)
73
88
 
74
- def _parse(self, func_ast):
89
+ def _parse_switch_dict(self, dict_node):
90
+ switch_cases = {}
91
+
92
+ if isinstance(dict_node, ast.Dict):
93
+ for key, value in zip(dict_node.keys, dict_node.values):
94
+ case_key = None
95
+
96
+ # handle string literals
97
+ if hasattr(ast, "Str") and isinstance(key, ast.Str):
98
+ case_key = key.s
99
+ elif isinstance(key, ast.Constant):
100
+ case_key = key.value
101
+ elif isinstance(key, ast.Attribute):
102
+ if isinstance(key.value, ast.Attribute) and isinstance(
103
+ key.value.value, ast.Name
104
+ ):
105
+ # This handles self.config.some_key
106
+ if key.value.value.id == "self":
107
+ config_var = key.value.attr
108
+ config_key = key.attr
109
+ case_key = f"config:{config_var}.{config_key}"
110
+ else:
111
+ return None
112
+ else:
113
+ return None
114
+
115
+ # handle variables or other dynamic expressions - not allowed
116
+ elif isinstance(key, ast.Name):
117
+ return None
118
+ else:
119
+ # can't statically analyze this key
120
+ return None
121
+
122
+ if case_key is None:
123
+ return None
124
+
125
+ # extract the step name from the value
126
+ if isinstance(value, ast.Attribute) and isinstance(
127
+ value.value, ast.Name
128
+ ):
129
+ if value.value.id == "self":
130
+ step_name = value.attr
131
+ switch_cases[case_key] = step_name
132
+ else:
133
+ return None
134
+ else:
135
+ return None
136
+
137
+ return switch_cases if switch_cases else None
75
138
 
139
+ def _parse(self, func_ast, lineno):
76
140
  self.num_args = len(func_ast.args.args)
77
141
  tail = func_ast.body[-1]
78
142
 
@@ -92,8 +156,39 @@ class DAGNode(object):
92
156
 
93
157
  self.has_tail_next = True
94
158
  self.invalid_tail_next = True
95
- self.tail_next_lineno = tail.lineno
96
- self.out_funcs = [e.attr for e in tail.value.args]
159
+ self.tail_next_lineno = lineno + tail.lineno - 1
160
+
161
+ # Check if first argument is a dictionary (switch case)
162
+ if (
163
+ len(tail.value.args) == 1
164
+ and isinstance(tail.value.args[0], ast.Dict)
165
+ and any(k.arg == "condition" for k in tail.value.keywords)
166
+ ):
167
+ # This is a switch statement
168
+ switch_cases = self._parse_switch_dict(tail.value.args[0])
169
+ condition_name = None
170
+
171
+ # Get condition parameter
172
+ for keyword in tail.value.keywords:
173
+ if keyword.arg == "condition":
174
+ if hasattr(ast, "Str") and isinstance(keyword.value, ast.Str):
175
+ condition_name = keyword.value.s
176
+ elif isinstance(keyword.value, ast.Constant) and isinstance(
177
+ keyword.value.value, str
178
+ ):
179
+ condition_name = keyword.value.value
180
+ break
181
+
182
+ if switch_cases and condition_name:
183
+ self.type = "split-switch"
184
+ self.condition = condition_name
185
+ self.switch_cases = switch_cases
186
+ self.out_funcs = list(switch_cases.values())
187
+ self.invalid_tail_next = False
188
+ return
189
+
190
+ else:
191
+ self.out_funcs = [e.attr for e in tail.value.args]
97
192
 
98
193
  keywords = dict(
99
194
  (k.arg, getattr(k.value, "s", None)) for k in tail.value.keywords
@@ -129,10 +224,11 @@ class DAGNode(object):
129
224
  return
130
225
 
131
226
  def __str__(self):
132
- return """*[{0.name} {0.type} (line {0.func_lineno})]*
227
+ return """*[{0.name} {0.type} ({0.source_file} line {0.func_lineno})]*
133
228
  in_funcs={in_funcs}
134
229
  out_funcs={out_funcs}
135
230
  split_parents={parents}
231
+ split_branches={branches}
136
232
  matching_join={matching_join}
137
233
  is_inside_foreach={is_inside_foreach}
138
234
  decorators={decos}
@@ -140,6 +236,7 @@ class DAGNode(object):
140
236
  has_tail_next={0.has_tail_next} (line {0.tail_next_lineno})
141
237
  invalid_tail_next={0.invalid_tail_next}
142
238
  foreach_param={0.foreach_param}
239
+ condition={0.condition}
143
240
  parallel_step={0.parallel_step}
144
241
  parallel_foreach={0.parallel_foreach}
145
242
  -> {out}""".format(
@@ -149,39 +246,43 @@ class DAGNode(object):
149
246
  out_funcs=", ".join("[%s]" % x for x in self.out_funcs),
150
247
  in_funcs=", ".join("[%s]" % x for x in self.in_funcs),
151
248
  parents=", ".join("[%s]" % x for x in self.split_parents),
249
+ branches=", ".join("[%s]" % x for x in self.split_branches),
152
250
  decos=" | ".join(map(str, self.decorators)),
153
251
  out=", ".join("[%s]" % x for x in self.out_funcs),
154
252
  )
155
253
 
156
254
 
157
- class StepVisitor(ast.NodeVisitor):
158
- def __init__(self, nodes, flow):
159
- self.nodes = nodes
160
- self.flow = flow
161
- super(StepVisitor, self).__init__()
162
-
163
- def visit_FunctionDef(self, node):
164
- func = getattr(self.flow, node.name)
165
- if hasattr(func, "is_step"):
166
- self.nodes[node.name] = DAGNode(node, func.decorators, func.__doc__)
167
-
168
-
169
255
  class FlowGraph(object):
170
256
  def __init__(self, flow):
171
257
  self.name = flow.__name__
172
258
  self.nodes = self._create_nodes(flow)
173
259
  self.doc = deindent_docstring(flow.__doc__)
260
+ # nodes sorted in topological order.
261
+ self.sorted_nodes = []
174
262
  self._traverse_graph()
175
263
  self._postprocess()
176
264
 
177
265
  def _create_nodes(self, flow):
178
- module = __import__(flow.__module__)
179
- tree = ast.parse(inspect.getsource(module)).body
180
- root = [n for n in tree if isinstance(n, ast.ClassDef) and n.name == self.name][
181
- 0
182
- ]
183
266
  nodes = {}
184
- StepVisitor(nodes, flow).visit(root)
267
+ for element in dir(flow):
268
+ func = getattr(flow, element)
269
+ if callable(func) and hasattr(func, "is_step"):
270
+ source_file = inspect.getsourcefile(func)
271
+ source_lines, lineno = inspect.getsourcelines(func)
272
+ # This also works for code (strips out leading whitspace based on
273
+ # first line)
274
+ source_code = deindent_docstring("".join(source_lines))
275
+ function_ast = ast.parse(source_code).body[0]
276
+ node = DAGNode(
277
+ function_ast,
278
+ func.decorators,
279
+ func.wrappers,
280
+ func.config_decorators,
281
+ func.__doc__,
282
+ source_file,
283
+ lineno,
284
+ )
285
+ nodes[element] = node
185
286
  return nodes
186
287
 
187
288
  def _postprocess(self):
@@ -196,18 +297,32 @@ class FlowGraph(object):
196
297
  node.is_inside_foreach = True
197
298
 
198
299
  def _traverse_graph(self):
199
- def traverse(node, seen, split_parents):
300
+ def traverse(node, seen, split_parents, split_branches):
301
+ add_split_branch = False
302
+ try:
303
+ self.sorted_nodes.remove(node.name)
304
+ except ValueError:
305
+ pass
306
+ self.sorted_nodes.append(node.name)
200
307
  if node.type in ("split", "foreach"):
201
308
  node.split_parents = split_parents
309
+ node.split_branches = split_branches
310
+ add_split_branch = True
202
311
  split_parents = split_parents + [node.name]
312
+ elif node.type == "split-switch":
313
+ node.split_parents = split_parents
314
+ node.split_branches = split_branches
203
315
  elif node.type == "join":
204
316
  # ignore joins without splits
205
317
  if split_parents:
206
318
  self[split_parents[-1]].matching_join = node.name
207
319
  node.split_parents = split_parents
320
+ node.split_branches = split_branches[:-1]
208
321
  split_parents = split_parents[:-1]
322
+ split_branches = split_branches[:-1]
209
323
  else:
210
324
  node.split_parents = split_parents
325
+ node.split_branches = split_branches
211
326
 
212
327
  for n in node.out_funcs:
213
328
  # graph may contain loops - ignore them
@@ -216,10 +331,15 @@ class FlowGraph(object):
216
331
  if n in self:
217
332
  child = self[n]
218
333
  child.in_funcs.add(node.name)
219
- traverse(child, seen + [n], split_parents)
334
+ traverse(
335
+ child,
336
+ seen + [n],
337
+ split_parents,
338
+ split_branches + ([n] if add_split_branch else []),
339
+ )
220
340
 
221
341
  if "start" in self:
222
- traverse(self["start"], [], [])
342
+ traverse(self["start"], [], [], [])
223
343
 
224
344
  # fix the order of in_funcs
225
345
  for node in self.nodes.values():
@@ -235,22 +355,42 @@ class FlowGraph(object):
235
355
  return iter(self.nodes.values())
236
356
 
237
357
  def __str__(self):
238
- return "\n".join(
239
- str(n) for _, n in sorted((n.func_lineno, n) for n in self.nodes.values())
240
- )
358
+ return "\n".join(str(self[n]) for n in self.sorted_nodes)
241
359
 
242
360
  def output_dot(self):
243
361
  def edge_specs():
244
362
  for node in self.nodes.values():
245
- for edge in node.out_funcs:
246
- yield "%s -> %s;" % (node.name, edge)
363
+ if node.type == "split-switch":
364
+ # Label edges for switch cases
365
+ for case_value, step_name in node.switch_cases.items():
366
+ yield (
367
+ '{0} -> {1} [label="{2}" color="blue" fontcolor="blue"];'.format(
368
+ node.name, step_name, case_value
369
+ )
370
+ )
371
+ else:
372
+ for edge in node.out_funcs:
373
+ yield "%s -> %s;" % (node.name, edge)
247
374
 
248
375
  def node_specs():
249
376
  for node in self.nodes.values():
250
- nodetype = "join" if node.num_args > 1 else node.type
251
- yield '"{0.name}"' '[ label = <<b>{0.name}</b> | <font point-size="10">{type}</font>> ' ' fontname = "Helvetica" ' ' shape = "record" ];'.format(
252
- node, type=nodetype
253
- )
377
+ if node.type == "split-switch":
378
+ # Hexagon shape for switch nodes
379
+ condition_label = (
380
+ f"switch: {node.condition}" if node.condition else "switch"
381
+ )
382
+ yield (
383
+ '"{0.name}" '
384
+ '[ label = <<b>{0.name}</b><br/><font point-size="9">{condition}</font>> '
385
+ ' fontname = "Helvetica" '
386
+ ' shape = "hexagon" '
387
+ ' style = "filled" fillcolor = "lightgreen" ];'
388
+ ).format(node, condition=condition_label)
389
+ else:
390
+ nodetype = "join" if node.num_args > 1 else node.type
391
+ yield '"{0.name}"' '[ label = <<b>{0.name}</b> | <font point-size="10">{type}</font>> ' ' fontname = "Helvetica" ' ' shape = "record" ];'.format(
392
+ node, type=nodetype
393
+ )
254
394
 
255
395
  return (
256
396
  "digraph {0.name} {{\n"
@@ -262,7 +402,6 @@ class FlowGraph(object):
262
402
  )
263
403
 
264
404
  def output_steps(self):
265
-
266
405
  steps_info = {}
267
406
  graph_structure = []
268
407
 
@@ -275,6 +414,8 @@ class FlowGraph(object):
275
414
  if node.parallel_foreach:
276
415
  return "split-parallel"
277
416
  return "split-foreach"
417
+ elif node.type == "split-switch":
418
+ return "split-switch"
278
419
  return "unknown" # Should never happen
279
420
 
280
421
  def node_to_dict(name, node):
@@ -282,15 +423,26 @@ class FlowGraph(object):
282
423
  "name": name,
283
424
  "type": node_to_type(node),
284
425
  "line": node.func_lineno,
426
+ "source_file": node.source_file,
285
427
  "doc": node.doc,
286
428
  "decorators": [
287
429
  {
288
430
  "name": deco.name,
289
- "attributes": deco.attributes,
431
+ "attributes": to_pod(deco.attributes),
290
432
  "statically_defined": deco.statically_defined,
433
+ "inserted_by": deco.inserted_by,
291
434
  }
292
435
  for deco in node.decorators
293
436
  if not deco.name.startswith("_")
437
+ ]
438
+ + [
439
+ {
440
+ "name": deco.decorator_name,
441
+ "attributes": {"_args": deco._args, **deco._kwargs},
442
+ "statically_defined": deco.statically_defined,
443
+ "inserted_by": deco.inserted_by,
444
+ }
445
+ for deco in chain(node.wrappers, node.config_decorators)
294
446
  ],
295
447
  "next": node.out_funcs,
296
448
  }
@@ -298,6 +450,9 @@ class FlowGraph(object):
298
450
  d["foreach_artifact"] = node.foreach_param
299
451
  elif d["type"] == "split-parallel":
300
452
  d["num_parallel"] = node.num_parallel
453
+ elif d["type"] == "split-switch":
454
+ d["condition"] = node.condition
455
+ d["switch_cases"] = node.switch_cases
301
456
  if node.matching_join:
302
457
  d["matching_join"] = node.matching_join
303
458
  return d
@@ -312,8 +467,8 @@ class FlowGraph(object):
312
467
  steps_info[cur_name] = node_dict
313
468
  resulting_list.append(cur_name)
314
469
 
315
- if cur_node.type not in ("start", "linear", "join"):
316
- # We need to look at the different branches for this
470
+ node_type = node_to_type(cur_node)
471
+ if node_type in ("split-static", "split-foreach"):
317
472
  resulting_list.append(
318
473
  [
319
474
  populate_block(s, cur_node.matching_join)
@@ -321,8 +476,21 @@ class FlowGraph(object):
321
476
  ]
322
477
  )
323
478
  cur_name = cur_node.matching_join
479
+ elif node_type == "split-switch":
480
+ all_paths = [
481
+ populate_block(s, end_name)
482
+ for s in cur_node.out_funcs
483
+ if s != cur_name
484
+ ]
485
+ resulting_list.append(all_paths)
486
+ cur_name = end_name
324
487
  else:
325
- cur_name = cur_node.out_funcs[0]
488
+ # handles only linear, start, and join steps.
489
+ if cur_node.out_funcs:
490
+ cur_name = cur_node.out_funcs[0]
491
+ else:
492
+ # handles terminal nodes or when we jump to 'end_name'.
493
+ break
326
494
  return resulting_list
327
495
 
328
496
  graph_structure = populate_block("start", "end")
metaflow/includefile.py CHANGED
@@ -1,14 +1,16 @@
1
1
  from collections import namedtuple
2
2
  import gzip
3
3
 
4
+ import importlib
4
5
  import io
5
6
  import json
6
7
  import os
7
8
 
8
9
  from hashlib import sha1
9
- from typing import Any, Callable, Dict, Optional
10
+ from typing import Any, Callable, Dict, Optional, Union
10
11
 
11
12
  from metaflow._vendor import click
13
+ from metaflow._vendor import yaml
12
14
 
13
15
  from .exception import MetaflowException
14
16
  from .parameters import (
@@ -17,6 +19,9 @@ from .parameters import (
17
19
  Parameter,
18
20
  ParameterContext,
19
21
  )
22
+
23
+ from .plugins import DATACLIENTS
24
+ from .user_configs.config_options import ConfigInput
20
25
  from .util import get_username
21
26
 
22
27
  import functools
@@ -47,16 +52,7 @@ _DelayedExecContext = namedtuple(
47
52
 
48
53
 
49
54
  # From here on out, this is the IncludeFile implementation.
50
- from metaflow.plugins.datatools import Local, S3
51
- from metaflow.plugins.azure.includefile_support import Azure
52
- from metaflow.plugins.gcp.includefile_support import GS
53
-
54
- DATACLIENTS = {
55
- "local": Local,
56
- "s3": S3,
57
- "azure": Azure,
58
- "gs": GS,
59
- }
55
+ _dict_dataclients = {d.TYPE: d for d in DATACLIENTS}
60
56
 
61
57
 
62
58
  class IncludedFile(object):
@@ -142,6 +138,7 @@ class FilePathClass(click.ParamType):
142
138
  parameter_name=param.name,
143
139
  logger=ctx.obj.echo,
144
140
  ds_type=ctx.obj.datastore_impl.TYPE,
141
+ configs=None,
145
142
  )
146
143
 
147
144
  if len(value) > 0 and (value.startswith("{") or value.startswith('"{')):
@@ -167,7 +164,7 @@ class FilePathClass(click.ParamType):
167
164
  "IncludeFile using a direct reference to a file in cloud storage is no "
168
165
  "longer supported. Contact the Metaflow team if you need this supported"
169
166
  )
170
- # if DATACLIENTS.get(path[:prefix_pos]) is None:
167
+ # if _dict_dataclients.get(path[:prefix_pos]) is None:
171
168
  # self.fail(
172
169
  # "IncludeFile: no handler for external file of type '%s' "
173
170
  # "(given path is '%s')" % (path[:prefix_pos], path)
@@ -187,7 +184,7 @@ class FilePathClass(click.ParamType):
187
184
  pass
188
185
  except OSError:
189
186
  self.fail("IncludeFile: could not open file '%s' for reading" % path)
190
- handler = DATACLIENTS.get(ctx.ds_type)
187
+ handler = _dict_dataclients.get(ctx.ds_type)
191
188
  if handler is None:
192
189
  self.fail(
193
190
  "IncludeFile: no data-client for datastore of type '%s'"
@@ -213,7 +210,7 @@ class FilePathClass(click.ParamType):
213
210
  ctx.path,
214
211
  ctx.is_text,
215
212
  ctx.encoding,
216
- DATACLIENTS[ctx.handler_type],
213
+ _dict_dataclients[ctx.handler_type],
217
214
  ctx.echo,
218
215
  )
219
216
  )
@@ -249,29 +246,71 @@ class IncludeFile(Parameter):
249
246
  default : Union[str, Callable[ParameterContext, str]]
250
247
  Default path to a local file. A function
251
248
  implies that the parameter corresponds to a *deploy-time parameter*.
252
- is_text : bool, default True
249
+ is_text : bool, optional, default None
253
250
  Convert the file contents to a string using the provided `encoding`.
254
- If False, the artifact is stored in `bytes`.
255
- encoding : str, optional, default 'utf-8'
256
- Use this encoding to decode the file contexts if `is_text=True`.
257
- required : bool, default False
251
+ If False, the artifact is stored in `bytes`. A value of None is equivalent to
252
+ True.
253
+ encoding : str, optional, default None
254
+ Use this encoding to decode the file contents if `is_text=True`. A value of None
255
+ is equivalent to "utf-8".
256
+ required : bool, optional, default None
258
257
  Require that the user specified a value for the parameter.
259
- `required=True` implies that the `default` is not used.
258
+ `required=True` implies that the `default` is not used. A value of None is
259
+ equivalent to False.
260
260
  help : str, optional
261
261
  Help text to show in `run --help`.
262
262
  show_default : bool, default True
263
- If True, show the default value in the help text.
263
+ If True, show the default value in the help text. A value of None is equivalent
264
+ to True.
265
+ parser : Union[str, Callable[[str], Any]], optional, default None
266
+ If a callable, it is a function that can parse the file contents
267
+ into any desired format. If a string, the string should refer to
268
+ a function (like "my_parser_package.my_parser.my_parser_function") which should
269
+ be able to parse the file contents. If the name starts with a ".", it is assumed
270
+ to be relative to "metaflow".
264
271
  """
265
272
 
266
273
  def __init__(
267
274
  self,
268
275
  name: str,
269
- required: bool = False,
270
- is_text: bool = True,
271
- encoding: str = "utf-8",
276
+ required: Optional[bool] = None,
277
+ is_text: Optional[bool] = None,
278
+ encoding: Optional[str] = None,
272
279
  help: Optional[str] = None,
280
+ parser: Optional[Union[str, Callable[[str], Any]]] = None,
273
281
  **kwargs: Dict[str, str]
274
282
  ):
283
+ self._includefile_overrides = {}
284
+ if is_text is not None:
285
+ self._includefile_overrides["is_text"] = is_text
286
+ if encoding is not None:
287
+ self._includefile_overrides["encoding"] = encoding
288
+ self._parser = parser
289
+ # NOTE: Right now, there is an issue where these can't be overridden by config
290
+ # in all circumstances. Ignoring for now.
291
+ super(IncludeFile, self).__init__(
292
+ name,
293
+ required=required,
294
+ help=help,
295
+ type=FilePathClass(
296
+ self._includefile_overrides.get("is_text", True),
297
+ self._includefile_overrides.get("encoding", "utf-8"),
298
+ ),
299
+ **kwargs,
300
+ )
301
+
302
+ def init(self, ignore_errors=False):
303
+ super(IncludeFile, self).init(ignore_errors)
304
+
305
+ # This will use the values set explicitly in the args if present, else will
306
+ # use (and remove) the value from kwargs, else will fall back to True/utf-8
307
+ is_text = self._includefile_overrides.get(
308
+ "is_text", self.kwargs.pop("is_text", True)
309
+ )
310
+ encoding = self._includefile_overrides.get(
311
+ "encoding", self.kwargs.pop("encoding", "utf-8")
312
+ )
313
+
275
314
  # If a default is specified, it needs to be uploaded when the flow is deployed
276
315
  # (for example when doing a `step-functions create`) so we make the default
277
316
  # be a DeployTimeField. This means that it will be evaluated in two cases:
@@ -281,7 +320,7 @@ class IncludeFile(Parameter):
281
320
  # In the first case, we will need to fully upload the file whereas in the
282
321
  # second case, we can just return the string as the FilePath.convert method
283
322
  # will take care of evaluating things.
284
- v = kwargs.get("default")
323
+ v = self.kwargs.get("default")
285
324
  if v is not None:
286
325
  # If the default is a callable, we have two DeployTimeField:
287
326
  # - the callable nature of the default will require us to "call" the default
@@ -294,27 +333,32 @@ class IncludeFile(Parameter):
294
333
  # (call the default)
295
334
  if callable(v) and not isinstance(v, DeployTimeField):
296
335
  # If default is a callable, make it a DeployTimeField (the inner one)
297
- v = DeployTimeField(name, str, "default", v, return_str=True)
298
- kwargs["default"] = DeployTimeField(
299
- name,
336
+ v = DeployTimeField(self.name, str, "default", v, return_str=True)
337
+ self.kwargs["default"] = DeployTimeField(
338
+ self.name,
300
339
  str,
301
340
  "default",
302
341
  IncludeFile._eval_default(is_text, encoding, v),
303
342
  print_representation=v,
304
343
  )
305
344
 
306
- super(IncludeFile, self).__init__(
307
- name,
308
- required=required,
309
- help=help,
310
- type=FilePathClass(is_text, encoding),
311
- **kwargs,
312
- )
313
-
314
345
  def load_parameter(self, v):
315
346
  if v is None:
316
347
  return v
317
- return v.decode(self.name, var_type="Parameter")
348
+
349
+ # Get the raw content from the file
350
+ content = v.decode(self.name, var_type="Parameter")
351
+ # If a parser is specified, use it to parse the content
352
+ if self._parser is not None:
353
+ try:
354
+ return ConfigInput._call_parser(self._parser, content)
355
+ except Exception as e:
356
+ raise MetaflowException(
357
+ "Failed to parse content in parameter '%s' using parser: %s"
358
+ % (self.name, str(e))
359
+ ) from e
360
+
361
+ return content
318
362
 
319
363
  @staticmethod
320
364
  def _eval_default(is_text, encoding, default_path):
@@ -425,7 +469,7 @@ class UploaderV1:
425
469
  if prefix_pos < 0:
426
470
  raise MetaflowException("Malformed URL: '%s'" % url)
427
471
  prefix = url[:prefix_pos]
428
- handler = DATACLIENTS.get(prefix)
472
+ handler = _dict_dataclients.get(prefix)
429
473
  if handler is None:
430
474
  raise MetaflowException("Could not find data client for '%s'" % prefix)
431
475
  return handler
@@ -437,7 +481,7 @@ class UploaderV2:
437
481
  @classmethod
438
482
  def encode_url(cls, url_type, url, **kwargs):
439
483
  return_value = {
440
- "note": "Internal representation of IncludeFile(%s)" % url,
484
+ "note": "Internal representation of IncludeFile",
441
485
  "type": cls.file_type,
442
486
  "sub-type": url_type,
443
487
  "url": url,
@@ -450,7 +494,7 @@ class UploaderV2:
450
494
  r = UploaderV1.store(flow_name, path, is_text, encoding, handler, echo)
451
495
 
452
496
  # In V2, we store size for faster access
453
- r["note"] = "Internal representation of IncludeFile(%s)" % path
497
+ r["note"] = "Internal representation of IncludeFile"
454
498
  r["type"] = cls.file_type
455
499
  r["sub-type"] = "uploaded"
456
500
  r["size"] = os.stat(path).st_size