ob-metaflow 2.11.13.1__py2.py3-none-any.whl → 2.19.7.1rc0__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (289)
  1. metaflow/R.py +10 -7
  2. metaflow/__init__.py +40 -25
  3. metaflow/_vendor/imghdr/__init__.py +186 -0
  4. metaflow/_vendor/importlib_metadata/__init__.py +1063 -0
  5. metaflow/_vendor/importlib_metadata/_adapters.py +68 -0
  6. metaflow/_vendor/importlib_metadata/_collections.py +30 -0
  7. metaflow/_vendor/importlib_metadata/_compat.py +71 -0
  8. metaflow/_vendor/importlib_metadata/_functools.py +104 -0
  9. metaflow/_vendor/importlib_metadata/_itertools.py +73 -0
  10. metaflow/_vendor/importlib_metadata/_meta.py +48 -0
  11. metaflow/_vendor/importlib_metadata/_text.py +99 -0
  12. metaflow/_vendor/importlib_metadata/py.typed +0 -0
  13. metaflow/_vendor/typeguard/__init__.py +48 -0
  14. metaflow/_vendor/typeguard/_checkers.py +1070 -0
  15. metaflow/_vendor/typeguard/_config.py +108 -0
  16. metaflow/_vendor/typeguard/_decorators.py +233 -0
  17. metaflow/_vendor/typeguard/_exceptions.py +42 -0
  18. metaflow/_vendor/typeguard/_functions.py +308 -0
  19. metaflow/_vendor/typeguard/_importhook.py +213 -0
  20. metaflow/_vendor/typeguard/_memo.py +48 -0
  21. metaflow/_vendor/typeguard/_pytest_plugin.py +127 -0
  22. metaflow/_vendor/typeguard/_suppression.py +86 -0
  23. metaflow/_vendor/typeguard/_transformer.py +1229 -0
  24. metaflow/_vendor/typeguard/_union_transformer.py +55 -0
  25. metaflow/_vendor/typeguard/_utils.py +173 -0
  26. metaflow/_vendor/typeguard/py.typed +0 -0
  27. metaflow/_vendor/typing_extensions.py +3641 -0
  28. metaflow/_vendor/v3_7/importlib_metadata/__init__.py +1063 -0
  29. metaflow/_vendor/v3_7/importlib_metadata/_adapters.py +68 -0
  30. metaflow/_vendor/v3_7/importlib_metadata/_collections.py +30 -0
  31. metaflow/_vendor/v3_7/importlib_metadata/_compat.py +71 -0
  32. metaflow/_vendor/v3_7/importlib_metadata/_functools.py +104 -0
  33. metaflow/_vendor/v3_7/importlib_metadata/_itertools.py +73 -0
  34. metaflow/_vendor/v3_7/importlib_metadata/_meta.py +48 -0
  35. metaflow/_vendor/v3_7/importlib_metadata/_text.py +99 -0
  36. metaflow/_vendor/v3_7/importlib_metadata/py.typed +0 -0
  37. metaflow/_vendor/v3_7/typeguard/__init__.py +48 -0
  38. metaflow/_vendor/v3_7/typeguard/_checkers.py +906 -0
  39. metaflow/_vendor/v3_7/typeguard/_config.py +108 -0
  40. metaflow/_vendor/v3_7/typeguard/_decorators.py +237 -0
  41. metaflow/_vendor/v3_7/typeguard/_exceptions.py +42 -0
  42. metaflow/_vendor/v3_7/typeguard/_functions.py +310 -0
  43. metaflow/_vendor/v3_7/typeguard/_importhook.py +213 -0
  44. metaflow/_vendor/v3_7/typeguard/_memo.py +48 -0
  45. metaflow/_vendor/v3_7/typeguard/_pytest_plugin.py +100 -0
  46. metaflow/_vendor/v3_7/typeguard/_suppression.py +88 -0
  47. metaflow/_vendor/v3_7/typeguard/_transformer.py +1207 -0
  48. metaflow/_vendor/v3_7/typeguard/_union_transformer.py +54 -0
  49. metaflow/_vendor/v3_7/typeguard/_utils.py +169 -0
  50. metaflow/_vendor/v3_7/typeguard/py.typed +0 -0
  51. metaflow/_vendor/v3_7/typing_extensions.py +3072 -0
  52. metaflow/_vendor/yaml/__init__.py +427 -0
  53. metaflow/_vendor/yaml/composer.py +139 -0
  54. metaflow/_vendor/yaml/constructor.py +748 -0
  55. metaflow/_vendor/yaml/cyaml.py +101 -0
  56. metaflow/_vendor/yaml/dumper.py +62 -0
  57. metaflow/_vendor/yaml/emitter.py +1137 -0
  58. metaflow/_vendor/yaml/error.py +75 -0
  59. metaflow/_vendor/yaml/events.py +86 -0
  60. metaflow/_vendor/yaml/loader.py +63 -0
  61. metaflow/_vendor/yaml/nodes.py +49 -0
  62. metaflow/_vendor/yaml/parser.py +589 -0
  63. metaflow/_vendor/yaml/reader.py +185 -0
  64. metaflow/_vendor/yaml/representer.py +389 -0
  65. metaflow/_vendor/yaml/resolver.py +227 -0
  66. metaflow/_vendor/yaml/scanner.py +1435 -0
  67. metaflow/_vendor/yaml/serializer.py +111 -0
  68. metaflow/_vendor/yaml/tokens.py +104 -0
  69. metaflow/cards.py +5 -0
  70. metaflow/cli.py +331 -785
  71. metaflow/cli_args.py +17 -0
  72. metaflow/cli_components/__init__.py +0 -0
  73. metaflow/cli_components/dump_cmd.py +96 -0
  74. metaflow/cli_components/init_cmd.py +52 -0
  75. metaflow/cli_components/run_cmds.py +546 -0
  76. metaflow/cli_components/step_cmd.py +334 -0
  77. metaflow/cli_components/utils.py +140 -0
  78. metaflow/client/__init__.py +1 -0
  79. metaflow/client/core.py +467 -73
  80. metaflow/client/filecache.py +75 -35
  81. metaflow/clone_util.py +7 -1
  82. metaflow/cmd/code/__init__.py +231 -0
  83. metaflow/cmd/develop/stub_generator.py +756 -288
  84. metaflow/cmd/develop/stubs.py +12 -28
  85. metaflow/cmd/main_cli.py +6 -4
  86. metaflow/cmd/make_wrapper.py +78 -0
  87. metaflow/datastore/__init__.py +1 -0
  88. metaflow/datastore/content_addressed_store.py +41 -10
  89. metaflow/datastore/datastore_set.py +11 -2
  90. metaflow/datastore/flow_datastore.py +156 -10
  91. metaflow/datastore/spin_datastore.py +91 -0
  92. metaflow/datastore/task_datastore.py +154 -39
  93. metaflow/debug.py +5 -0
  94. metaflow/decorators.py +404 -78
  95. metaflow/exception.py +8 -2
  96. metaflow/extension_support/__init__.py +527 -376
  97. metaflow/extension_support/_empty_file.py +2 -2
  98. metaflow/extension_support/plugins.py +49 -31
  99. metaflow/flowspec.py +482 -33
  100. metaflow/graph.py +210 -42
  101. metaflow/includefile.py +84 -40
  102. metaflow/lint.py +141 -22
  103. metaflow/meta_files.py +13 -0
  104. metaflow/{metadata → metadata_provider}/heartbeat.py +24 -8
  105. metaflow/{metadata → metadata_provider}/metadata.py +86 -1
  106. metaflow/metaflow_config.py +175 -28
  107. metaflow/metaflow_config_funcs.py +51 -3
  108. metaflow/metaflow_current.py +4 -10
  109. metaflow/metaflow_environment.py +139 -53
  110. metaflow/metaflow_git.py +115 -0
  111. metaflow/metaflow_profile.py +18 -0
  112. metaflow/metaflow_version.py +150 -66
  113. metaflow/mflog/__init__.py +4 -3
  114. metaflow/mflog/save_logs.py +2 -2
  115. metaflow/multicore_utils.py +31 -14
  116. metaflow/package/__init__.py +673 -0
  117. metaflow/packaging_sys/__init__.py +880 -0
  118. metaflow/packaging_sys/backend.py +128 -0
  119. metaflow/packaging_sys/distribution_support.py +153 -0
  120. metaflow/packaging_sys/tar_backend.py +99 -0
  121. metaflow/packaging_sys/utils.py +54 -0
  122. metaflow/packaging_sys/v1.py +527 -0
  123. metaflow/parameters.py +149 -28
  124. metaflow/plugins/__init__.py +74 -5
  125. metaflow/plugins/airflow/airflow.py +40 -25
  126. metaflow/plugins/airflow/airflow_cli.py +22 -5
  127. metaflow/plugins/airflow/airflow_decorator.py +1 -1
  128. metaflow/plugins/airflow/airflow_utils.py +5 -3
  129. metaflow/plugins/airflow/sensors/base_sensor.py +4 -4
  130. metaflow/plugins/airflow/sensors/external_task_sensor.py +2 -2
  131. metaflow/plugins/airflow/sensors/s3_sensor.py +2 -2
  132. metaflow/plugins/argo/argo_client.py +78 -33
  133. metaflow/plugins/argo/argo_events.py +6 -6
  134. metaflow/plugins/argo/argo_workflows.py +2410 -527
  135. metaflow/plugins/argo/argo_workflows_cli.py +571 -121
  136. metaflow/plugins/argo/argo_workflows_decorator.py +43 -12
  137. metaflow/plugins/argo/argo_workflows_deployer.py +106 -0
  138. metaflow/plugins/argo/argo_workflows_deployer_objects.py +453 -0
  139. metaflow/plugins/argo/capture_error.py +73 -0
  140. metaflow/plugins/argo/conditional_input_paths.py +35 -0
  141. metaflow/plugins/argo/exit_hooks.py +209 -0
  142. metaflow/plugins/argo/jobset_input_paths.py +15 -0
  143. metaflow/plugins/argo/param_val.py +19 -0
  144. metaflow/plugins/aws/aws_client.py +10 -3
  145. metaflow/plugins/aws/aws_utils.py +55 -2
  146. metaflow/plugins/aws/batch/batch.py +72 -5
  147. metaflow/plugins/aws/batch/batch_cli.py +33 -10
  148. metaflow/plugins/aws/batch/batch_client.py +4 -3
  149. metaflow/plugins/aws/batch/batch_decorator.py +102 -35
  150. metaflow/plugins/aws/secrets_manager/aws_secrets_manager_secrets_provider.py +13 -10
  151. metaflow/plugins/aws/step_functions/dynamo_db_client.py +0 -3
  152. metaflow/plugins/aws/step_functions/production_token.py +1 -1
  153. metaflow/plugins/aws/step_functions/step_functions.py +65 -8
  154. metaflow/plugins/aws/step_functions/step_functions_cli.py +101 -7
  155. metaflow/plugins/aws/step_functions/step_functions_decorator.py +1 -2
  156. metaflow/plugins/aws/step_functions/step_functions_deployer.py +97 -0
  157. metaflow/plugins/aws/step_functions/step_functions_deployer_objects.py +264 -0
  158. metaflow/plugins/azure/azure_exceptions.py +1 -1
  159. metaflow/plugins/azure/azure_secret_manager_secrets_provider.py +240 -0
  160. metaflow/plugins/azure/azure_tail.py +1 -1
  161. metaflow/plugins/azure/includefile_support.py +2 -0
  162. metaflow/plugins/cards/card_cli.py +66 -30
  163. metaflow/plugins/cards/card_creator.py +25 -1
  164. metaflow/plugins/cards/card_datastore.py +21 -49
  165. metaflow/plugins/cards/card_decorator.py +132 -8
  166. metaflow/plugins/cards/card_modules/basic.py +112 -17
  167. metaflow/plugins/cards/card_modules/bundle.css +1 -1
  168. metaflow/plugins/cards/card_modules/card.py +16 -1
  169. metaflow/plugins/cards/card_modules/chevron/renderer.py +1 -1
  170. metaflow/plugins/cards/card_modules/components.py +665 -28
  171. metaflow/plugins/cards/card_modules/convert_to_native_type.py +36 -7
  172. metaflow/plugins/cards/card_modules/json_viewer.py +232 -0
  173. metaflow/plugins/cards/card_modules/main.css +1 -0
  174. metaflow/plugins/cards/card_modules/main.js +68 -49
  175. metaflow/plugins/cards/card_modules/renderer_tools.py +1 -0
  176. metaflow/plugins/cards/card_modules/test_cards.py +26 -12
  177. metaflow/plugins/cards/card_server.py +39 -14
  178. metaflow/plugins/cards/component_serializer.py +2 -9
  179. metaflow/plugins/cards/metadata.py +22 -0
  180. metaflow/plugins/catch_decorator.py +9 -0
  181. metaflow/plugins/datastores/azure_storage.py +10 -1
  182. metaflow/plugins/datastores/gs_storage.py +6 -2
  183. metaflow/plugins/datastores/local_storage.py +12 -6
  184. metaflow/plugins/datastores/spin_storage.py +12 -0
  185. metaflow/plugins/datatools/local.py +2 -0
  186. metaflow/plugins/datatools/s3/s3.py +126 -75
  187. metaflow/plugins/datatools/s3/s3op.py +254 -121
  188. metaflow/plugins/env_escape/__init__.py +3 -3
  189. metaflow/plugins/env_escape/client_modules.py +102 -72
  190. metaflow/plugins/env_escape/server.py +7 -0
  191. metaflow/plugins/env_escape/stub.py +24 -5
  192. metaflow/plugins/events_decorator.py +343 -185
  193. metaflow/plugins/exit_hook/__init__.py +0 -0
  194. metaflow/plugins/exit_hook/exit_hook_decorator.py +46 -0
  195. metaflow/plugins/exit_hook/exit_hook_script.py +52 -0
  196. metaflow/plugins/gcp/__init__.py +1 -1
  197. metaflow/plugins/gcp/gcp_secret_manager_secrets_provider.py +11 -6
  198. metaflow/plugins/gcp/gs_tail.py +10 -6
  199. metaflow/plugins/gcp/includefile_support.py +3 -0
  200. metaflow/plugins/kubernetes/kube_utils.py +108 -0
  201. metaflow/plugins/kubernetes/kubernetes.py +411 -130
  202. metaflow/plugins/kubernetes/kubernetes_cli.py +168 -36
  203. metaflow/plugins/kubernetes/kubernetes_client.py +104 -2
  204. metaflow/plugins/kubernetes/kubernetes_decorator.py +246 -88
  205. metaflow/plugins/kubernetes/kubernetes_job.py +253 -581
  206. metaflow/plugins/kubernetes/kubernetes_jobsets.py +1071 -0
  207. metaflow/plugins/kubernetes/spot_metadata_cli.py +69 -0
  208. metaflow/plugins/kubernetes/spot_monitor_sidecar.py +109 -0
  209. metaflow/plugins/logs_cli.py +359 -0
  210. metaflow/plugins/{metadata → metadata_providers}/local.py +144 -84
  211. metaflow/plugins/{metadata → metadata_providers}/service.py +103 -26
  212. metaflow/plugins/metadata_providers/spin.py +16 -0
  213. metaflow/plugins/package_cli.py +36 -24
  214. metaflow/plugins/parallel_decorator.py +128 -11
  215. metaflow/plugins/parsers.py +16 -0
  216. metaflow/plugins/project_decorator.py +51 -5
  217. metaflow/plugins/pypi/bootstrap.py +357 -105
  218. metaflow/plugins/pypi/conda_decorator.py +82 -81
  219. metaflow/plugins/pypi/conda_environment.py +187 -52
  220. metaflow/plugins/pypi/micromamba.py +157 -47
  221. metaflow/plugins/pypi/parsers.py +268 -0
  222. metaflow/plugins/pypi/pip.py +88 -13
  223. metaflow/plugins/pypi/pypi_decorator.py +37 -1
  224. metaflow/plugins/pypi/utils.py +48 -2
  225. metaflow/plugins/resources_decorator.py +2 -2
  226. metaflow/plugins/secrets/__init__.py +3 -0
  227. metaflow/plugins/secrets/secrets_decorator.py +26 -181
  228. metaflow/plugins/secrets/secrets_func.py +49 -0
  229. metaflow/plugins/secrets/secrets_spec.py +101 -0
  230. metaflow/plugins/secrets/utils.py +74 -0
  231. metaflow/plugins/tag_cli.py +4 -7
  232. metaflow/plugins/test_unbounded_foreach_decorator.py +41 -6
  233. metaflow/plugins/timeout_decorator.py +3 -3
  234. metaflow/plugins/uv/__init__.py +0 -0
  235. metaflow/plugins/uv/bootstrap.py +128 -0
  236. metaflow/plugins/uv/uv_environment.py +72 -0
  237. metaflow/procpoll.py +1 -1
  238. metaflow/pylint_wrapper.py +5 -1
  239. metaflow/runner/__init__.py +0 -0
  240. metaflow/runner/click_api.py +717 -0
  241. metaflow/runner/deployer.py +470 -0
  242. metaflow/runner/deployer_impl.py +201 -0
  243. metaflow/runner/metaflow_runner.py +714 -0
  244. metaflow/runner/nbdeploy.py +132 -0
  245. metaflow/runner/nbrun.py +225 -0
  246. metaflow/runner/subprocess_manager.py +650 -0
  247. metaflow/runner/utils.py +335 -0
  248. metaflow/runtime.py +1078 -260
  249. metaflow/sidecar/sidecar_worker.py +1 -1
  250. metaflow/system/__init__.py +5 -0
  251. metaflow/system/system_logger.py +85 -0
  252. metaflow/system/system_monitor.py +108 -0
  253. metaflow/system/system_utils.py +19 -0
  254. metaflow/task.py +521 -225
  255. metaflow/tracing/__init__.py +7 -7
  256. metaflow/tracing/span_exporter.py +31 -38
  257. metaflow/tracing/tracing_modules.py +38 -43
  258. metaflow/tuple_util.py +27 -0
  259. metaflow/user_configs/__init__.py +0 -0
  260. metaflow/user_configs/config_options.py +563 -0
  261. metaflow/user_configs/config_parameters.py +598 -0
  262. metaflow/user_decorators/__init__.py +0 -0
  263. metaflow/user_decorators/common.py +144 -0
  264. metaflow/user_decorators/mutable_flow.py +512 -0
  265. metaflow/user_decorators/mutable_step.py +424 -0
  266. metaflow/user_decorators/user_flow_decorator.py +264 -0
  267. metaflow/user_decorators/user_step_decorator.py +749 -0
  268. metaflow/util.py +243 -27
  269. metaflow/vendor.py +23 -7
  270. metaflow/version.py +1 -1
  271. ob_metaflow-2.19.7.1rc0.data/data/share/metaflow/devtools/Makefile +355 -0
  272. ob_metaflow-2.19.7.1rc0.data/data/share/metaflow/devtools/Tiltfile +726 -0
  273. ob_metaflow-2.19.7.1rc0.data/data/share/metaflow/devtools/pick_services.sh +105 -0
  274. ob_metaflow-2.19.7.1rc0.dist-info/METADATA +87 -0
  275. ob_metaflow-2.19.7.1rc0.dist-info/RECORD +445 -0
  276. {ob_metaflow-2.11.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/WHEEL +1 -1
  277. {ob_metaflow-2.11.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/entry_points.txt +1 -0
  278. metaflow/_vendor/v3_5/__init__.py +0 -1
  279. metaflow/_vendor/v3_5/importlib_metadata/__init__.py +0 -644
  280. metaflow/_vendor/v3_5/importlib_metadata/_compat.py +0 -152
  281. metaflow/package.py +0 -188
  282. ob_metaflow-2.11.13.1.dist-info/METADATA +0 -85
  283. ob_metaflow-2.11.13.1.dist-info/RECORD +0 -308
  284. /metaflow/_vendor/{v3_5/zipp.py → zipp.py} +0 -0
  285. /metaflow/{metadata → metadata_provider}/__init__.py +0 -0
  286. /metaflow/{metadata → metadata_provider}/util.py +0 -0
  287. /metaflow/plugins/{metadata → metadata_providers}/__init__.py +0 -0
  288. {ob_metaflow-2.11.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info/licenses}/LICENSE +0 -0
  289. {ob_metaflow-2.11.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/top_level.txt +0 -0
metaflow/task.py CHANGED
@@ -4,13 +4,17 @@ import math
4
4
  import sys
5
5
  import os
6
6
  import time
7
+ import traceback
7
8
 
8
9
  from types import MethodType, FunctionType
9
10
 
11
+ from metaflow.sidecar import Message, MessageTypes
10
12
  from metaflow.datastore.exceptions import DataException
11
13
 
14
+ from metaflow.plugins import METADATA_PROVIDERS
12
15
  from .metaflow_config import MAX_ATTEMPTS
13
- from .metadata import MetaDatum
16
+ from .metadata_provider import MetaDatum
17
+ from .metaflow_profile import from_start
14
18
  from .mflog import TASK_LOG_SOURCE
15
19
  from .datastore import Inputs, TaskDataStoreSet
16
20
  from .exception import (
@@ -22,19 +26,10 @@ from .unbounded_foreach import UBF_CONTROL
22
26
  from .util import all_equal, get_username, resolve_identity, unicode_type
23
27
  from .clone_util import clone_task_helper
24
28
  from .metaflow_current import current
29
+ from metaflow.user_configs.config_parameters import ConfigValue
30
+ from metaflow.system import _system_logger, _system_monitor
25
31
  from metaflow.tracing import get_trace_id
26
- from metaflow.util import namedtuple_with_defaults
27
-
28
- foreach_frame_field_list = [
29
- ("step", str),
30
- ("var", str),
31
- ("num_splits", int),
32
- ("index", int),
33
- ("value", str),
34
- ]
35
- ForeachFrame = namedtuple_with_defaults(
36
- "ForeachFrame", foreach_frame_field_list, (None,) * (len(foreach_frame_field_list))
37
- )
32
+ from metaflow.tuple_util import ForeachFrame
38
33
 
39
34
  # Maximum number of characters of the foreach path that we store in the metadata.
40
35
  MAX_FOREACH_PATH_LENGTH = 256
@@ -55,6 +50,8 @@ class MetaflowTask(object):
55
50
  event_logger,
56
51
  monitor,
57
52
  ubf_context,
53
+ orig_flow_datastore=None,
54
+ spin_artifacts=None,
58
55
  ):
59
56
  self.flow = flow
60
57
  self.flow_datastore = flow_datastore
@@ -64,12 +61,126 @@ class MetaflowTask(object):
64
61
  self.event_logger = event_logger
65
62
  self.monitor = monitor
66
63
  self.ubf_context = ubf_context
67
-
68
- def _exec_step_function(self, step_function, input_obj=None):
69
- if input_obj is None:
70
- step_function()
71
- else:
72
- step_function(input_obj)
64
+ self.orig_flow_datastore = orig_flow_datastore
65
+ self.spin_artifacts = spin_artifacts
66
+
67
+ def _exec_step_function(self, step_function, orig_step_func, input_obj=None):
68
+ wrappers_stack = []
69
+ wrapped_func = None
70
+
71
+ # Will set to non-Falsy if we need to fake calling `self.next`
72
+ # This is used when skipping the step.
73
+ # If a dictionary, it will
74
+ # contain the arguments to pass to `self.next`. If
75
+ # True, it means we are using whatever the usual
76
+ # arguments to `self.next` are for this step.
77
+ fake_next_call_args = False
78
+ raised_exception = None
79
+ had_raised_exception = False
80
+
81
+ # If we have wrappers w1, w2 and w3, we need to execute
82
+ # - w3_pre
83
+ # - w2_pre
84
+ # - w1_pre
85
+ # - step_function
86
+ # - w1_post
87
+ # - w2_post
88
+ # - w3_post
89
+ # in that order. We do this by maintaining a stack of generators.
90
+ # Note that if any of the pre functions returns a function, we execute that
91
+ # instead of the rest of the inside part. This is useful if you want to create
92
+ # no-op function for example.
93
+ for w in reversed(orig_step_func.wrappers):
94
+ wrapped_func = w.pre_step(orig_step_func.name, self.flow, input_obj)
95
+ wrappers_stack.append(w)
96
+ if w.skip_step:
97
+ # We are not going to run anything so we will have to fake calling
98
+ # next.
99
+ fake_next_call_args = w.skip_step
100
+ break
101
+ if wrapped_func:
102
+ break # We have nothing left to do since we now execute the
103
+ # wrapped function
104
+ # Else, we continue down the list of wrappers
105
+ try:
106
+ # fake_next_call is used here to also indicate that the step was skipped
107
+ # so we do not execute anything.
108
+ if not fake_next_call_args:
109
+ if input_obj is None:
110
+ if wrapped_func:
111
+ fake_next_call_args = wrapped_func(self.flow)
112
+ else:
113
+ step_function()
114
+ else:
115
+ if wrapped_func:
116
+ fake_next_call_args = wrapped_func(self.flow, input_obj)
117
+ else:
118
+ step_function(input_obj)
119
+ except Exception as ex:
120
+ raised_exception = ex
121
+ had_raised_exception = True
122
+
123
+ # We back out of the stack of generators
124
+ for w in reversed(wrappers_stack):
125
+ try:
126
+ r = w.post_step(orig_step_func.name, self.flow, raised_exception)
127
+ except Exception as ex:
128
+ r = ex
129
+ if r is None:
130
+ raised_exception = None
131
+ elif isinstance(r, Exception):
132
+ raised_exception = r
133
+ elif isinstance(r, tuple):
134
+ if len(r) == 2:
135
+ raised_exception, fake_next_call_args = r
136
+ else:
137
+ # The last argument is an exception to be re-raised. Used in
138
+ # user_step_decorator's post_step
139
+ raise r[2]
140
+ else:
141
+ raise RuntimeError(
142
+ "Invalid return value from a UserStepDecorator. Expected an"
143
+ "exception or an exception and arguments for self.next, got: %s" % r
144
+ )
145
+ if raised_exception:
146
+ # We have an exception that we need to propagate
147
+ raise raised_exception
148
+
149
+ if fake_next_call_args or had_raised_exception:
150
+ # We want to override the next call or we caught an exception (in which
151
+ # case the regular step code didn't call self.next). In this case,
152
+ # we need to set the transition variables
153
+ # properly. We call the next function as needed
154
+ # We also do this in case we want to gobble the exception.
155
+ graph_node = self.flow._graph[orig_step_func.name]
156
+ out_funcs = [getattr(self.flow, f) for f in graph_node.out_funcs]
157
+ if out_funcs:
158
+ self.flow._transition = None
159
+ if isinstance(fake_next_call_args, dict) and fake_next_call_args:
160
+ # Not an empty dictionary -- we use this as arguments for the next
161
+ # call
162
+ self.flow.next(*out_funcs, **fake_next_call_args)
163
+ elif (
164
+ fake_next_call_args == True
165
+ or fake_next_call_args == {}
166
+ or had_raised_exception
167
+ ):
168
+ # We need to extract things from the self.next. This is not possible
169
+ # in the case where there was a num_parallel.
170
+ if graph_node.parallel_foreach:
171
+ raise RuntimeError(
172
+ "Skipping a parallel foreach step without providing "
173
+ "the arguments to the self.next call is not supported. "
174
+ )
175
+ if graph_node.foreach_param:
176
+ self.flow.next(*out_funcs, foreach=graph_node.foreach_param)
177
+ else:
178
+ self.flow.next(*out_funcs)
179
+ else:
180
+ raise RuntimeError(
181
+ "Invalid value passed to self.next; expected "
182
+ " bool of a dictionary; got: %s" % fake_next_call_args
183
+ )
73
184
 
74
185
  def _init_parameters(self, parameter_ds, passdown=True):
75
186
  cls = self.flow.__class__
@@ -128,7 +239,6 @@ class MetaflowTask(object):
128
239
  lambda _, parameter_ds=parameter_ds: parameter_ds["_graph_info"],
129
240
  )
130
241
  all_vars.append("_graph_info")
131
-
132
242
  if passdown:
133
243
  self.flow._datastore.passdown_partial(parameter_ds, all_vars)
134
244
  return param_only_vars
@@ -144,6 +254,7 @@ class MetaflowTask(object):
144
254
  # Prefetch 'foreach' related artifacts to improve time taken by
145
255
  # _init_foreach.
146
256
  prefetch_data_artifacts = [
257
+ "_iteration_stack",
147
258
  "_foreach_stack",
148
259
  "_foreach_num_splits",
149
260
  "_foreach_var",
@@ -155,6 +266,9 @@ class MetaflowTask(object):
155
266
  run_id,
156
267
  pathspecs=input_paths,
157
268
  prefetch_data_artifacts=prefetch_data_artifacts,
269
+ join_type=join_type,
270
+ orig_flow_datastore=self.orig_flow_datastore,
271
+ spin_artifacts=self.spin_artifacts,
158
272
  )
159
273
  ds_list = [ds for ds in datastore_set]
160
274
  if len(ds_list) != len(input_paths):
@@ -166,10 +280,27 @@ class MetaflowTask(object):
166
280
  # initialize directly in the single input case.
167
281
  ds_list = []
168
282
  for input_path in input_paths:
169
- run_id, step_name, task_id = input_path.split("/")
283
+ parts = input_path.split("/")
284
+ if len(parts) == 3:
285
+ run_id, step_name, task_id = parts
286
+ attempt = None
287
+ else:
288
+ run_id, step_name, task_id, attempt = parts
289
+ attempt = int(attempt)
290
+
170
291
  ds_list.append(
171
- self.flow_datastore.get_task_datastore(run_id, step_name, task_id)
292
+ self.flow_datastore.get_task_datastore(
293
+ run_id,
294
+ step_name,
295
+ task_id,
296
+ attempt=attempt,
297
+ join_type=join_type,
298
+ orig_flow_datastore=self.orig_flow_datastore,
299
+ spin_artifacts=self.spin_artifacts,
300
+ )
172
301
  )
302
+ from_start("MetaflowTask: got datastore for input path %s" % input_path)
303
+
173
304
  if not ds_list:
174
305
  # this guards against errors in input paths
175
306
  raise MetaflowDataMissing(
@@ -188,6 +319,10 @@ class MetaflowTask(object):
188
319
  # 2) join - pop the topmost frame from the stack
189
320
  # 3) step following a split - push a new frame in the stack
190
321
 
322
+ # We have a non-modifying case (case 4)) where we propagate the
323
+ # foreach-stack information to all tasks in the foreach. This is
324
+ # then used later to write the foreach-stack metadata for that task
325
+
191
326
  # case 1) - reset the stack
192
327
  if step_name == "start":
193
328
  self.flow._foreach_stack = []
@@ -272,6 +407,59 @@ class MetaflowTask(object):
272
407
  stack = inputs[0]["_foreach_stack"]
273
408
  stack.append(frame)
274
409
  self.flow._foreach_stack = stack
410
+ # case 4) - propagate in the foreach nest
411
+ elif "_foreach_stack" in inputs[0]:
412
+ self.flow._foreach_stack = inputs[0]["_foreach_stack"]
413
+
414
+ def _init_iteration(self, step_name, inputs, is_recursive_step):
415
+ # We track the iteration "stack" for loops. At this time, we
416
+ # only support one type of "looping" which is a recursive step but
417
+ # this can generalize to arbitrary well-scoped loops in the future.
418
+
419
+ # _iteration_stack will contain the iteration count for each loop
420
+ # level. Currently, there will be only no elements (no loops) or
421
+ # a single element (a single recursive step).
422
+
423
+ # We just need to determine the rules to add a new looping level,
424
+ # increment the looping level or pop the looping level. In our
425
+ # current support for only recursive steps, this is pretty straightforward:
426
+ # 1) if is_recursive_step:
427
+ # - we are entering a loop -- we are either entering for the first time
428
+ # or we are continuing the loop. Note that a recursive step CANNOT
429
+ # be a join step so there is always a single input
430
+ # 1a) If inputs[0]["_iteration_stack"] contains an element, we are looping
431
+ # so we increment the count
432
+ # 1b) If inputs[0]["_iteration_stack"] is empty, this is the first time we
433
+ # are entering the loop so we set the iteration count to 0
434
+ # 2) if it is not a recursive step, we need to determine if this is the step
435
+ # *after* the recursive step. The easiest way to determine that is to
436
+ # look at all inputs (there can be multiple in case of a join) and pop
437
+ # _iteration_stack if it is set. However, since we know that non recursive
438
+ # steps are *never* part of an iteration, we can simplify and just set it
439
+ # to [] without even checking anything. We will have to revisit this if/when
440
+ # more complex loop structures are supported.
441
+
442
+ # Note that just like _foreach_stack, we need to set _iteration_stack to *something*
443
+ # so that it doesn't get clobbered weirdly by merge_artifacts.
444
+
445
+ if is_recursive_step:
446
+ # Case 1)
447
+ if len(inputs) != 1:
448
+ raise MetaflowInternalError(
449
+ "Step *%s* is a recursive step but got multiple inputs." % step_name
450
+ )
451
+ inp = inputs[0]
452
+ if "_iteration_stack" not in inp or not inp["_iteration_stack"]:
453
+ # Case 1b)
454
+ self.flow._iteration_stack = [0]
455
+ else:
456
+ # Case 1a)
457
+ stack = inp["_iteration_stack"]
458
+ stack[-1] += 1
459
+ self.flow._iteration_stack = stack
460
+ else:
461
+ # Case 2)
462
+ self.flow._iteration_stack = []
275
463
 
276
464
  def _clone_flow(self, datastore):
277
465
  x = self.flow.__class__(use_cli=False)
@@ -291,25 +479,40 @@ class MetaflowTask(object):
291
479
  "task.clone_only needs a valid clone_origin_task value."
292
480
  )
293
481
  origin_run_id, _, origin_task_id = clone_origin_task.split("/")
294
-
295
- msg = {
296
- "task_id": task_id,
297
- "msg": "Cloning task from {}/{}/{}/{} to {}/{}/{}/{}".format(
298
- self.flow.name,
299
- origin_run_id,
300
- step_name,
301
- origin_task_id,
302
- self.flow.name,
303
- run_id,
304
- step_name,
305
- task_id,
306
- ),
307
- "step_name": step_name,
482
+ # Update system logger and monitor context
483
+ # We also pass this context as part of the task payload to support implementations that
484
+ # can't access the context directly
485
+ task_payload = {
308
486
  "run_id": run_id,
309
- "flow_name": self.flow.name,
310
- "ts": round(time.time()),
487
+ "step_name": step_name,
488
+ "task_id": task_id,
489
+ "retry_count": retry_count,
490
+ "project_name": current.get("project_name"),
491
+ "branch_name": current.get("branch_name"),
492
+ "is_user_branch": current.get("is_user_branch"),
493
+ "is_production": current.get("is_production"),
494
+ "project_flow_name": current.get("project_flow_name"),
495
+ "origin_run_id": origin_run_id,
496
+ "origin_task_id": origin_task_id,
311
497
  }
312
- self.event_logger.log(msg)
498
+
499
+ msg = "Cloning task from {}/{}/{}/{} to {}/{}/{}/{}".format(
500
+ self.flow.name,
501
+ origin_run_id,
502
+ step_name,
503
+ origin_task_id,
504
+ self.flow.name,
505
+ run_id,
506
+ step_name,
507
+ task_id,
508
+ )
509
+ with _system_monitor.count("metaflow.task.clone"):
510
+ _system_logger.log_event(
511
+ level="info",
512
+ module="metaflow.task",
513
+ name="clone",
514
+ payload={**task_payload, "msg": msg},
515
+ )
313
516
  # If we actually have to do the clone ourselves, proceed...
314
517
  clone_task_helper(
315
518
  self.flow.name,
@@ -368,6 +571,8 @@ class MetaflowTask(object):
368
571
  split_index,
369
572
  retry_count,
370
573
  max_user_code_retries,
574
+ whitelist_decorators=None,
575
+ persist=True,
371
576
  ):
372
577
  if run_id and task_id:
373
578
  self.metadata.register_run_id(run_id)
@@ -426,7 +631,14 @@ class MetaflowTask(object):
426
631
 
427
632
  step_func = getattr(self.flow, step_name)
428
633
  decorators = step_func.decorators
429
-
634
+ if self.orig_flow_datastore:
635
+ # We filter only the whitelisted decorators in case of spin step.
636
+ decorators = (
637
+ []
638
+ if not whitelist_decorators
639
+ else [deco for deco in decorators if deco.name in whitelist_decorators]
640
+ )
641
+ from_start("MetaflowTask: decorators initialized")
430
642
  node = self.flow._graph[step_name]
431
643
  join_type = None
432
644
  if node.type == "join":
@@ -434,17 +646,26 @@ class MetaflowTask(object):
434
646
 
435
647
  # 1. initialize output datastore
436
648
  output = self.flow_datastore.get_task_datastore(
437
- run_id, step_name, task_id, attempt=retry_count, mode="w"
649
+ run_id, step_name, task_id, attempt=retry_count, mode="w", persist=persist
438
650
  )
439
651
 
440
652
  output.init_task()
653
+ from_start("MetaflowTask: output datastore initialized")
441
654
 
442
655
  if input_paths:
443
656
  # 2. initialize input datastores
444
657
  inputs = self._init_data(run_id, join_type, input_paths)
658
+ from_start("MetaflowTask: input datastores initialized")
445
659
 
446
660
  # 3. initialize foreach state
447
661
  self._init_foreach(step_name, join_type, inputs, split_index)
662
+ from_start("MetaflowTask: foreach state initialized")
663
+
664
+ # 4. initialize the iteration state
665
+ is_recursive_step = (
666
+ node.type == "split-switch" and step_name in node.out_funcs
667
+ )
668
+ self._init_iteration(step_name, inputs, is_recursive_step)
448
669
 
449
670
  # Add foreach stack to metadata of the task
450
671
 
@@ -479,6 +700,25 @@ class MetaflowTask(object):
479
700
  )
480
701
  )
481
702
 
703
+ # Add runtime dag information to the metadata of the task
704
+ foreach_execution_path = ",".join(
705
+ [
706
+ "{}:{}".format(foreach_frame.step, foreach_frame.index)
707
+ for foreach_frame in foreach_stack
708
+ ]
709
+ )
710
+ if foreach_execution_path:
711
+ metadata.extend(
712
+ [
713
+ MetaDatum(
714
+ field="foreach-execution-path",
715
+ value=foreach_execution_path,
716
+ type="foreach-execution-path",
717
+ tags=metadata_tags,
718
+ ),
719
+ ]
720
+ )
721
+ from_start("MetaflowTask: finished input processing")
482
722
  self.metadata.register_metadata(
483
723
  run_id,
484
724
  step_name,
@@ -496,8 +736,7 @@ class MetaflowTask(object):
496
736
  origin_run_id=origin_run_id,
497
737
  namespace=resolve_identity(),
498
738
  username=get_username(),
499
- metadata_str="%s@%s"
500
- % (self.metadata.__class__.TYPE, self.metadata.__class__.INFO),
739
+ metadata_str=self.metadata.metadata_str(),
501
740
  is_running=True,
502
741
  tags=self.metadata.sticky_tags,
503
742
  )
@@ -506,6 +745,9 @@ class MetaflowTask(object):
506
745
  output.save_metadata(
507
746
  {
508
747
  "task_begin": {
748
+ "code_package_metadata": os.environ.get(
749
+ "METAFLOW_CODE_METADATA", ""
750
+ ),
509
751
  "code_package_sha": os.environ.get("METAFLOW_CODE_SHA"),
510
752
  "code_package_ds": os.environ.get("METAFLOW_CODE_DS"),
511
753
  "code_package_url": os.environ.get("METAFLOW_CODE_URL"),
@@ -513,204 +755,258 @@ class MetaflowTask(object):
513
755
  }
514
756
  }
515
757
  )
516
- logger = self.event_logger
758
+
759
+ # 6. Update system logger and monitor context
760
+ # We also pass this context as part of the task payload to support implementations that
761
+ # can't access the context directly
762
+
763
+ task_payload = {
764
+ "run_id": run_id,
765
+ "step_name": step_name,
766
+ "task_id": task_id,
767
+ "retry_count": retry_count,
768
+ "project_name": current.get("project_name"),
769
+ "branch_name": current.get("branch_name"),
770
+ "is_user_branch": current.get("is_user_branch"),
771
+ "is_production": current.get("is_production"),
772
+ "project_flow_name": current.get("project_flow_name"),
773
+ "trace_id": trace_id or None,
774
+ }
775
+
776
+ from_start("MetaflowTask: task metadata initialized")
517
777
  start = time.time()
518
778
  self.metadata.start_task_heartbeat(self.flow.name, run_id, step_name, task_id)
519
- try:
520
- msg = {
521
- "task_id": task_id,
522
- "msg": "task starting",
523
- "step_name": step_name,
524
- "run_id": run_id,
525
- "flow_name": self.flow.name,
526
- "ts": round(time.time()),
527
- }
528
- logger.log(msg)
529
-
530
- self.flow._current_step = step_name
531
- self.flow._success = False
532
- self.flow._task_ok = None
533
- self.flow._exception = None
534
- # Note: All internal flow attributes (ie: non-user artifacts)
535
- # should either be set prior to running the user code or listed in
536
- # FlowSpec._EPHEMERAL to allow for proper merging/importing of
537
- # user artifacts in the user's step code.
538
-
539
- if join_type:
540
- # Join step:
541
-
542
- # Ensure that we have the right number of inputs. The
543
- # foreach case is checked above.
544
- if join_type != "foreach" and len(inputs) != len(node.in_funcs):
545
- raise MetaflowDataMissing(
546
- "Join *%s* expected %d "
547
- "inputs but only %d inputs "
548
- "were found" % (step_name, len(node.in_funcs), len(inputs))
779
+ from_start("MetaflowTask: heartbeat started")
780
+ with self.monitor.measure("metaflow.task.duration"):
781
+ try:
782
+ with self.monitor.count("metaflow.task.start"):
783
+ _system_logger.log_event(
784
+ level="info",
785
+ module="metaflow.task",
786
+ name="start",
787
+ payload={**task_payload, "msg": "Task started"},
549
788
  )
550
789
 
551
- # Multiple input contexts are passed in as an argument
552
- # to the step function.
553
- input_obj = Inputs(self._clone_flow(inp) for inp in inputs)
554
- self.flow._set_datastore(output)
555
- # initialize parameters (if they exist)
556
- # We take Parameter values from the first input,
557
- # which is always safe since parameters are read-only
558
- current._update_env(
559
- {"parameter_names": self._init_parameters(inputs[0], passdown=True)}
560
- )
561
- else:
562
- # Linear step:
563
- # We are running with a single input context.
564
- # The context is embedded in the flow.
565
- if len(inputs) > 1:
566
- # This should be captured by static checking but
567
- # let's assert this again
568
- raise MetaflowInternalError(
569
- "Step *%s* is not a join "
570
- "step but it gets multiple "
571
- "inputs." % step_name
572
- )
573
- self.flow._set_datastore(inputs[0])
574
- if input_paths:
790
+ self.flow._current_step = step_name
791
+ self.flow._success = False
792
+ self.flow._task_ok = None
793
+ self.flow._exception = None
794
+
795
+ # Note: All internal flow attributes (ie: non-user artifacts)
796
+ # should either be set prior to running the user code or listed in
797
+ # FlowSpec._EPHEMERAL to allow for proper merging/importing of
798
+ # user artifacts in the user's step code.
799
+ if join_type:
800
+ # Join step:
801
+
802
+ # Ensure that we have the right number of inputs.
803
+ if join_type != "foreach":
804
+ # Find the corresponding split node from the graph.
805
+ split_node = self.flow._graph[node.split_parents[-1]]
806
+ # The number of expected inputs is the number of branches
807
+ # from that split -- we can't use in_funcs because there may
808
+ # be more due to split-switch branches that all converge here.
809
+ expected_inputs = len(split_node.out_funcs)
810
+
811
+ if len(inputs) != expected_inputs:
812
+ raise MetaflowDataMissing(
813
+ "Join *%s* expected %d inputs but only %d inputs "
814
+ "were found" % (step_name, expected_inputs, len(inputs))
815
+ )
816
+
817
+ # Multiple input contexts are passed in as an argument
818
+ # to the step function.
819
+ input_obj = Inputs(self._clone_flow(inp) for inp in inputs)
820
+ self.flow._set_datastore(output)
575
821
  # initialize parameters (if they exist)
576
822
  # We take Parameter values from the first input,
577
823
  # which is always safe since parameters are read-only
578
824
  current._update_env(
579
825
  {
580
826
  "parameter_names": self._init_parameters(
581
- inputs[0], passdown=False
582
- )
827
+ inputs[0], passdown=True
828
+ ),
829
+ "graph_info": self.flow._graph_info,
583
830
  }
584
831
  )
832
+ else:
833
+ # Linear step:
834
+ # We are running with a single input context.
835
+ # The context is embedded in the flow.
836
+ if len(inputs) > 1:
837
+ # This should be captured by static checking but
838
+ # let's assert this again
839
+ raise MetaflowInternalError(
840
+ "Step *%s* is not a join "
841
+ "step but it gets multiple "
842
+ "inputs." % step_name
843
+ )
844
+ self.flow._set_datastore(inputs[0])
845
+ if input_paths:
846
+ # initialize parameters (if they exist)
847
+ # We take Parameter values from the first input,
848
+ # which is always safe since parameters are read-only
849
+ current._update_env(
850
+ {
851
+ "parameter_names": self._init_parameters(
852
+ inputs[0], passdown=False
853
+ ),
854
+ "graph_info": self.flow._graph_info,
855
+ }
856
+ )
857
+ from_start("MetaflowTask: before pre-step decorators")
858
+ for deco in decorators:
859
+ if deco.name == "card" and self.orig_flow_datastore:
860
+ # if spin step and card decorator, pass spin metadata
861
+ metadata = [m for m in METADATA_PROVIDERS if m.TYPE == "spin"][
862
+ 0
863
+ ](self.environment, self.flow, self.event_logger, self.monitor)
864
+ else:
865
+ metadata = self.metadata
866
+ deco.task_pre_step(
867
+ step_name,
868
+ output,
869
+ metadata,
870
+ run_id,
871
+ task_id,
872
+ self.flow,
873
+ self.flow._graph,
874
+ retry_count,
875
+ max_user_code_retries,
876
+ self.ubf_context,
877
+ inputs,
878
+ )
585
879
 
586
- for deco in decorators:
587
- deco.task_pre_step(
588
- step_name,
589
- output,
590
- self.metadata,
591
- run_id,
592
- task_id,
593
- self.flow,
594
- self.flow._graph,
595
- retry_count,
596
- max_user_code_retries,
597
- self.ubf_context,
598
- inputs,
599
- )
600
-
601
- for deco in decorators:
602
- # decorators can actually decorate the step function,
603
- # or they can replace it altogether. This functionality
604
- # is used e.g. by catch_decorator which switches to a
605
- # fallback code if the user code has failed too many
606
- # times.
607
- step_func = deco.task_decorate(
608
- step_func,
609
- self.flow,
610
- self.flow._graph,
611
- retry_count,
612
- max_user_code_retries,
613
- self.ubf_context,
614
- )
615
-
616
- if join_type:
617
- self._exec_step_function(step_func, input_obj)
618
- else:
619
- self._exec_step_function(step_func)
620
-
621
- for deco in decorators:
622
- deco.task_post_step(
623
- step_name,
624
- self.flow,
625
- self.flow._graph,
626
- retry_count,
627
- max_user_code_retries,
628
- )
629
-
630
- self.flow._task_ok = True
631
- self.flow._success = True
632
-
633
- except Exception as ex:
634
- tsk_msg = {
635
- "task_id": task_id,
636
- "exception_msg": str(ex),
637
- "msg": "task failed with exception",
638
- "step_name": step_name,
639
- "run_id": run_id,
640
- "flow_name": self.flow.name,
641
- }
642
- logger.log(tsk_msg)
643
-
644
- exception_handled = False
645
- for deco in decorators:
646
- res = deco.task_exception(
647
- ex,
648
- step_name,
649
- self.flow,
650
- self.flow._graph,
651
- retry_count,
652
- max_user_code_retries,
653
- )
654
- exception_handled = bool(res) or exception_handled
880
+ orig_step_func = step_func
881
+ for deco in decorators:
882
+ # decorators can actually decorate the step function,
883
+ # or they can replace it altogether. This functionality
884
+ # is used e.g. by catch_decorator which switches to a
885
+ # fallback code if the user code has failed too many
886
+ # times.
887
+ step_func = deco.task_decorate(
888
+ step_func,
889
+ self.flow,
890
+ self.flow._graph,
891
+ retry_count,
892
+ max_user_code_retries,
893
+ self.ubf_context,
894
+ )
895
+ from_start("MetaflowTask: finished decorator processing")
896
+ if join_type:
897
+ self._exec_step_function(step_func, orig_step_func, input_obj)
898
+ else:
899
+ self._exec_step_function(step_func, orig_step_func)
900
+ from_start("MetaflowTask: step function executed")
901
+ for deco in decorators:
902
+ deco.task_post_step(
903
+ step_name,
904
+ self.flow,
905
+ self.flow._graph,
906
+ retry_count,
907
+ max_user_code_retries,
908
+ )
655
909
 
656
- if exception_handled:
657
910
  self.flow._task_ok = True
658
- else:
659
- self.flow._task_ok = False
660
- self.flow._exception = MetaflowExceptionWrapper(ex)
661
- print("%s failed:" % self.flow, file=sys.stderr)
662
- raise
663
-
664
- finally:
665
- if self.ubf_context == UBF_CONTROL:
666
- self._finalize_control_task()
667
-
668
- end = time.time() - start
669
-
670
- msg = {
671
- "task_id": task_id,
672
- "msg": "task ending",
673
- "step_name": step_name,
674
- "run_id": run_id,
675
- "flow_name": self.flow.name,
676
- "ts": round(time.time()),
677
- "runtime": round(end),
678
- }
679
- logger.log(msg)
911
+ self.flow._success = True
912
+
913
+ except Exception as ex:
914
+ with self.monitor.count("metaflow.task.exception"):
915
+ _system_logger.log_event(
916
+ level="error",
917
+ module="metaflow.task",
918
+ name="exception",
919
+ payload={**task_payload, "msg": traceback.format_exc()},
920
+ )
680
921
 
681
- attempt_ok = str(bool(self.flow._task_ok))
682
- self.metadata.register_metadata(
683
- run_id,
684
- step_name,
685
- task_id,
686
- [
687
- MetaDatum(
688
- field="attempt_ok",
689
- value=attempt_ok,
690
- type="internal_attempt_status",
691
- tags=["attempt_id:{0}".format(retry_count)],
922
+ exception_handled = False
923
+ for deco in decorators:
924
+ res = deco.task_exception(
925
+ ex,
926
+ step_name,
927
+ self.flow,
928
+ self.flow._graph,
929
+ retry_count,
930
+ max_user_code_retries,
931
+ )
932
+ exception_handled = bool(res) or exception_handled
933
+
934
+ if exception_handled:
935
+ self.flow._task_ok = True
936
+ else:
937
+ self.flow._task_ok = False
938
+ self.flow._exception = MetaflowExceptionWrapper(ex)
939
+ print("%s failed:" % self.flow, file=sys.stderr)
940
+ raise
941
+
942
+ finally:
943
+ from_start("MetaflowTask: decorators finalized")
944
+ if self.ubf_context == UBF_CONTROL:
945
+ self._finalize_control_task()
946
+
947
+ # Emit metrics to logger/monitor sidecar implementations
948
+ with self.monitor.count("metaflow.task.end"):
949
+ _system_logger.log_event(
950
+ level="info",
951
+ module="metaflow.task",
952
+ name="end",
953
+ payload={**task_payload, "msg": "Task ended"},
954
+ )
955
+ try:
956
+ # persisting might fail due to unpicklable artifacts.
957
+ output.persist(self.flow)
958
+ except Exception as ex:
959
+ self.flow._task_ok = False
960
+ raise ex
961
+ finally:
962
+ # The attempt_ok metadata is used to determine task status so it is important
963
+ # we ensure that it is written even in case of preceding failures.
964
+ # f.ex. failing to serialize artifacts leads to a non-zero exit code for the process,
965
+ # even if user code finishes successfully. Flow execution will not continue due to the exit,
966
+ # so arguably we should mark the task as failed.
967
+ attempt_ok = str(bool(self.flow._task_ok))
968
+ self.metadata.register_metadata(
969
+ run_id,
970
+ step_name,
971
+ task_id,
972
+ [
973
+ MetaDatum(
974
+ field="attempt_ok",
975
+ value=attempt_ok,
976
+ type="internal_attempt_status",
977
+ tags=["attempt_id:{0}".format(retry_count)],
978
+ ),
979
+ ],
692
980
  )
693
- ],
694
- )
695
981
 
696
- output.save_metadata({"task_end": {}})
697
- output.persist(self.flow)
698
-
699
- # this writes a success marker indicating that the
700
- # "transaction" is done
701
- output.done()
702
-
703
- # final decorator hook: The task results are now
704
- # queryable through the client API / datastore
705
- for deco in decorators:
706
- deco.task_finished(
707
- step_name,
708
- self.flow,
709
- self.flow._graph,
710
- self.flow._task_ok,
711
- retry_count,
712
- max_user_code_retries,
713
- )
982
+ output.save_metadata({"task_end": {}})
983
+ from_start("MetaflowTask: output persisted")
984
+ # this writes a success marker indicating that the
985
+ # "transaction" is done
986
+ output.done()
987
+
988
+ # final decorator hook: The task results are now
989
+ # queryable through the client API / datastore
990
+ for deco in decorators:
991
+ deco.task_finished(
992
+ step_name,
993
+ self.flow,
994
+ self.flow._graph,
995
+ self.flow._task_ok,
996
+ retry_count,
997
+ max_user_code_retries,
998
+ )
714
999
 
715
- # terminate side cars
716
- self.metadata.stop_heartbeat()
1000
+ # terminate side cars
1001
+ self.metadata.stop_heartbeat()
1002
+
1003
+ # Task duration consists of the time taken to run the task as well as the time taken to
1004
+ # persist the task metadata and data to the datastore.
1005
+ duration = time.time() - start
1006
+ _system_logger.log_event(
1007
+ level="info",
1008
+ module="metaflow.task",
1009
+ name="duration",
1010
+ payload={**task_payload, "msg": str(duration)},
1011
+ )
1012
+ from_start("MetaflowTask: task run completed")