ob-metaflow 2.11.13.1__py2.py3-none-any.whl → 2.19.7.1rc0__py2.py3-none-any.whl

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.
Files changed (289)
  1. metaflow/R.py +10 -7
  2. metaflow/__init__.py +40 -25
  3. metaflow/_vendor/imghdr/__init__.py +186 -0
  4. metaflow/_vendor/importlib_metadata/__init__.py +1063 -0
  5. metaflow/_vendor/importlib_metadata/_adapters.py +68 -0
  6. metaflow/_vendor/importlib_metadata/_collections.py +30 -0
  7. metaflow/_vendor/importlib_metadata/_compat.py +71 -0
  8. metaflow/_vendor/importlib_metadata/_functools.py +104 -0
  9. metaflow/_vendor/importlib_metadata/_itertools.py +73 -0
  10. metaflow/_vendor/importlib_metadata/_meta.py +48 -0
  11. metaflow/_vendor/importlib_metadata/_text.py +99 -0
  12. metaflow/_vendor/importlib_metadata/py.typed +0 -0
  13. metaflow/_vendor/typeguard/__init__.py +48 -0
  14. metaflow/_vendor/typeguard/_checkers.py +1070 -0
  15. metaflow/_vendor/typeguard/_config.py +108 -0
  16. metaflow/_vendor/typeguard/_decorators.py +233 -0
  17. metaflow/_vendor/typeguard/_exceptions.py +42 -0
  18. metaflow/_vendor/typeguard/_functions.py +308 -0
  19. metaflow/_vendor/typeguard/_importhook.py +213 -0
  20. metaflow/_vendor/typeguard/_memo.py +48 -0
  21. metaflow/_vendor/typeguard/_pytest_plugin.py +127 -0
  22. metaflow/_vendor/typeguard/_suppression.py +86 -0
  23. metaflow/_vendor/typeguard/_transformer.py +1229 -0
  24. metaflow/_vendor/typeguard/_union_transformer.py +55 -0
  25. metaflow/_vendor/typeguard/_utils.py +173 -0
  26. metaflow/_vendor/typeguard/py.typed +0 -0
  27. metaflow/_vendor/typing_extensions.py +3641 -0
  28. metaflow/_vendor/v3_7/importlib_metadata/__init__.py +1063 -0
  29. metaflow/_vendor/v3_7/importlib_metadata/_adapters.py +68 -0
  30. metaflow/_vendor/v3_7/importlib_metadata/_collections.py +30 -0
  31. metaflow/_vendor/v3_7/importlib_metadata/_compat.py +71 -0
  32. metaflow/_vendor/v3_7/importlib_metadata/_functools.py +104 -0
  33. metaflow/_vendor/v3_7/importlib_metadata/_itertools.py +73 -0
  34. metaflow/_vendor/v3_7/importlib_metadata/_meta.py +48 -0
  35. metaflow/_vendor/v3_7/importlib_metadata/_text.py +99 -0
  36. metaflow/_vendor/v3_7/importlib_metadata/py.typed +0 -0
  37. metaflow/_vendor/v3_7/typeguard/__init__.py +48 -0
  38. metaflow/_vendor/v3_7/typeguard/_checkers.py +906 -0
  39. metaflow/_vendor/v3_7/typeguard/_config.py +108 -0
  40. metaflow/_vendor/v3_7/typeguard/_decorators.py +237 -0
  41. metaflow/_vendor/v3_7/typeguard/_exceptions.py +42 -0
  42. metaflow/_vendor/v3_7/typeguard/_functions.py +310 -0
  43. metaflow/_vendor/v3_7/typeguard/_importhook.py +213 -0
  44. metaflow/_vendor/v3_7/typeguard/_memo.py +48 -0
  45. metaflow/_vendor/v3_7/typeguard/_pytest_plugin.py +100 -0
  46. metaflow/_vendor/v3_7/typeguard/_suppression.py +88 -0
  47. metaflow/_vendor/v3_7/typeguard/_transformer.py +1207 -0
  48. metaflow/_vendor/v3_7/typeguard/_union_transformer.py +54 -0
  49. metaflow/_vendor/v3_7/typeguard/_utils.py +169 -0
  50. metaflow/_vendor/v3_7/typeguard/py.typed +0 -0
  51. metaflow/_vendor/v3_7/typing_extensions.py +3072 -0
  52. metaflow/_vendor/yaml/__init__.py +427 -0
  53. metaflow/_vendor/yaml/composer.py +139 -0
  54. metaflow/_vendor/yaml/constructor.py +748 -0
  55. metaflow/_vendor/yaml/cyaml.py +101 -0
  56. metaflow/_vendor/yaml/dumper.py +62 -0
  57. metaflow/_vendor/yaml/emitter.py +1137 -0
  58. metaflow/_vendor/yaml/error.py +75 -0
  59. metaflow/_vendor/yaml/events.py +86 -0
  60. metaflow/_vendor/yaml/loader.py +63 -0
  61. metaflow/_vendor/yaml/nodes.py +49 -0
  62. metaflow/_vendor/yaml/parser.py +589 -0
  63. metaflow/_vendor/yaml/reader.py +185 -0
  64. metaflow/_vendor/yaml/representer.py +389 -0
  65. metaflow/_vendor/yaml/resolver.py +227 -0
  66. metaflow/_vendor/yaml/scanner.py +1435 -0
  67. metaflow/_vendor/yaml/serializer.py +111 -0
  68. metaflow/_vendor/yaml/tokens.py +104 -0
  69. metaflow/cards.py +5 -0
  70. metaflow/cli.py +331 -785
  71. metaflow/cli_args.py +17 -0
  72. metaflow/cli_components/__init__.py +0 -0
  73. metaflow/cli_components/dump_cmd.py +96 -0
  74. metaflow/cli_components/init_cmd.py +52 -0
  75. metaflow/cli_components/run_cmds.py +546 -0
  76. metaflow/cli_components/step_cmd.py +334 -0
  77. metaflow/cli_components/utils.py +140 -0
  78. metaflow/client/__init__.py +1 -0
  79. metaflow/client/core.py +467 -73
  80. metaflow/client/filecache.py +75 -35
  81. metaflow/clone_util.py +7 -1
  82. metaflow/cmd/code/__init__.py +231 -0
  83. metaflow/cmd/develop/stub_generator.py +756 -288
  84. metaflow/cmd/develop/stubs.py +12 -28
  85. metaflow/cmd/main_cli.py +6 -4
  86. metaflow/cmd/make_wrapper.py +78 -0
  87. metaflow/datastore/__init__.py +1 -0
  88. metaflow/datastore/content_addressed_store.py +41 -10
  89. metaflow/datastore/datastore_set.py +11 -2
  90. metaflow/datastore/flow_datastore.py +156 -10
  91. metaflow/datastore/spin_datastore.py +91 -0
  92. metaflow/datastore/task_datastore.py +154 -39
  93. metaflow/debug.py +5 -0
  94. metaflow/decorators.py +404 -78
  95. metaflow/exception.py +8 -2
  96. metaflow/extension_support/__init__.py +527 -376
  97. metaflow/extension_support/_empty_file.py +2 -2
  98. metaflow/extension_support/plugins.py +49 -31
  99. metaflow/flowspec.py +482 -33
  100. metaflow/graph.py +210 -42
  101. metaflow/includefile.py +84 -40
  102. metaflow/lint.py +141 -22
  103. metaflow/meta_files.py +13 -0
  104. metaflow/{metadata → metadata_provider}/heartbeat.py +24 -8
  105. metaflow/{metadata → metadata_provider}/metadata.py +86 -1
  106. metaflow/metaflow_config.py +175 -28
  107. metaflow/metaflow_config_funcs.py +51 -3
  108. metaflow/metaflow_current.py +4 -10
  109. metaflow/metaflow_environment.py +139 -53
  110. metaflow/metaflow_git.py +115 -0
  111. metaflow/metaflow_profile.py +18 -0
  112. metaflow/metaflow_version.py +150 -66
  113. metaflow/mflog/__init__.py +4 -3
  114. metaflow/mflog/save_logs.py +2 -2
  115. metaflow/multicore_utils.py +31 -14
  116. metaflow/package/__init__.py +673 -0
  117. metaflow/packaging_sys/__init__.py +880 -0
  118. metaflow/packaging_sys/backend.py +128 -0
  119. metaflow/packaging_sys/distribution_support.py +153 -0
  120. metaflow/packaging_sys/tar_backend.py +99 -0
  121. metaflow/packaging_sys/utils.py +54 -0
  122. metaflow/packaging_sys/v1.py +527 -0
  123. metaflow/parameters.py +149 -28
  124. metaflow/plugins/__init__.py +74 -5
  125. metaflow/plugins/airflow/airflow.py +40 -25
  126. metaflow/plugins/airflow/airflow_cli.py +22 -5
  127. metaflow/plugins/airflow/airflow_decorator.py +1 -1
  128. metaflow/plugins/airflow/airflow_utils.py +5 -3
  129. metaflow/plugins/airflow/sensors/base_sensor.py +4 -4
  130. metaflow/plugins/airflow/sensors/external_task_sensor.py +2 -2
  131. metaflow/plugins/airflow/sensors/s3_sensor.py +2 -2
  132. metaflow/plugins/argo/argo_client.py +78 -33
  133. metaflow/plugins/argo/argo_events.py +6 -6
  134. metaflow/plugins/argo/argo_workflows.py +2410 -527
  135. metaflow/plugins/argo/argo_workflows_cli.py +571 -121
  136. metaflow/plugins/argo/argo_workflows_decorator.py +43 -12
  137. metaflow/plugins/argo/argo_workflows_deployer.py +106 -0
  138. metaflow/plugins/argo/argo_workflows_deployer_objects.py +453 -0
  139. metaflow/plugins/argo/capture_error.py +73 -0
  140. metaflow/plugins/argo/conditional_input_paths.py +35 -0
  141. metaflow/plugins/argo/exit_hooks.py +209 -0
  142. metaflow/plugins/argo/jobset_input_paths.py +15 -0
  143. metaflow/plugins/argo/param_val.py +19 -0
  144. metaflow/plugins/aws/aws_client.py +10 -3
  145. metaflow/plugins/aws/aws_utils.py +55 -2
  146. metaflow/plugins/aws/batch/batch.py +72 -5
  147. metaflow/plugins/aws/batch/batch_cli.py +33 -10
  148. metaflow/plugins/aws/batch/batch_client.py +4 -3
  149. metaflow/plugins/aws/batch/batch_decorator.py +102 -35
  150. metaflow/plugins/aws/secrets_manager/aws_secrets_manager_secrets_provider.py +13 -10
  151. metaflow/plugins/aws/step_functions/dynamo_db_client.py +0 -3
  152. metaflow/plugins/aws/step_functions/production_token.py +1 -1
  153. metaflow/plugins/aws/step_functions/step_functions.py +65 -8
  154. metaflow/plugins/aws/step_functions/step_functions_cli.py +101 -7
  155. metaflow/plugins/aws/step_functions/step_functions_decorator.py +1 -2
  156. metaflow/plugins/aws/step_functions/step_functions_deployer.py +97 -0
  157. metaflow/plugins/aws/step_functions/step_functions_deployer_objects.py +264 -0
  158. metaflow/plugins/azure/azure_exceptions.py +1 -1
  159. metaflow/plugins/azure/azure_secret_manager_secrets_provider.py +240 -0
  160. metaflow/plugins/azure/azure_tail.py +1 -1
  161. metaflow/plugins/azure/includefile_support.py +2 -0
  162. metaflow/plugins/cards/card_cli.py +66 -30
  163. metaflow/plugins/cards/card_creator.py +25 -1
  164. metaflow/plugins/cards/card_datastore.py +21 -49
  165. metaflow/plugins/cards/card_decorator.py +132 -8
  166. metaflow/plugins/cards/card_modules/basic.py +112 -17
  167. metaflow/plugins/cards/card_modules/bundle.css +1 -1
  168. metaflow/plugins/cards/card_modules/card.py +16 -1
  169. metaflow/plugins/cards/card_modules/chevron/renderer.py +1 -1
  170. metaflow/plugins/cards/card_modules/components.py +665 -28
  171. metaflow/plugins/cards/card_modules/convert_to_native_type.py +36 -7
  172. metaflow/plugins/cards/card_modules/json_viewer.py +232 -0
  173. metaflow/plugins/cards/card_modules/main.css +1 -0
  174. metaflow/plugins/cards/card_modules/main.js +68 -49
  175. metaflow/plugins/cards/card_modules/renderer_tools.py +1 -0
  176. metaflow/plugins/cards/card_modules/test_cards.py +26 -12
  177. metaflow/plugins/cards/card_server.py +39 -14
  178. metaflow/plugins/cards/component_serializer.py +2 -9
  179. metaflow/plugins/cards/metadata.py +22 -0
  180. metaflow/plugins/catch_decorator.py +9 -0
  181. metaflow/plugins/datastores/azure_storage.py +10 -1
  182. metaflow/plugins/datastores/gs_storage.py +6 -2
  183. metaflow/plugins/datastores/local_storage.py +12 -6
  184. metaflow/plugins/datastores/spin_storage.py +12 -0
  185. metaflow/plugins/datatools/local.py +2 -0
  186. metaflow/plugins/datatools/s3/s3.py +126 -75
  187. metaflow/plugins/datatools/s3/s3op.py +254 -121
  188. metaflow/plugins/env_escape/__init__.py +3 -3
  189. metaflow/plugins/env_escape/client_modules.py +102 -72
  190. metaflow/plugins/env_escape/server.py +7 -0
  191. metaflow/plugins/env_escape/stub.py +24 -5
  192. metaflow/plugins/events_decorator.py +343 -185
  193. metaflow/plugins/exit_hook/__init__.py +0 -0
  194. metaflow/plugins/exit_hook/exit_hook_decorator.py +46 -0
  195. metaflow/plugins/exit_hook/exit_hook_script.py +52 -0
  196. metaflow/plugins/gcp/__init__.py +1 -1
  197. metaflow/plugins/gcp/gcp_secret_manager_secrets_provider.py +11 -6
  198. metaflow/plugins/gcp/gs_tail.py +10 -6
  199. metaflow/plugins/gcp/includefile_support.py +3 -0
  200. metaflow/plugins/kubernetes/kube_utils.py +108 -0
  201. metaflow/plugins/kubernetes/kubernetes.py +411 -130
  202. metaflow/plugins/kubernetes/kubernetes_cli.py +168 -36
  203. metaflow/plugins/kubernetes/kubernetes_client.py +104 -2
  204. metaflow/plugins/kubernetes/kubernetes_decorator.py +246 -88
  205. metaflow/plugins/kubernetes/kubernetes_job.py +253 -581
  206. metaflow/plugins/kubernetes/kubernetes_jobsets.py +1071 -0
  207. metaflow/plugins/kubernetes/spot_metadata_cli.py +69 -0
  208. metaflow/plugins/kubernetes/spot_monitor_sidecar.py +109 -0
  209. metaflow/plugins/logs_cli.py +359 -0
  210. metaflow/plugins/{metadata → metadata_providers}/local.py +144 -84
  211. metaflow/plugins/{metadata → metadata_providers}/service.py +103 -26
  212. metaflow/plugins/metadata_providers/spin.py +16 -0
  213. metaflow/plugins/package_cli.py +36 -24
  214. metaflow/plugins/parallel_decorator.py +128 -11
  215. metaflow/plugins/parsers.py +16 -0
  216. metaflow/plugins/project_decorator.py +51 -5
  217. metaflow/plugins/pypi/bootstrap.py +357 -105
  218. metaflow/plugins/pypi/conda_decorator.py +82 -81
  219. metaflow/plugins/pypi/conda_environment.py +187 -52
  220. metaflow/plugins/pypi/micromamba.py +157 -47
  221. metaflow/plugins/pypi/parsers.py +268 -0
  222. metaflow/plugins/pypi/pip.py +88 -13
  223. metaflow/plugins/pypi/pypi_decorator.py +37 -1
  224. metaflow/plugins/pypi/utils.py +48 -2
  225. metaflow/plugins/resources_decorator.py +2 -2
  226. metaflow/plugins/secrets/__init__.py +3 -0
  227. metaflow/plugins/secrets/secrets_decorator.py +26 -181
  228. metaflow/plugins/secrets/secrets_func.py +49 -0
  229. metaflow/plugins/secrets/secrets_spec.py +101 -0
  230. metaflow/plugins/secrets/utils.py +74 -0
  231. metaflow/plugins/tag_cli.py +4 -7
  232. metaflow/plugins/test_unbounded_foreach_decorator.py +41 -6
  233. metaflow/plugins/timeout_decorator.py +3 -3
  234. metaflow/plugins/uv/__init__.py +0 -0
  235. metaflow/plugins/uv/bootstrap.py +128 -0
  236. metaflow/plugins/uv/uv_environment.py +72 -0
  237. metaflow/procpoll.py +1 -1
  238. metaflow/pylint_wrapper.py +5 -1
  239. metaflow/runner/__init__.py +0 -0
  240. metaflow/runner/click_api.py +717 -0
  241. metaflow/runner/deployer.py +470 -0
  242. metaflow/runner/deployer_impl.py +201 -0
  243. metaflow/runner/metaflow_runner.py +714 -0
  244. metaflow/runner/nbdeploy.py +132 -0
  245. metaflow/runner/nbrun.py +225 -0
  246. metaflow/runner/subprocess_manager.py +650 -0
  247. metaflow/runner/utils.py +335 -0
  248. metaflow/runtime.py +1078 -260
  249. metaflow/sidecar/sidecar_worker.py +1 -1
  250. metaflow/system/__init__.py +5 -0
  251. metaflow/system/system_logger.py +85 -0
  252. metaflow/system/system_monitor.py +108 -0
  253. metaflow/system/system_utils.py +19 -0
  254. metaflow/task.py +521 -225
  255. metaflow/tracing/__init__.py +7 -7
  256. metaflow/tracing/span_exporter.py +31 -38
  257. metaflow/tracing/tracing_modules.py +38 -43
  258. metaflow/tuple_util.py +27 -0
  259. metaflow/user_configs/__init__.py +0 -0
  260. metaflow/user_configs/config_options.py +563 -0
  261. metaflow/user_configs/config_parameters.py +598 -0
  262. metaflow/user_decorators/__init__.py +0 -0
  263. metaflow/user_decorators/common.py +144 -0
  264. metaflow/user_decorators/mutable_flow.py +512 -0
  265. metaflow/user_decorators/mutable_step.py +424 -0
  266. metaflow/user_decorators/user_flow_decorator.py +264 -0
  267. metaflow/user_decorators/user_step_decorator.py +749 -0
  268. metaflow/util.py +243 -27
  269. metaflow/vendor.py +23 -7
  270. metaflow/version.py +1 -1
  271. ob_metaflow-2.19.7.1rc0.data/data/share/metaflow/devtools/Makefile +355 -0
  272. ob_metaflow-2.19.7.1rc0.data/data/share/metaflow/devtools/Tiltfile +726 -0
  273. ob_metaflow-2.19.7.1rc0.data/data/share/metaflow/devtools/pick_services.sh +105 -0
  274. ob_metaflow-2.19.7.1rc0.dist-info/METADATA +87 -0
  275. ob_metaflow-2.19.7.1rc0.dist-info/RECORD +445 -0
  276. {ob_metaflow-2.11.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/WHEEL +1 -1
  277. {ob_metaflow-2.11.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/entry_points.txt +1 -0
  278. metaflow/_vendor/v3_5/__init__.py +0 -1
  279. metaflow/_vendor/v3_5/importlib_metadata/__init__.py +0 -644
  280. metaflow/_vendor/v3_5/importlib_metadata/_compat.py +0 -152
  281. metaflow/package.py +0 -188
  282. ob_metaflow-2.11.13.1.dist-info/METADATA +0 -85
  283. ob_metaflow-2.11.13.1.dist-info/RECORD +0 -308
  284. /metaflow/_vendor/{v3_5/zipp.py → zipp.py} +0 -0
  285. /metaflow/{metadata → metadata_provider}/__init__.py +0 -0
  286. /metaflow/{metadata → metadata_provider}/util.py +0 -0
  287. /metaflow/plugins/{metadata → metadata_providers}/__init__.py +0 -0
  288. {ob_metaflow-2.11.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info/licenses}/LICENSE +0 -0
  289. {ob_metaflow-2.11.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/top_level.txt +0 -0
metaflow/plugins/argo/capture_error.py
@@ -0,0 +1,73 @@
+ import json
+ import os
+ from datetime import datetime, timezone
+
+ ###
+ # Algorithm to determine 1st error:
+ # ignore the failures where message = ""
+ # group the failures via templateName
+ # sort each group by finishedAt
+ # find the group for which the last finishedAt is earliest
+ # if the earliest message is "No more retries left" then
+ # get the n-1th message from that group
+ # else
+ # return the last message.
+ ###
+
+
+ def parse_workflow_failures():
+     failures = json.loads(
+         json.loads(os.getenv("METAFLOW_ARGO_WORKFLOW_FAILURES", "[]"), strict=False),
+         strict=False,
+     )
+     return [wf for wf in failures if wf.get("message")]
+
+
+ def group_failures_by_template(failures):
+     groups = {}
+     for failure in failures:
+         if failure.get("finishedAt", None) is None:
+             timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
+             failure["finishedAt"] = timestamp
+         groups.setdefault(failure["templateName"], []).append(failure)
+     return groups
+
+
+ def sort_by_finished_at(items):
+     return sorted(
+         items, key=lambda x: datetime.strptime(x["finishedAt"], "%Y-%m-%dT%H:%M:%SZ")
+     )
+
+
+ def find_earliest_last_finished_group(groups):
+     return min(
+         groups,
+         key=lambda k: datetime.strptime(
+             groups[k][-1]["finishedAt"], "%Y-%m-%dT%H:%M:%SZ"
+         ),
+     )
+
+
+ def determine_first_error():
+     failures = parse_workflow_failures()
+     if not failures:
+         return None
+
+     grouped_failures = group_failures_by_template(failures)
+     for group in grouped_failures.values():
+         group.sort(
+             key=lambda g: datetime.strptime(g["finishedAt"], "%Y-%m-%dT%H:%M:%SZ")
+         )
+
+     earliest_group = grouped_failures[
+         find_earliest_last_finished_group(grouped_failures)
+     ]
+
+     if earliest_group[-1]["message"] == "No more retries left":
+         return earliest_group[-2]
+     return earliest_group[-1]
+
+
+ if __name__ == "__main__":
+     first_err = determine_first_error()
+     print(json.dumps(first_err, indent=2))
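A minimal sketch (not part of the diff) of how the first-error heuristic above behaves; the failure records, template names, and timestamps below are fabricated for illustration:

import json
import os

from metaflow.plugins.argo.capture_error import determine_first_error

failures = [
    {"templateName": "train", "message": "OOMKilled", "finishedAt": "2024-01-01T00:00:10Z"},
    {"templateName": "train", "message": "No more retries left", "finishedAt": "2024-01-01T00:00:20Z"},
    {"templateName": "score", "message": "ImagePullBackOff", "finishedAt": "2024-01-01T00:05:00Z"},
]
# The env var is double-encoded JSON, matching what parse_workflow_failures expects.
os.environ["METAFLOW_ARGO_WORKFLOW_FAILURES"] = json.dumps(json.dumps(failures))

# The "train" group finishes earliest and ends with "No more retries left",
# so the heuristic reports the preceding failure in that group: OOMKilled.
print(determine_first_error()["message"])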
metaflow/plugins/argo/conditional_input_paths.py
@@ -0,0 +1,35 @@
+ from math import inf
+ import sys
+ from metaflow.util import decompress_list, compress_list
+ import base64
+
+
+ def generate_input_paths(input_paths, skippable_steps):
+     # => run_id/step/:foo,bar
+     # input_paths are base64 encoded due to Argo shenanigans
+     decoded = base64.b64decode(input_paths).decode("utf-8")
+     paths = decompress_list(decoded)
+
+     # some of the paths are going to be malformed due to never having executed per conditional.
+     # strip these out of the list.
+
+     # all pathspecs of leading steps that executed.
+     trimmed = [path for path in paths if not "{{" in path]
+
+     # pathspecs of leading steps that are conditional, and should be used instead of non-conditional ones
+     # e.g. the case of skipping switches: start -> case_step -> conditional_a or end
+     conditionals = [
+         path for path in trimmed if not any(step in path for step in skippable_steps)
+     ]
+     pathspecs_to_use = conditionals if conditionals else trimmed
+     return compress_list(pathspecs_to_use, zlibmin=inf)
+
+
+ if __name__ == "__main__":
+     input_paths = sys.argv[1]
+     try:
+         skippable_steps = sys.argv[2].split(",")
+     except IndexError:
+         skippable_steps = []
+
+     print(generate_input_paths(input_paths, skippable_steps))
metaflow/plugins/argo/exit_hooks.py
@@ -0,0 +1,209 @@
+ from collections import defaultdict
+ import json
+ from typing import Dict, List, Optional
+
+
+ class JsonSerializable(object):
+     def to_json(self):
+         return self.payload
+
+     def __str__(self):
+         return json.dumps(self.payload, indent=4)
+
+
+ class _LifecycleHook(JsonSerializable):
+     # https://argoproj.github.io/argo-workflows/fields/#lifecyclehook
+
+     def __init__(self, name):
+         tree = lambda: defaultdict(tree)
+         self.name = name
+         self.payload = tree()
+
+     def expression(self, expression):
+         self.payload["expression"] = str(expression)
+         return self
+
+     def template(self, template):
+         self.payload["template"] = template
+         return self
+
+
+ class _Template(JsonSerializable):
+     # https://argoproj.github.io/argo-workflows/fields/#template
+
+     def __init__(self, name):
+         tree = lambda: defaultdict(tree)
+         self.name = name
+         self.payload = tree()
+         self.payload["name"] = name
+
+     def http(self, http):
+         self.payload["http"] = http.to_json()
+         return self
+
+     def script(self, script):
+         self.payload["script"] = script.to_json()
+         return self
+
+     def container(self, container):
+         self.payload["container"] = container
+         return self
+
+     def service_account_name(self, service_account_name):
+         self.payload["serviceAccountName"] = service_account_name
+         return self
+
+
+ class Hook(object):
+     """
+     Abstraction for Argo Workflows exit hooks.
+     A hook consists of a Template, and one or more LifecycleHooks that trigger the template
+     """
+
+     template: "_Template"
+     lifecycle_hooks: List["_LifecycleHook"]
+
+
+ class _HttpSpec(JsonSerializable):
+     # https://argoproj.github.io/argo-workflows/fields/#http
+
+     def __init__(self, method):
+         tree = lambda: defaultdict(tree)
+         self.payload = tree()
+         self.payload["method"] = method
+         self.payload["headers"] = []
+
+     def header(self, header, value):
+         self.payload["headers"].append({"name": header, "value": value})
+         return self
+
+     def body(self, body):
+         self.payload["body"] = str(body)
+         return self
+
+     def url(self, url):
+         self.payload["url"] = url
+         return self
+
+     def success_condition(self, success_condition):
+         self.payload["successCondition"] = success_condition
+         return self
+
+
+ # HTTP hook
+ class HttpExitHook(Hook):
+     def __init__(
+         self,
+         name: str,
+         url: str,
+         method: str = "GET",
+         headers: Optional[Dict] = None,
+         body: Optional[str] = None,
+         on_success: bool = False,
+         on_error: bool = False,
+     ):
+         self.template = _Template(name)
+         http = _HttpSpec(method).url(url)
+         if headers is not None:
+             for header, value in headers.items():
+                 http.header(header, value)
+
+         if body is not None:
+             http.body(body)
+
+         self.template.http(http)
+
+         self.lifecycle_hooks = []
+
+         if on_success and on_error:
+             raise Exception("Set only one of the on_success/on_error at a time.")
+
+         if on_success:
+             self.lifecycle_hooks.append(
+                 _LifecycleHook(name)
+                 .expression("workflow.status == 'Succeeded'")
+                 .template(self.template.name)
+             )
+
+         if on_error:
+             self.lifecycle_hooks.append(
+                 _LifecycleHook(name)
+                 .expression("workflow.status == 'Error' || workflow.status == 'Failed'")
+                 .template(self.template.name)
+             )
+
+         if not on_success and not on_error:
+             # add an expressionless lifecycle hook
+             self.lifecycle_hooks.append(_LifecycleHook(name).template(name))
+
+
+ class ExitHookHack(Hook):
+     # Warning: terrible hack to workaround a bug in Argo Workflow where the
+     # templates listed above do not execute unless there is an
+     # explicit exit hook. as and when this bug is patched, we should
+     # remove this effectively no-op template.
+     # Note: We use the Http template because changing this to an actual no-op container had the side-effect of
+     # leaving LifecycleHooks in a pending state even when they have finished execution.
+     def __init__(
+         self,
+         url,
+         headers=None,
+         body=None,
+     ):
+         self.template = _Template("exit-hook-hack")
+         http = _HttpSpec("GET").url(url)
+         if headers is not None:
+             for header, value in headers.items():
+                 http.header(header, value)
+
+         if body is not None:
+             http.body(json.dumps(body))
+
+         http.success_condition("true == true")
+
+         self.template.http(http)
+
+         self.lifecycle_hooks = []
+
+         # add an expressionless lifecycle hook
+         self.lifecycle_hooks.append(_LifecycleHook("exit").template("exit-hook-hack"))
+
+
+ class ContainerHook(Hook):
+     def __init__(
+         self,
+         name: str,
+         container: Dict,
+         service_account_name: str = None,
+         on_success: bool = False,
+         on_error: bool = False,
+     ):
+         self.template = _Template(name)
+
+         if service_account_name is not None:
+             self.template.service_account_name(service_account_name)
+
+         self.template.container(container)
+
+         self.lifecycle_hooks = []
+
+         if on_success and on_error:
+             raise Exception("Set only one of the on_success/on_error at a time.")
+
+         if on_success:
+             self.lifecycle_hooks.append(
+                 _LifecycleHook(name)
+                 .expression("workflow.status == 'Succeeded'")
+                 .template(self.template.name)
+             )
+
+         if on_error:
+             self.lifecycle_hooks.append(
+                 _LifecycleHook(name)
+                 .expression("workflow.status == 'Error' || workflow.status == 'Failed'")
+                 .template(self.template.name)
+             )
+
+         if not on_success and not on_error:
+             # add an expressionless lifecycle hook
+             self.lifecycle_hooks.append(_LifecycleHook(name).template(name))
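A minimal sketch (not part of the diff) of using the HttpExitHook abstraction above; the name, URL, header, and body values are placeholders:

from metaflow.plugins.argo.exit_hooks import HttpExitHook

hook = HttpExitHook(
    name="notify-on-failure",
    url="https://example.com/hooks/argo",
    method="POST",
    headers={"Content-Type": "application/json"},
    body='{"status": "failed"}',
    on_error=True,
)

# One Argo template plus the lifecycle hook that fires it when the workflow errors or fails.
print(hook.template)
print(hook.lifecycle_hooks[0])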
metaflow/plugins/argo/jobset_input_paths.py
@@ -0,0 +1,15 @@
+ import sys
+
+
+ def generate_input_paths(run_id, step_name, task_id_entropy, num_parallel):
+     # => run_id/step/:foo,bar
+     control_id = "control-{}-0".format(task_id_entropy)
+     worker_ids = [
+         "worker-{}-{}".format(task_id_entropy, i) for i in range(int(num_parallel) - 1)
+     ]
+     ids = [control_id] + worker_ids
+     return "{}/{}/:{}".format(run_id, step_name, ",".join(ids))
+
+
+ if __name__ == "__main__":
+     print(generate_input_paths(sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4]))
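For reference, a sketch (not part of the diff) of the pathspec string the helper above produces for a three-way parallel step; the run id, step name, and entropy value are made up:

from metaflow.plugins.argo.jobset_input_paths import generate_input_paths

print(generate_input_paths("argo-123", "train", "a1b2", 3))
# argo-123/train/:control-a1b2-0,worker-a1b2-0,worker-a1b2-1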
metaflow/plugins/argo/param_val.py
@@ -0,0 +1,19 @@
+ import sys
+ import base64
+ import json
+
+
+ def parse_parameter_value(base64_value):
+     val = base64.b64decode(base64_value).decode("utf-8")
+
+     try:
+         return json.loads(val)
+     except json.decoder.JSONDecodeError:
+         # fallback to using the original value.
+         return val
+
+
+ if __name__ == "__main__":
+     base64_val = sys.argv[1]
+
+     print(parse_parameter_value(base64_val))
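A short sketch (not part of the diff) of the JSON-or-fallback behavior above; both inputs are made up:

import base64

from metaflow.plugins.argo.param_val import parse_parameter_value

print(parse_parameter_value(base64.b64encode(b'{"alpha": 0.1}').decode()))  # {'alpha': 0.1}
print(parse_parameter_value(base64.b64encode(b"not-json").decode()))        # not-json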
metaflow/plugins/aws/aws_client.py
@@ -14,6 +14,7 @@ class Boto3ClientProvider(object):
              AWS_SANDBOX_ENABLED,
              AWS_SANDBOX_STS_ENDPOINT_URL,
              AWS_SANDBOX_API_KEY,
+             S3_CLIENT_RETRY_CONFIG,
          )
 
          if session_vars is None:
@@ -34,13 +35,19 @@ class Boto3ClientProvider(object):
                  "Could not import module 'boto3'. Install boto3 first."
              )
 
+         # Convert dictionary config to Config object if needed
+         if "config" in client_params and not isinstance(
+             client_params["config"], Config
+         ):
+             client_params["config"] = Config(**client_params["config"])
+
          if module == "s3" and (
              "config" not in client_params or client_params["config"].retries is None
          ):
-             # Use the adaptive retry strategy by default -- do not set anything if
-             # the user has already set something
+             # do not set anything if the user has already set something
              config = client_params.get("config", Config())
-             config.retries = {"max_attempts": 10, "mode": "adaptive"}
+             config.retries = S3_CLIENT_RETRY_CONFIG
+             client_params["config"] = config
 
          if AWS_SANDBOX_ENABLED:
              # role is ignored in the sandbox
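The dict-to-Config normalization added above can be illustrated with a small, self-contained sketch (not part of the diff); the retry settings below are made up:

from botocore.config import Config

client_params = {"config": {"retries": {"max_attempts": 10, "mode": "adaptive"}}}

# Mirror the hunk above: a plain dict supplied by the caller becomes a botocore Config object.
if "config" in client_params and not isinstance(client_params["config"], Config):
    client_params["config"] = Config(**client_params["config"])

print(client_params["config"].retries)  # {'max_attempts': 10, 'mode': 'adaptive'}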
metaflow/plugins/aws/aws_utils.py
@@ -1,10 +1,25 @@
  import re
- import requests
 
  from metaflow.exception import MetaflowException
  from metaflow.metaflow_config import MAX_MEMORY_PER_TASK, MAX_CPU_PER_TASK
 
 
+ def parse_s3_full_path(s3_uri):
+     from urllib.parse import urlparse
+
+     # <scheme>://<netloc>/<path>;<params>?<query>#<fragment>
+     scheme, netloc, path, _, _, _ = urlparse(s3_uri)
+     assert scheme == "s3"
+     assert netloc is not None
+
+     bucket = netloc
+     path = path.lstrip("/").rstrip("/")
+     if path == "":
+         path = None
+
+     return bucket, path
+
+
  def get_ec2_instance_metadata():
      """
      Fetches the EC2 instance metadata through AWS instance metadata service
@@ -15,6 +30,10 @@ def get_ec2_instance_metadata():
      - ec2-region
      - ec2-availability-zone
      """
+
+     # TODO: Remove dependency on requests
+     import requests
+
      meta = {}
      # Capture AWS instance identity metadata. This is best-effort only since
      # access to this end-point might be blocked on AWS and not available
@@ -30,7 +49,7 @@
          # Try to get an IMDSv2 token.
          token = requests.put(
              url="http://169.254.169.254/latest/api/token",
-             headers={"X-aws-ec2-metadata-token-ttl-seconds": 100},
+             headers={"X-aws-ec2-metadata-token-ttl-seconds": "100"},
              timeout=timeout,
          ).text
      except:
@@ -144,6 +163,8 @@ def compute_resource_attributes(decos, compute_deco, step_name, resource_default
              # Here we don't have ints, so we compare the value and raise
              # an exception if not equal
              if my_val != v:
+                 # TODO: Throw a better exception since the user has no
+                 # knowledge of 'compute' decorator
                  raise MetaflowException(
                      "'resources' and compute decorator have conflicting "
                      "values for '%s'. Please use consistent values or "
@@ -187,3 +208,35 @@ def sanitize_batch_tag(key, value):
      _value = re.sub(RE_NOT_PERMITTED, "", value)[:256]
 
      return _key, _value
+
+
+ def validate_aws_tag(key: str, value: str):
+     PERMITTED = r"[A-Za-z0-9\s\+\-\=\.\_\:\/\@]"
+
+     AWS_PREFIX = r"^aws\:" # case-insensitive.
+     if re.match(AWS_PREFIX, key, re.IGNORECASE) or re.match(
+         AWS_PREFIX, value, re.IGNORECASE
+     ):
+         raise MetaflowException(
+             "'aws:' is not an allowed prefix for either tag keys or values"
+         )
+
+     if len(key) > 128:
+         raise MetaflowException(
+             "Tag key *%s* is too long. Maximum allowed tag key length is 128." % key
+         )
+     if len(value) > 256:
+         raise MetaflowException(
+             "Tag value *%s* is too long. Maximum allowed tag value length is 256."
+             % value
+         )
+
+     if not re.match(PERMITTED, key):
+         raise MetaflowException(
+             "Key *s* is not permitted. Tags must match pattern: %s" % (key, PERMITTED)
+         )
+     if not re.match(PERMITTED, value):
+         raise MetaflowException(
+             "Value *%s* is not permitted. Tags must match pattern: %s"
+             % (value, PERMITTED)
+         )
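A quick sketch (not part of the diff) of how the new parse_s3_full_path helper splits S3 URIs; the bucket and key names are made up:

from metaflow.plugins.aws.aws_utils import parse_s3_full_path

print(parse_s3_full_path("s3://my-bucket/runs/42/cmd.sh"))  # ('my-bucket', 'runs/42/cmd.sh')
print(parse_s3_full_path("s3://my-bucket/"))                # ('my-bucket', None)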
metaflow/plugins/aws/batch/batch.py
@@ -53,20 +53,32 @@ class BatchKilledException(MetaflowException):
 
 
  class Batch(object):
-     def __init__(self, metadata, environment):
+     def __init__(self, metadata, environment, flow_datastore=None):
          self.metadata = metadata
          self.environment = environment
+         self.flow_datastore = flow_datastore
          self._client = BatchClient()
          atexit.register(lambda: self.job.kill() if hasattr(self, "job") else None)
 
-     def _command(self, environment, code_package_url, step_name, step_cmds, task_spec):
+     def _command(
+         self,
+         environment,
+         code_package_metadata,
+         code_package_url,
+         step_name,
+         step_cmds,
+         task_spec,
+         offload_command_to_s3,
+     ):
          mflog_expr = export_mflog_env_vars(
              datastore_type="s3",
              stdout_path=STDOUT_PATH,
              stderr_path=STDERR_PATH,
              **task_spec
          )
-         init_cmds = environment.get_package_commands(code_package_url, "s3")
+         init_cmds = environment.get_package_commands(
+             code_package_url, "s3", code_package_metadata
+         )
          init_expr = " && ".join(init_cmds)
          step_expr = bash_capture_logs(
              " && ".join(environment.bootstrap_commands(step_name, "s3") + step_cmds)
@@ -94,7 +106,43 @@ class Batch(object):
          # We lose the last logs in this scenario (although they are visible
          # still through AWS CloudWatch console).
          cmd_str += "c=$?; %s; exit $c" % BASH_SAVE_LOGS
-         return shlex.split('bash -c "%s"' % cmd_str)
+         command = shlex.split('bash -c "%s"' % cmd_str)
+
+         if not offload_command_to_s3:
+             return command
+
+         # If S3 upload is enabled, we need to modify the command after it's created
+         if self.flow_datastore is None:
+             raise MetaflowException(
+                 "Can not offload Batch command to S3 without a datastore configured."
+             )
+
+         from metaflow.plugins.aws.aws_utils import parse_s3_full_path
+
+         # Get the command that was created
+         # Upload the command to S3 during deployment
+         try:
+             command_bytes = cmd_str.encode("utf-8")
+             result_paths = self.flow_datastore.save_data([command_bytes], len_hint=1)
+             s3_path, _key = result_paths[0]
+
+             bucket, s3_object = parse_s3_full_path(s3_path)
+             download_script = "{python} -c '{script}'".format(
+                 python=self.environment._python(),
+                 script='import boto3, os; ep=os.getenv(\\"METAFLOW_S3_ENDPOINT_URL\\"); boto3.client(\\"s3\\", **({\\"endpoint_url\\":ep} if ep else {})).download_file(\\"%s\\", \\"%s\\", \\"/tmp/step_command.sh\\")'
+                 % (bucket, s3_object),
+             )
+             download_cmd = (
+                 f"{self.environment._get_install_dependencies_cmd('s3')} && " # required for boto3 due to the original dependencies cmd getting packaged, and not being downloaded in time.
+                 f"{download_script} && "
+                 f"chmod +x /tmp/step_command.sh && "
+                 f"bash /tmp/step_command.sh"
+             )
+             new_cmd = shlex.split('bash -c "%s"' % download_cmd)
+             return new_cmd
+         except Exception as e:
+             print(f"Warning: Failed to upload command to S3: {e}")
+             print("Falling back to inline command")
 
      def _search_jobs(self, flow_name, run_id, user):
          if user is None:
@@ -167,6 +215,7 @@ class Batch(object):
          step_name,
          step_cli,
          task_spec,
+         code_package_metadata,
          code_package_sha,
          code_package_url,
          code_package_ds,
@@ -188,6 +237,7 @@ class Batch(object):
          host_volumes=None,
          efs_volumes=None,
          use_tmpfs=None,
+         aws_batch_tags=None,
          tmpfs_tempdir=None,
          tmpfs_size=None,
          tmpfs_path=None,
@@ -195,6 +245,7 @@ class Batch(object):
          ephemeral_storage=None,
          log_driver=None,
          log_options=None,
+         offload_command_to_s3=False,
      ):
          job_name = self._job_name(
              attrs.get("metaflow.user"),
@@ -210,7 +261,13 @@ class Batch(object):
              .job_queue(queue)
              .command(
                  self._command(
-                     self.environment, code_package_url, step_name, [step_cli], task_spec
+                     self.environment,
+                     code_package_metadata,
+                     code_package_url,
+                     step_name,
+                     [step_cli],
+                     task_spec,
+                     offload_command_to_s3,
                  )
              )
              .image(image)
@@ -249,6 +306,7 @@ class Batch(object):
              )
              .task_id(attrs.get("metaflow.task_id"))
              .environment_variable("AWS_DEFAULT_REGION", self._client.region())
+             .environment_variable("METAFLOW_CODE_METADATA", code_package_metadata)
              .environment_variable("METAFLOW_CODE_SHA", code_package_sha)
              .environment_variable("METAFLOW_CODE_URL", code_package_url)
              .environment_variable("METAFLOW_CODE_DS", code_package_ds)
@@ -327,6 +385,11 @@ class Batch(object):
              if key in attrs:
                  k, v = sanitize_batch_tag(key, attrs.get(key))
                  job.tag(k, v)
+
+         if aws_batch_tags is not None:
+             for key, value in aws_batch_tags.items():
+                 job.tag(key, value)
+
          return job
 
      def launch_job(
@@ -334,6 +397,7 @@ class Batch(object):
          step_name,
          step_cli,
          task_spec,
+         code_package_metadata,
          code_package_sha,
          code_package_url,
          code_package_ds,
@@ -353,6 +417,7 @@ class Batch(object):
          host_volumes=None,
          efs_volumes=None,
          use_tmpfs=None,
+         aws_batch_tags=None,
          tmpfs_tempdir=None,
          tmpfs_size=None,
          tmpfs_path=None,
@@ -374,6 +439,7 @@ class Batch(object):
              step_name,
              step_cli,
              task_spec,
+             code_package_metadata,
              code_package_sha,
              code_package_url,
              code_package_ds,
@@ -395,6 +461,7 @@ class Batch(object):
              host_volumes=host_volumes,
              efs_volumes=efs_volumes,
              use_tmpfs=use_tmpfs,
+             aws_batch_tags=aws_batch_tags,
              tmpfs_tempdir=tmpfs_tempdir,
              tmpfs_size=tmpfs_size,
              tmpfs_path=tmpfs_path,