ob-metaflow 2.11.13.1__py2.py3-none-any.whl → 2.19.7.1rc0__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (289) hide show
  1. metaflow/R.py +10 -7
  2. metaflow/__init__.py +40 -25
  3. metaflow/_vendor/imghdr/__init__.py +186 -0
  4. metaflow/_vendor/importlib_metadata/__init__.py +1063 -0
  5. metaflow/_vendor/importlib_metadata/_adapters.py +68 -0
  6. metaflow/_vendor/importlib_metadata/_collections.py +30 -0
  7. metaflow/_vendor/importlib_metadata/_compat.py +71 -0
  8. metaflow/_vendor/importlib_metadata/_functools.py +104 -0
  9. metaflow/_vendor/importlib_metadata/_itertools.py +73 -0
  10. metaflow/_vendor/importlib_metadata/_meta.py +48 -0
  11. metaflow/_vendor/importlib_metadata/_text.py +99 -0
  12. metaflow/_vendor/importlib_metadata/py.typed +0 -0
  13. metaflow/_vendor/typeguard/__init__.py +48 -0
  14. metaflow/_vendor/typeguard/_checkers.py +1070 -0
  15. metaflow/_vendor/typeguard/_config.py +108 -0
  16. metaflow/_vendor/typeguard/_decorators.py +233 -0
  17. metaflow/_vendor/typeguard/_exceptions.py +42 -0
  18. metaflow/_vendor/typeguard/_functions.py +308 -0
  19. metaflow/_vendor/typeguard/_importhook.py +213 -0
  20. metaflow/_vendor/typeguard/_memo.py +48 -0
  21. metaflow/_vendor/typeguard/_pytest_plugin.py +127 -0
  22. metaflow/_vendor/typeguard/_suppression.py +86 -0
  23. metaflow/_vendor/typeguard/_transformer.py +1229 -0
  24. metaflow/_vendor/typeguard/_union_transformer.py +55 -0
  25. metaflow/_vendor/typeguard/_utils.py +173 -0
  26. metaflow/_vendor/typeguard/py.typed +0 -0
  27. metaflow/_vendor/typing_extensions.py +3641 -0
  28. metaflow/_vendor/v3_7/importlib_metadata/__init__.py +1063 -0
  29. metaflow/_vendor/v3_7/importlib_metadata/_adapters.py +68 -0
  30. metaflow/_vendor/v3_7/importlib_metadata/_collections.py +30 -0
  31. metaflow/_vendor/v3_7/importlib_metadata/_compat.py +71 -0
  32. metaflow/_vendor/v3_7/importlib_metadata/_functools.py +104 -0
  33. metaflow/_vendor/v3_7/importlib_metadata/_itertools.py +73 -0
  34. metaflow/_vendor/v3_7/importlib_metadata/_meta.py +48 -0
  35. metaflow/_vendor/v3_7/importlib_metadata/_text.py +99 -0
  36. metaflow/_vendor/v3_7/importlib_metadata/py.typed +0 -0
  37. metaflow/_vendor/v3_7/typeguard/__init__.py +48 -0
  38. metaflow/_vendor/v3_7/typeguard/_checkers.py +906 -0
  39. metaflow/_vendor/v3_7/typeguard/_config.py +108 -0
  40. metaflow/_vendor/v3_7/typeguard/_decorators.py +237 -0
  41. metaflow/_vendor/v3_7/typeguard/_exceptions.py +42 -0
  42. metaflow/_vendor/v3_7/typeguard/_functions.py +310 -0
  43. metaflow/_vendor/v3_7/typeguard/_importhook.py +213 -0
  44. metaflow/_vendor/v3_7/typeguard/_memo.py +48 -0
  45. metaflow/_vendor/v3_7/typeguard/_pytest_plugin.py +100 -0
  46. metaflow/_vendor/v3_7/typeguard/_suppression.py +88 -0
  47. metaflow/_vendor/v3_7/typeguard/_transformer.py +1207 -0
  48. metaflow/_vendor/v3_7/typeguard/_union_transformer.py +54 -0
  49. metaflow/_vendor/v3_7/typeguard/_utils.py +169 -0
  50. metaflow/_vendor/v3_7/typeguard/py.typed +0 -0
  51. metaflow/_vendor/v3_7/typing_extensions.py +3072 -0
  52. metaflow/_vendor/yaml/__init__.py +427 -0
  53. metaflow/_vendor/yaml/composer.py +139 -0
  54. metaflow/_vendor/yaml/constructor.py +748 -0
  55. metaflow/_vendor/yaml/cyaml.py +101 -0
  56. metaflow/_vendor/yaml/dumper.py +62 -0
  57. metaflow/_vendor/yaml/emitter.py +1137 -0
  58. metaflow/_vendor/yaml/error.py +75 -0
  59. metaflow/_vendor/yaml/events.py +86 -0
  60. metaflow/_vendor/yaml/loader.py +63 -0
  61. metaflow/_vendor/yaml/nodes.py +49 -0
  62. metaflow/_vendor/yaml/parser.py +589 -0
  63. metaflow/_vendor/yaml/reader.py +185 -0
  64. metaflow/_vendor/yaml/representer.py +389 -0
  65. metaflow/_vendor/yaml/resolver.py +227 -0
  66. metaflow/_vendor/yaml/scanner.py +1435 -0
  67. metaflow/_vendor/yaml/serializer.py +111 -0
  68. metaflow/_vendor/yaml/tokens.py +104 -0
  69. metaflow/cards.py +5 -0
  70. metaflow/cli.py +331 -785
  71. metaflow/cli_args.py +17 -0
  72. metaflow/cli_components/__init__.py +0 -0
  73. metaflow/cli_components/dump_cmd.py +96 -0
  74. metaflow/cli_components/init_cmd.py +52 -0
  75. metaflow/cli_components/run_cmds.py +546 -0
  76. metaflow/cli_components/step_cmd.py +334 -0
  77. metaflow/cli_components/utils.py +140 -0
  78. metaflow/client/__init__.py +1 -0
  79. metaflow/client/core.py +467 -73
  80. metaflow/client/filecache.py +75 -35
  81. metaflow/clone_util.py +7 -1
  82. metaflow/cmd/code/__init__.py +231 -0
  83. metaflow/cmd/develop/stub_generator.py +756 -288
  84. metaflow/cmd/develop/stubs.py +12 -28
  85. metaflow/cmd/main_cli.py +6 -4
  86. metaflow/cmd/make_wrapper.py +78 -0
  87. metaflow/datastore/__init__.py +1 -0
  88. metaflow/datastore/content_addressed_store.py +41 -10
  89. metaflow/datastore/datastore_set.py +11 -2
  90. metaflow/datastore/flow_datastore.py +156 -10
  91. metaflow/datastore/spin_datastore.py +91 -0
  92. metaflow/datastore/task_datastore.py +154 -39
  93. metaflow/debug.py +5 -0
  94. metaflow/decorators.py +404 -78
  95. metaflow/exception.py +8 -2
  96. metaflow/extension_support/__init__.py +527 -376
  97. metaflow/extension_support/_empty_file.py +2 -2
  98. metaflow/extension_support/plugins.py +49 -31
  99. metaflow/flowspec.py +482 -33
  100. metaflow/graph.py +210 -42
  101. metaflow/includefile.py +84 -40
  102. metaflow/lint.py +141 -22
  103. metaflow/meta_files.py +13 -0
  104. metaflow/{metadata → metadata_provider}/heartbeat.py +24 -8
  105. metaflow/{metadata → metadata_provider}/metadata.py +86 -1
  106. metaflow/metaflow_config.py +175 -28
  107. metaflow/metaflow_config_funcs.py +51 -3
  108. metaflow/metaflow_current.py +4 -10
  109. metaflow/metaflow_environment.py +139 -53
  110. metaflow/metaflow_git.py +115 -0
  111. metaflow/metaflow_profile.py +18 -0
  112. metaflow/metaflow_version.py +150 -66
  113. metaflow/mflog/__init__.py +4 -3
  114. metaflow/mflog/save_logs.py +2 -2
  115. metaflow/multicore_utils.py +31 -14
  116. metaflow/package/__init__.py +673 -0
  117. metaflow/packaging_sys/__init__.py +880 -0
  118. metaflow/packaging_sys/backend.py +128 -0
  119. metaflow/packaging_sys/distribution_support.py +153 -0
  120. metaflow/packaging_sys/tar_backend.py +99 -0
  121. metaflow/packaging_sys/utils.py +54 -0
  122. metaflow/packaging_sys/v1.py +527 -0
  123. metaflow/parameters.py +149 -28
  124. metaflow/plugins/__init__.py +74 -5
  125. metaflow/plugins/airflow/airflow.py +40 -25
  126. metaflow/plugins/airflow/airflow_cli.py +22 -5
  127. metaflow/plugins/airflow/airflow_decorator.py +1 -1
  128. metaflow/plugins/airflow/airflow_utils.py +5 -3
  129. metaflow/plugins/airflow/sensors/base_sensor.py +4 -4
  130. metaflow/plugins/airflow/sensors/external_task_sensor.py +2 -2
  131. metaflow/plugins/airflow/sensors/s3_sensor.py +2 -2
  132. metaflow/plugins/argo/argo_client.py +78 -33
  133. metaflow/plugins/argo/argo_events.py +6 -6
  134. metaflow/plugins/argo/argo_workflows.py +2410 -527
  135. metaflow/plugins/argo/argo_workflows_cli.py +571 -121
  136. metaflow/plugins/argo/argo_workflows_decorator.py +43 -12
  137. metaflow/plugins/argo/argo_workflows_deployer.py +106 -0
  138. metaflow/plugins/argo/argo_workflows_deployer_objects.py +453 -0
  139. metaflow/plugins/argo/capture_error.py +73 -0
  140. metaflow/plugins/argo/conditional_input_paths.py +35 -0
  141. metaflow/plugins/argo/exit_hooks.py +209 -0
  142. metaflow/plugins/argo/jobset_input_paths.py +15 -0
  143. metaflow/plugins/argo/param_val.py +19 -0
  144. metaflow/plugins/aws/aws_client.py +10 -3
  145. metaflow/plugins/aws/aws_utils.py +55 -2
  146. metaflow/plugins/aws/batch/batch.py +72 -5
  147. metaflow/plugins/aws/batch/batch_cli.py +33 -10
  148. metaflow/plugins/aws/batch/batch_client.py +4 -3
  149. metaflow/plugins/aws/batch/batch_decorator.py +102 -35
  150. metaflow/plugins/aws/secrets_manager/aws_secrets_manager_secrets_provider.py +13 -10
  151. metaflow/plugins/aws/step_functions/dynamo_db_client.py +0 -3
  152. metaflow/plugins/aws/step_functions/production_token.py +1 -1
  153. metaflow/plugins/aws/step_functions/step_functions.py +65 -8
  154. metaflow/plugins/aws/step_functions/step_functions_cli.py +101 -7
  155. metaflow/plugins/aws/step_functions/step_functions_decorator.py +1 -2
  156. metaflow/plugins/aws/step_functions/step_functions_deployer.py +97 -0
  157. metaflow/plugins/aws/step_functions/step_functions_deployer_objects.py +264 -0
  158. metaflow/plugins/azure/azure_exceptions.py +1 -1
  159. metaflow/plugins/azure/azure_secret_manager_secrets_provider.py +240 -0
  160. metaflow/plugins/azure/azure_tail.py +1 -1
  161. metaflow/plugins/azure/includefile_support.py +2 -0
  162. metaflow/plugins/cards/card_cli.py +66 -30
  163. metaflow/plugins/cards/card_creator.py +25 -1
  164. metaflow/plugins/cards/card_datastore.py +21 -49
  165. metaflow/plugins/cards/card_decorator.py +132 -8
  166. metaflow/plugins/cards/card_modules/basic.py +112 -17
  167. metaflow/plugins/cards/card_modules/bundle.css +1 -1
  168. metaflow/plugins/cards/card_modules/card.py +16 -1
  169. metaflow/plugins/cards/card_modules/chevron/renderer.py +1 -1
  170. metaflow/plugins/cards/card_modules/components.py +665 -28
  171. metaflow/plugins/cards/card_modules/convert_to_native_type.py +36 -7
  172. metaflow/plugins/cards/card_modules/json_viewer.py +232 -0
  173. metaflow/plugins/cards/card_modules/main.css +1 -0
  174. metaflow/plugins/cards/card_modules/main.js +68 -49
  175. metaflow/plugins/cards/card_modules/renderer_tools.py +1 -0
  176. metaflow/plugins/cards/card_modules/test_cards.py +26 -12
  177. metaflow/plugins/cards/card_server.py +39 -14
  178. metaflow/plugins/cards/component_serializer.py +2 -9
  179. metaflow/plugins/cards/metadata.py +22 -0
  180. metaflow/plugins/catch_decorator.py +9 -0
  181. metaflow/plugins/datastores/azure_storage.py +10 -1
  182. metaflow/plugins/datastores/gs_storage.py +6 -2
  183. metaflow/plugins/datastores/local_storage.py +12 -6
  184. metaflow/plugins/datastores/spin_storage.py +12 -0
  185. metaflow/plugins/datatools/local.py +2 -0
  186. metaflow/plugins/datatools/s3/s3.py +126 -75
  187. metaflow/plugins/datatools/s3/s3op.py +254 -121
  188. metaflow/plugins/env_escape/__init__.py +3 -3
  189. metaflow/plugins/env_escape/client_modules.py +102 -72
  190. metaflow/plugins/env_escape/server.py +7 -0
  191. metaflow/plugins/env_escape/stub.py +24 -5
  192. metaflow/plugins/events_decorator.py +343 -185
  193. metaflow/plugins/exit_hook/__init__.py +0 -0
  194. metaflow/plugins/exit_hook/exit_hook_decorator.py +46 -0
  195. metaflow/plugins/exit_hook/exit_hook_script.py +52 -0
  196. metaflow/plugins/gcp/__init__.py +1 -1
  197. metaflow/plugins/gcp/gcp_secret_manager_secrets_provider.py +11 -6
  198. metaflow/plugins/gcp/gs_tail.py +10 -6
  199. metaflow/plugins/gcp/includefile_support.py +3 -0
  200. metaflow/plugins/kubernetes/kube_utils.py +108 -0
  201. metaflow/plugins/kubernetes/kubernetes.py +411 -130
  202. metaflow/plugins/kubernetes/kubernetes_cli.py +168 -36
  203. metaflow/plugins/kubernetes/kubernetes_client.py +104 -2
  204. metaflow/plugins/kubernetes/kubernetes_decorator.py +246 -88
  205. metaflow/plugins/kubernetes/kubernetes_job.py +253 -581
  206. metaflow/plugins/kubernetes/kubernetes_jobsets.py +1071 -0
  207. metaflow/plugins/kubernetes/spot_metadata_cli.py +69 -0
  208. metaflow/plugins/kubernetes/spot_monitor_sidecar.py +109 -0
  209. metaflow/plugins/logs_cli.py +359 -0
  210. metaflow/plugins/{metadata → metadata_providers}/local.py +144 -84
  211. metaflow/plugins/{metadata → metadata_providers}/service.py +103 -26
  212. metaflow/plugins/metadata_providers/spin.py +16 -0
  213. metaflow/plugins/package_cli.py +36 -24
  214. metaflow/plugins/parallel_decorator.py +128 -11
  215. metaflow/plugins/parsers.py +16 -0
  216. metaflow/plugins/project_decorator.py +51 -5
  217. metaflow/plugins/pypi/bootstrap.py +357 -105
  218. metaflow/plugins/pypi/conda_decorator.py +82 -81
  219. metaflow/plugins/pypi/conda_environment.py +187 -52
  220. metaflow/plugins/pypi/micromamba.py +157 -47
  221. metaflow/plugins/pypi/parsers.py +268 -0
  222. metaflow/plugins/pypi/pip.py +88 -13
  223. metaflow/plugins/pypi/pypi_decorator.py +37 -1
  224. metaflow/plugins/pypi/utils.py +48 -2
  225. metaflow/plugins/resources_decorator.py +2 -2
  226. metaflow/plugins/secrets/__init__.py +3 -0
  227. metaflow/plugins/secrets/secrets_decorator.py +26 -181
  228. metaflow/plugins/secrets/secrets_func.py +49 -0
  229. metaflow/plugins/secrets/secrets_spec.py +101 -0
  230. metaflow/plugins/secrets/utils.py +74 -0
  231. metaflow/plugins/tag_cli.py +4 -7
  232. metaflow/plugins/test_unbounded_foreach_decorator.py +41 -6
  233. metaflow/plugins/timeout_decorator.py +3 -3
  234. metaflow/plugins/uv/__init__.py +0 -0
  235. metaflow/plugins/uv/bootstrap.py +128 -0
  236. metaflow/plugins/uv/uv_environment.py +72 -0
  237. metaflow/procpoll.py +1 -1
  238. metaflow/pylint_wrapper.py +5 -1
  239. metaflow/runner/__init__.py +0 -0
  240. metaflow/runner/click_api.py +717 -0
  241. metaflow/runner/deployer.py +470 -0
  242. metaflow/runner/deployer_impl.py +201 -0
  243. metaflow/runner/metaflow_runner.py +714 -0
  244. metaflow/runner/nbdeploy.py +132 -0
  245. metaflow/runner/nbrun.py +225 -0
  246. metaflow/runner/subprocess_manager.py +650 -0
  247. metaflow/runner/utils.py +335 -0
  248. metaflow/runtime.py +1078 -260
  249. metaflow/sidecar/sidecar_worker.py +1 -1
  250. metaflow/system/__init__.py +5 -0
  251. metaflow/system/system_logger.py +85 -0
  252. metaflow/system/system_monitor.py +108 -0
  253. metaflow/system/system_utils.py +19 -0
  254. metaflow/task.py +521 -225
  255. metaflow/tracing/__init__.py +7 -7
  256. metaflow/tracing/span_exporter.py +31 -38
  257. metaflow/tracing/tracing_modules.py +38 -43
  258. metaflow/tuple_util.py +27 -0
  259. metaflow/user_configs/__init__.py +0 -0
  260. metaflow/user_configs/config_options.py +563 -0
  261. metaflow/user_configs/config_parameters.py +598 -0
  262. metaflow/user_decorators/__init__.py +0 -0
  263. metaflow/user_decorators/common.py +144 -0
  264. metaflow/user_decorators/mutable_flow.py +512 -0
  265. metaflow/user_decorators/mutable_step.py +424 -0
  266. metaflow/user_decorators/user_flow_decorator.py +264 -0
  267. metaflow/user_decorators/user_step_decorator.py +749 -0
  268. metaflow/util.py +243 -27
  269. metaflow/vendor.py +23 -7
  270. metaflow/version.py +1 -1
  271. ob_metaflow-2.19.7.1rc0.data/data/share/metaflow/devtools/Makefile +355 -0
  272. ob_metaflow-2.19.7.1rc0.data/data/share/metaflow/devtools/Tiltfile +726 -0
  273. ob_metaflow-2.19.7.1rc0.data/data/share/metaflow/devtools/pick_services.sh +105 -0
  274. ob_metaflow-2.19.7.1rc0.dist-info/METADATA +87 -0
  275. ob_metaflow-2.19.7.1rc0.dist-info/RECORD +445 -0
  276. {ob_metaflow-2.11.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/WHEEL +1 -1
  277. {ob_metaflow-2.11.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/entry_points.txt +1 -0
  278. metaflow/_vendor/v3_5/__init__.py +0 -1
  279. metaflow/_vendor/v3_5/importlib_metadata/__init__.py +0 -644
  280. metaflow/_vendor/v3_5/importlib_metadata/_compat.py +0 -152
  281. metaflow/package.py +0 -188
  282. ob_metaflow-2.11.13.1.dist-info/METADATA +0 -85
  283. ob_metaflow-2.11.13.1.dist-info/RECORD +0 -308
  284. /metaflow/_vendor/{v3_5/zipp.py → zipp.py} +0 -0
  285. /metaflow/{metadata → metadata_provider}/__init__.py +0 -0
  286. /metaflow/{metadata → metadata_provider}/util.py +0 -0
  287. /metaflow/plugins/{metadata → metadata_providers}/__init__.py +0 -0
  288. {ob_metaflow-2.11.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info/licenses}/LICENSE +0 -0
  289. {ob_metaflow-2.11.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/top_level.txt +0 -0
@@ -3,16 +3,25 @@ import sys
3
3
  import time
4
4
  import traceback
5
5
 
6
+ from metaflow.plugins.kubernetes.kube_utils import (
7
+ parse_cli_options,
8
+ parse_kube_keyvalue_list,
9
+ )
10
+ from metaflow.plugins.kubernetes.kubernetes_client import KubernetesClient
11
+ import metaflow.tracing as tracing
6
12
  from metaflow import JSONTypeClass, util
7
13
  from metaflow._vendor import click
8
- from metaflow.exception import METAFLOW_EXIT_DISALLOW_RETRY, CommandException
9
- from metaflow.metadata.util import sync_local_metadata_from_datastore
10
- from metaflow.metaflow_config import DATASTORE_LOCAL_DIR, KUBERNETES_LABELS
14
+ from metaflow.exception import METAFLOW_EXIT_DISALLOW_RETRY, MetaflowException
15
+ from metaflow.metadata_provider.util import sync_local_metadata_from_datastore
16
+ from metaflow.metaflow_config import DATASTORE_LOCAL_DIR
11
17
  from metaflow.mflog import TASK_LOG_SOURCE
12
- import metaflow.tracing as tracing
18
+ from metaflow.unbounded_foreach import UBF_CONTROL, UBF_TASK
13
19
 
14
- from .kubernetes import Kubernetes, KubernetesKilledException, parse_kube_keyvalue_list
15
- from .kubernetes_decorator import KubernetesDecorator
20
+ from .kubernetes import (
21
+ Kubernetes,
22
+ KubernetesException,
23
+ KubernetesKilledException,
24
+ )
16
25
 
17
26
 
18
27
  @click.group()
@@ -25,13 +34,14 @@ def kubernetes():
25
34
  pass
26
35
 
27
36
 
28
- @tracing.cli_entrypoint("kubernetes/step")
29
37
  @kubernetes.command(
30
38
  help="Execute a single task on Kubernetes. This command calls the top-level step "
31
39
  "command inside a Kubernetes pod with the given options. Typically you do not call "
32
40
  "this command directly; it is used internally by Metaflow."
33
41
  )
42
+ @tracing.cli("kubernetes/step")
34
43
  @click.argument("step-name")
44
+ @click.argument("code-package-metadata")
35
45
  @click.argument("code-package-sha")
36
46
  @click.argument("code-package-url")
37
47
  @click.option(
@@ -44,6 +54,12 @@ def kubernetes():
44
54
  default=None,
45
55
  help="Optional Docker Image Pull Policy for Kubernetes pod.",
46
56
  )
57
+ @click.option(
58
+ "--image-pull-secrets",
59
+ default=None,
60
+ type=JSONTypeClass(),
61
+ multiple=False,
62
+ )
47
63
  @click.option(
48
64
  "--service-account",
49
65
  help="IRSA requirement for Kubernetes pod.",
@@ -107,6 +123,23 @@ def kubernetes():
107
123
  type=JSONTypeClass(),
108
124
  multiple=False,
109
125
  )
126
+ @click.option("--shared-memory", default=None, help="Size of shared memory in MiB")
127
+ @click.option("--port", default=None, help="Port number to expose from the container")
128
+ @click.option(
129
+ "--ubf-context", default=None, type=click.Choice([None, UBF_CONTROL, UBF_TASK])
130
+ )
131
+ @click.option(
132
+ "--num-parallel",
133
+ default=None,
134
+ type=int,
135
+ help="Number of parallel nodes to run as a multi-node job.",
136
+ )
137
+ @click.option(
138
+ "--qos",
139
+ default=None,
140
+ type=str,
141
+ help="Quality of Service class for the Kubernetes pod",
142
+ )
110
143
  @click.option(
111
144
  "--labels",
112
145
  default=None,
@@ -119,24 +152,23 @@ def kubernetes():
119
152
  type=JSONTypeClass(),
120
153
  multiple=False,
121
154
  )
122
- @click.option("--ubf-context", default=None, type=click.Choice([None, "ubf_control"]))
123
155
  @click.option(
124
- "--num-parallel",
125
- default=0,
126
- type=int,
127
- help="Number of parallel nodes to run as a multi-node job.",
156
+ "--security-context",
157
+ default=None,
158
+ type=JSONTypeClass(),
159
+ multiple=False,
128
160
  )
129
- @click.option("--shared-memory", default=None, help="Size of shared memory in MiB")
130
- @click.option("--port", default=None, help="Port number to expose from the container")
131
161
  @click.pass_context
132
162
  def step(
133
163
  ctx,
134
164
  step_name,
165
+ code_package_metadata,
135
166
  code_package_sha,
136
167
  code_package_url,
137
168
  executable=None,
138
169
  image=None,
139
170
  image_pull_policy=None,
171
+ image_pull_secrets=None,
140
172
  service_account=None,
141
173
  secrets=None,
142
174
  node_selector=None,
@@ -153,11 +185,13 @@ def step(
153
185
  run_time_limit=None,
154
186
  persistent_volume_claims=None,
155
187
  tolerations=None,
156
- labels=None,
157
- annotations=None,
158
- num_parallel=None,
159
188
  shared_memory=None,
160
189
  port=None,
190
+ num_parallel=None,
191
+ qos=None,
192
+ labels=None,
193
+ annotations=None,
194
+ security_context=None,
161
195
  **kwargs
162
196
  ):
163
197
  def echo(msg, stream="stderr", job_id=None, **kwargs):
@@ -172,7 +206,7 @@ def step(
172
206
  executable = ctx.obj.environment.executable(step_name, executable)
173
207
 
174
208
  # Set environment
175
- env = {}
209
+ env = {"METAFLOW_FLOW_FILENAME": os.path.basename(sys.argv[0])}
176
210
  env_deco = [deco for deco in node.decorators if deco.name == "environment"]
177
211
  if env_deco:
178
212
  env = env_deco[0].attributes["vars"]
@@ -189,6 +223,12 @@ def step(
189
223
  kwargs["input_paths"] = "".join("${%s}" % s for s in split_vars.keys())
190
224
  env.update(split_vars)
191
225
 
226
+ if num_parallel is not None and num_parallel <= 1:
227
+ raise KubernetesException(
228
+ "Using @parallel with `num_parallel` <= 1 is not supported with "
229
+ "@kubernetes. Please set the value of `num_parallel` to be greater than 1."
230
+ )
231
+
192
232
  # Set retry policy.
193
233
  retry_count = int(kwargs.get("retry_count", 0))
194
234
  retry_deco = [deco for deco in node.decorators if deco.name == "retry"]
@@ -203,25 +243,37 @@ def step(
203
243
  )
204
244
  time.sleep(minutes_between_retries * 60)
205
245
 
206
- step_args = " ".join(util.dict_to_cli_options(kwargs))
207
- num_parallel = num_parallel or 0
208
- if num_parallel and num_parallel > 1:
209
- # For multinode, we need to add a placeholder that can be mutated by the caller
210
- step_args += " [multinode-args]"
246
+ # Explicitly Remove `ubf_context` from `kwargs` so that it's not passed as a commandline option
247
+ # If an underlying step command is executing a vanilla Kubernetes job, then it should never need
248
+ # to know about the UBF context.
249
+ # If it is a jobset which is executing a multi-node job, then the UBF context is set based on the
250
+ # `ubf_context` parameter passed to the jobset.
251
+ kwargs.pop("ubf_context", None)
252
+ # `task_id` also needs to be removed from `kwargs` as it needs to be dynamically
253
+ # set in the downstream code IF num_parallel is > 1
254
+ task_id = kwargs["task_id"]
255
+ if num_parallel:
256
+ kwargs.pop("task_id")
211
257
 
212
258
  step_cli = "{entrypoint} {top_args} step {step} {step_args}".format(
213
259
  entrypoint="%s -u %s" % (executable, os.path.basename(sys.argv[0])),
214
260
  top_args=" ".join(util.dict_to_cli_options(ctx.parent.parent.params)),
215
261
  step=step_name,
216
- step_args=step_args,
262
+ step_args=" ".join(util.dict_to_cli_options(kwargs)),
217
263
  )
264
+ # Since it is a parallel step there are some parts of the step_cli that need to be modified
265
+ # based on the type of worker in the JobSet. This is why we will create a placeholder string
266
+ # in the template which will be replaced based on the type of worker.
267
+
268
+ if num_parallel:
269
+ step_cli = "%s {METAFLOW_PARALLEL_STEP_CLI_OPTIONS_TEMPLATE}" % step_cli
218
270
 
219
271
  # Set log tailing.
220
272
  ds = ctx.obj.flow_datastore.get_task_datastore(
221
273
  mode="w",
222
274
  run_id=kwargs["run_id"],
223
275
  step_name=step_name,
224
- task_id=kwargs["task_id"],
276
+ task_id=task_id,
225
277
  attempt=int(retry_count),
226
278
  )
227
279
  stdout_location = ds.get_log_location(TASK_LOG_SOURCE, "stdout")
@@ -235,14 +287,10 @@ def step(
235
287
  sync_local_metadata_from_datastore(
236
288
  DATASTORE_LOCAL_DIR,
237
289
  ctx.obj.flow_datastore.get_task_datastore(
238
- kwargs["run_id"], step_name, kwargs["task_id"]
290
+ kwargs["run_id"], step_name, task_id
239
291
  ),
240
292
  )
241
293
 
242
- attrs = {
243
- "metaflow.task_id": kwargs["task_id"],
244
- "requires_passwordless_ssh": any([getattr(deco, "requires_passwordless_ssh", False) for deco in node.decorators]),
245
- }
246
294
  try:
247
295
  kubernetes = Kubernetes(
248
296
  datastore=ctx.obj.flow_datastore,
@@ -255,15 +303,17 @@ def step(
255
303
  flow_name=ctx.obj.flow.name,
256
304
  run_id=kwargs["run_id"],
257
305
  step_name=step_name,
258
- task_id=kwargs["task_id"],
306
+ task_id=task_id,
259
307
  attempt=str(retry_count),
260
308
  user=util.get_username(),
309
+ code_package_metadata=code_package_metadata,
261
310
  code_package_sha=code_package_sha,
262
311
  code_package_url=code_package_url,
263
312
  code_package_ds=ctx.obj.flow_datastore.TYPE,
264
313
  step_cli=step_cli,
265
314
  docker_image=image,
266
315
  docker_image_pull_policy=image_pull_policy,
316
+ image_pull_secrets=image_pull_secrets,
267
317
  service_account=service_account,
268
318
  secrets=secrets,
269
319
  node_selector=node_selector,
@@ -281,14 +331,15 @@ def step(
281
331
  env=env,
282
332
  persistent_volume_claims=persistent_volume_claims,
283
333
  tolerations=tolerations,
284
- labels=labels,
285
- annotations=annotations,
286
- num_parallel=num_parallel,
287
334
  shared_memory=shared_memory,
288
335
  port=port,
289
- attrs=attrs,
336
+ num_parallel=num_parallel,
337
+ qos=qos,
338
+ labels=labels,
339
+ annotations=annotations,
340
+ security_context=security_context,
290
341
  )
291
- except Exception as e:
342
+ except Exception:
292
343
  traceback.print_exc(chain=False)
293
344
  _sync_metadata()
294
345
  sys.exit(METAFLOW_EXIT_DISALLOW_RETRY)
@@ -300,3 +351,84 @@ def step(
300
351
  sys.exit(METAFLOW_EXIT_DISALLOW_RETRY)
301
352
  finally:
302
353
  _sync_metadata()
354
+
355
+
356
@kubernetes.command(help="List unfinished Kubernetes tasks of this flow.")
@click.option(
    "--my-runs", default=False, is_flag=True, help="List all my unfinished tasks."
)
@click.option("--user", default=None, help="List unfinished tasks for the given user.")
@click.option(
    "--run-id", default=None, help="List unfinished tasks corresponding to the run id."
)
@click.pass_obj
def list(obj, run_id, user, my_runs):
    """Echo one summary line for every unfinished pod of the current flow.

    The raw ``--run-id`` / ``--user`` / ``--my-runs`` options are first
    normalized by ``parse_cli_options``; the resulting run id and user are
    used to filter the pods returned by ``KubernetesClient.list``. When no
    pod matches, a single "nothing found" message is echoed instead.
    """
    flow_name, run_id, user = parse_cli_options(
        obj.flow.name, run_id, user, my_runs, obj.echo
    )
    active_pods = KubernetesClient().list(obj.flow.name, run_id, user)

    if not active_pods:
        obj.echo("No active Kubernetes pods found.")
        return

    def format_timestamp(timestamp=None):
        # Pods that have not started yet carry no start time.
        return "-" if timestamp is None else timestamp.strftime("%Y-%m-%d %H:%M:%S")

    line_template = (
        "Run: *{run_id}* "
        "Pod: *{pod_id}* "
        "Started At: {startedAt} "
        "Status: *{status}*"
    )

    for pod in active_pods:
        meta = pod.metadata
        # Fall back to the Argo workflow label when the pod has no
        # `metaflow/run_id` annotation.
        argo_workflow = meta.labels.get("workflows.argoproj.io/workflow")
        shown_run_id = meta.annotations.get("metaflow/run_id", argo_workflow)
        obj.echo(
            line_template.format(
                run_id=shown_run_id,
                pod_id=meta.name,
                startedAt=format_timestamp(pod.status.start_time),
                status=pod.status.phase,
            )
        )
400
+
401
+
402
@kubernetes.command(
    help="Terminate unfinished Kubernetes tasks of this flow. Killed pods may result in newer attempts when using @retry."
)
@click.option(
    "--my-runs", default=False, is_flag=True, help="Kill all my unfinished tasks."
)
@click.option(
    "--user", default=None, help="Terminate unfinished tasks for the given user."
)
@click.option(
    "--run-id",
    default=None,
    help="Terminate unfinished tasks corresponding to the run id.",
)
@click.pass_obj
def kill(obj, run_id, user, my_runs):
    """Terminate the unfinished pods of the current flow.

    The CLI options are normalized by ``parse_cli_options`` and then handed
    to ``KubernetesClient.kill_pods``.

    Raises:
        MetaflowException: if the run id starts with ``argo-`` or the user is
            ``argo-workflows``; such pods must be stopped through
            ``argo-workflows terminate``.
    """
    flow_name, run_id, user = parse_cli_options(
        obj.flow.name, run_id, user, my_runs, obj.echo
    )

    # A run id is only inspected when one was resolved; the user check is an
    # independent second trigger for the same refusal.
    is_argo_run = run_id is not None and run_id.startswith("argo-")
    if is_argo_run or user == "argo-workflows":
        raise MetaflowException(
            "Killing pods launched by Argo Workflows is not supported. "
            "Use *argo-workflows terminate* instead."
        )

    KubernetesClient().kill_pods(flow_name, run_id, user, obj.echo)
@@ -1,11 +1,12 @@
1
+ from concurrent.futures import ThreadPoolExecutor
1
2
  import os
2
3
  import sys
3
4
  import time
4
5
 
5
6
  from metaflow.exception import MetaflowException
7
+ from metaflow.metaflow_config import KUBERNETES_NAMESPACE
6
8
 
7
- from .kubernetes_job import KubernetesJob
8
-
9
+ from .kubernetes_job import KubernetesJob, KubernetesJobSet
9
10
 
10
11
  CLIENT_REFRESH_INTERVAL_SECONDS = 300
11
12
 
@@ -29,6 +30,7 @@ class KubernetesClient(object):
29
30
  % sys.executable
30
31
  )
31
32
  self._refresh_client()
33
+ self._namespace = KUBERNETES_NAMESPACE
32
34
 
33
35
  def _refresh_client(self):
34
36
  from kubernetes import client, config
@@ -61,5 +63,105 @@ class KubernetesClient(object):
61
63
 
62
64
  return self._client
63
65
 
66
def _find_active_pods(self, flow_name, run_id=None, user=None):
    """Yield the unfinished pods in this client's namespace that belong to a flow.

    Pods are listed page by page from ``self._namespace`` (phases
    ``Succeeded`` and ``Failed`` are excluded server-side) and then filtered
    client-side on their Metaflow annotations.

    Args:
        flow_name: value the ``metaflow/flow_name`` annotation must equal.
        run_id: optional run id. A leading ``argo-`` is stripped; the result
            must equal either the ``metaflow/run_id`` annotation or the
            ``workflows.argoproj.io/workflow`` label.
        user: optional value the ``metaflow/user`` annotation must equal.

    Yields:
        Pod objects, in the order the API returned them.
    """
    argo_prefix = "argo-"

    def _fetch_page(continue_token=None):
        # One page of a paginated listing; `continue_token` resumes where the
        # previous page stopped.
        return self._client.CoreV1Api().list_namespaced_pod(
            namespace=self._namespace,
            # limited selector support for K8S api. We want to cover multiple
            # statuses: Running / Pending / Unknown
            field_selector="status.phase!=Succeeded,status.phase!=Failed",
            limit=1000,
            _continue=continue_token,
        )

    def _matches(pod):
        annotations = pod.metadata.annotations
        labels = pod.metadata.labels
        # Arbitrary pods might have no annotations (or labels) at all.
        if not annotations or not labels:
            return False
        if run_id is not None:
            same_run = annotations.get("metaflow/run_id") == run_id
            # Pods launched by argo-workflows are matched via their label.
            same_workflow = labels.get("workflows.argoproj.io/workflow") == run_id
            if not (same_run or same_workflow):
                return False
        if user is not None and annotations.get("metaflow/user") != user:
            return False
        return annotations.get("metaflow/flow_name") == flow_name

    page = _fetch_page()

    # Handle argo prefixes in run_id explicitly instead of slicing with a bool.
    if run_id is not None and run_id.startswith(argo_prefix):
        run_id = run_id[len(argo_prefix):]

    while True:
        for pod in page.items or ():
            if _matches(pod):
                yield pod
        next_token = page.metadata._continue
        if not next_token:
            break
        page = _fetch_page(next_token)
111
+
112
def list(self, flow_name, run_id, user):
    """Return the matching unfinished pods as a fully materialized list.

    Thin wrapper that drains the generator returned by
    ``self._find_active_pods(flow_name, run_id, user)``.
    """
    return [*self._find_active_pods(flow_name, run_id, user)]
116
+
117
def kill_pods(self, flow_name, run_id, user, echo):
    """Attempt to terminate every unfinished pod matching the given filters.

    For each pod returned by ``self._find_active_pods`` the method first
    runs ``/sbin/killall5`` inside the pod through an exec call. If that
    raises, it falls back to patching the job named by the pod's
    ``job-name`` label to ``parallelism: 0``. A failure of the fallback is
    reported through ``echo`` and never raised. Pods are processed on a
    thread pool.

    Args:
        flow_name: flow whose pods should be terminated.
        run_id: optional run id filter (may be ``None``).
        user: optional user filter (may be ``None``).
        echo: callable taking one message string, used for progress output.
    """
    from kubernetes.stream import stream

    api_instance = self._client.CoreV1Api()
    job_api = self._client.BatchV1Api()
    pods = self._find_active_pods(flow_name, run_id, user)

    def _kill_pod(pod):
        echo("Killing Kubernetes pod %s\n" % pod.metadata.name)
        try:
            stream(
                api_instance.connect_get_namespaced_pod_exec,
                name=pod.metadata.name,
                namespace=pod.metadata.namespace,
                command=[
                    "/bin/sh",
                    "-c",
                    "/sbin/killall5",
                ],
                stderr=True,
                stdin=False,
                stdout=True,
                tty=False,
            )
        except Exception:
            # best effort kill for pod can fail; fall back to scaling the
            # owning job down to zero.
            try:
                job_name = pod.metadata.labels.get("job-name", None)
                if job_name is None:
                    raise Exception("Could not determine job name")

                job_api.patch_namespaced_job(
                    name=job_name,
                    namespace=pod.metadata.namespace,
                    field_manager="metaflow",
                    body={"spec": {"parallelism": 0}},
                )
            except Exception as e:
                echo("failed to kill pod %s - %s" % (pod.metadata.name, str(e)))

    with ThreadPoolExecutor() as executor:
        # One entry per pod handled; an empty list means no pod matched.
        operated_pods = list(executor.map(_kill_pod, pods))

    if not operated_pods:
        if run_id is None:
            # Without a run id filter there is no run to name in the message.
            echo("No active Kubernetes pods found.")
        else:
            echo("No active Kubernetes pods found for run *%s*" % run_id)
162
+
163
def jobset(self, **kwargs):
    """Build a ``KubernetesJobSet`` bound to this client.

    All keyword arguments are forwarded unchanged to the constructor.
    """
    job_set = KubernetesJobSet(self, **kwargs)
    return job_set
165
+
64
166
def job(self, **kwargs):
    """Build a ``KubernetesJob`` bound to this client.

    All keyword arguments are forwarded unchanged to the constructor.
    """
    new_job = KubernetesJob(self, **kwargs)
    return new_job