ob-metaflow 2.11.13.1__py2.py3-none-any.whl → 2.19.7.1rc0__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (289) hide show
  1. metaflow/R.py +10 -7
  2. metaflow/__init__.py +40 -25
  3. metaflow/_vendor/imghdr/__init__.py +186 -0
  4. metaflow/_vendor/importlib_metadata/__init__.py +1063 -0
  5. metaflow/_vendor/importlib_metadata/_adapters.py +68 -0
  6. metaflow/_vendor/importlib_metadata/_collections.py +30 -0
  7. metaflow/_vendor/importlib_metadata/_compat.py +71 -0
  8. metaflow/_vendor/importlib_metadata/_functools.py +104 -0
  9. metaflow/_vendor/importlib_metadata/_itertools.py +73 -0
  10. metaflow/_vendor/importlib_metadata/_meta.py +48 -0
  11. metaflow/_vendor/importlib_metadata/_text.py +99 -0
  12. metaflow/_vendor/importlib_metadata/py.typed +0 -0
  13. metaflow/_vendor/typeguard/__init__.py +48 -0
  14. metaflow/_vendor/typeguard/_checkers.py +1070 -0
  15. metaflow/_vendor/typeguard/_config.py +108 -0
  16. metaflow/_vendor/typeguard/_decorators.py +233 -0
  17. metaflow/_vendor/typeguard/_exceptions.py +42 -0
  18. metaflow/_vendor/typeguard/_functions.py +308 -0
  19. metaflow/_vendor/typeguard/_importhook.py +213 -0
  20. metaflow/_vendor/typeguard/_memo.py +48 -0
  21. metaflow/_vendor/typeguard/_pytest_plugin.py +127 -0
  22. metaflow/_vendor/typeguard/_suppression.py +86 -0
  23. metaflow/_vendor/typeguard/_transformer.py +1229 -0
  24. metaflow/_vendor/typeguard/_union_transformer.py +55 -0
  25. metaflow/_vendor/typeguard/_utils.py +173 -0
  26. metaflow/_vendor/typeguard/py.typed +0 -0
  27. metaflow/_vendor/typing_extensions.py +3641 -0
  28. metaflow/_vendor/v3_7/importlib_metadata/__init__.py +1063 -0
  29. metaflow/_vendor/v3_7/importlib_metadata/_adapters.py +68 -0
  30. metaflow/_vendor/v3_7/importlib_metadata/_collections.py +30 -0
  31. metaflow/_vendor/v3_7/importlib_metadata/_compat.py +71 -0
  32. metaflow/_vendor/v3_7/importlib_metadata/_functools.py +104 -0
  33. metaflow/_vendor/v3_7/importlib_metadata/_itertools.py +73 -0
  34. metaflow/_vendor/v3_7/importlib_metadata/_meta.py +48 -0
  35. metaflow/_vendor/v3_7/importlib_metadata/_text.py +99 -0
  36. metaflow/_vendor/v3_7/importlib_metadata/py.typed +0 -0
  37. metaflow/_vendor/v3_7/typeguard/__init__.py +48 -0
  38. metaflow/_vendor/v3_7/typeguard/_checkers.py +906 -0
  39. metaflow/_vendor/v3_7/typeguard/_config.py +108 -0
  40. metaflow/_vendor/v3_7/typeguard/_decorators.py +237 -0
  41. metaflow/_vendor/v3_7/typeguard/_exceptions.py +42 -0
  42. metaflow/_vendor/v3_7/typeguard/_functions.py +310 -0
  43. metaflow/_vendor/v3_7/typeguard/_importhook.py +213 -0
  44. metaflow/_vendor/v3_7/typeguard/_memo.py +48 -0
  45. metaflow/_vendor/v3_7/typeguard/_pytest_plugin.py +100 -0
  46. metaflow/_vendor/v3_7/typeguard/_suppression.py +88 -0
  47. metaflow/_vendor/v3_7/typeguard/_transformer.py +1207 -0
  48. metaflow/_vendor/v3_7/typeguard/_union_transformer.py +54 -0
  49. metaflow/_vendor/v3_7/typeguard/_utils.py +169 -0
  50. metaflow/_vendor/v3_7/typeguard/py.typed +0 -0
  51. metaflow/_vendor/v3_7/typing_extensions.py +3072 -0
  52. metaflow/_vendor/yaml/__init__.py +427 -0
  53. metaflow/_vendor/yaml/composer.py +139 -0
  54. metaflow/_vendor/yaml/constructor.py +748 -0
  55. metaflow/_vendor/yaml/cyaml.py +101 -0
  56. metaflow/_vendor/yaml/dumper.py +62 -0
  57. metaflow/_vendor/yaml/emitter.py +1137 -0
  58. metaflow/_vendor/yaml/error.py +75 -0
  59. metaflow/_vendor/yaml/events.py +86 -0
  60. metaflow/_vendor/yaml/loader.py +63 -0
  61. metaflow/_vendor/yaml/nodes.py +49 -0
  62. metaflow/_vendor/yaml/parser.py +589 -0
  63. metaflow/_vendor/yaml/reader.py +185 -0
  64. metaflow/_vendor/yaml/representer.py +389 -0
  65. metaflow/_vendor/yaml/resolver.py +227 -0
  66. metaflow/_vendor/yaml/scanner.py +1435 -0
  67. metaflow/_vendor/yaml/serializer.py +111 -0
  68. metaflow/_vendor/yaml/tokens.py +104 -0
  69. metaflow/cards.py +5 -0
  70. metaflow/cli.py +331 -785
  71. metaflow/cli_args.py +17 -0
  72. metaflow/cli_components/__init__.py +0 -0
  73. metaflow/cli_components/dump_cmd.py +96 -0
  74. metaflow/cli_components/init_cmd.py +52 -0
  75. metaflow/cli_components/run_cmds.py +546 -0
  76. metaflow/cli_components/step_cmd.py +334 -0
  77. metaflow/cli_components/utils.py +140 -0
  78. metaflow/client/__init__.py +1 -0
  79. metaflow/client/core.py +467 -73
  80. metaflow/client/filecache.py +75 -35
  81. metaflow/clone_util.py +7 -1
  82. metaflow/cmd/code/__init__.py +231 -0
  83. metaflow/cmd/develop/stub_generator.py +756 -288
  84. metaflow/cmd/develop/stubs.py +12 -28
  85. metaflow/cmd/main_cli.py +6 -4
  86. metaflow/cmd/make_wrapper.py +78 -0
  87. metaflow/datastore/__init__.py +1 -0
  88. metaflow/datastore/content_addressed_store.py +41 -10
  89. metaflow/datastore/datastore_set.py +11 -2
  90. metaflow/datastore/flow_datastore.py +156 -10
  91. metaflow/datastore/spin_datastore.py +91 -0
  92. metaflow/datastore/task_datastore.py +154 -39
  93. metaflow/debug.py +5 -0
  94. metaflow/decorators.py +404 -78
  95. metaflow/exception.py +8 -2
  96. metaflow/extension_support/__init__.py +527 -376
  97. metaflow/extension_support/_empty_file.py +2 -2
  98. metaflow/extension_support/plugins.py +49 -31
  99. metaflow/flowspec.py +482 -33
  100. metaflow/graph.py +210 -42
  101. metaflow/includefile.py +84 -40
  102. metaflow/lint.py +141 -22
  103. metaflow/meta_files.py +13 -0
  104. metaflow/{metadata → metadata_provider}/heartbeat.py +24 -8
  105. metaflow/{metadata → metadata_provider}/metadata.py +86 -1
  106. metaflow/metaflow_config.py +175 -28
  107. metaflow/metaflow_config_funcs.py +51 -3
  108. metaflow/metaflow_current.py +4 -10
  109. metaflow/metaflow_environment.py +139 -53
  110. metaflow/metaflow_git.py +115 -0
  111. metaflow/metaflow_profile.py +18 -0
  112. metaflow/metaflow_version.py +150 -66
  113. metaflow/mflog/__init__.py +4 -3
  114. metaflow/mflog/save_logs.py +2 -2
  115. metaflow/multicore_utils.py +31 -14
  116. metaflow/package/__init__.py +673 -0
  117. metaflow/packaging_sys/__init__.py +880 -0
  118. metaflow/packaging_sys/backend.py +128 -0
  119. metaflow/packaging_sys/distribution_support.py +153 -0
  120. metaflow/packaging_sys/tar_backend.py +99 -0
  121. metaflow/packaging_sys/utils.py +54 -0
  122. metaflow/packaging_sys/v1.py +527 -0
  123. metaflow/parameters.py +149 -28
  124. metaflow/plugins/__init__.py +74 -5
  125. metaflow/plugins/airflow/airflow.py +40 -25
  126. metaflow/plugins/airflow/airflow_cli.py +22 -5
  127. metaflow/plugins/airflow/airflow_decorator.py +1 -1
  128. metaflow/plugins/airflow/airflow_utils.py +5 -3
  129. metaflow/plugins/airflow/sensors/base_sensor.py +4 -4
  130. metaflow/plugins/airflow/sensors/external_task_sensor.py +2 -2
  131. metaflow/plugins/airflow/sensors/s3_sensor.py +2 -2
  132. metaflow/plugins/argo/argo_client.py +78 -33
  133. metaflow/plugins/argo/argo_events.py +6 -6
  134. metaflow/plugins/argo/argo_workflows.py +2410 -527
  135. metaflow/plugins/argo/argo_workflows_cli.py +571 -121
  136. metaflow/plugins/argo/argo_workflows_decorator.py +43 -12
  137. metaflow/plugins/argo/argo_workflows_deployer.py +106 -0
  138. metaflow/plugins/argo/argo_workflows_deployer_objects.py +453 -0
  139. metaflow/plugins/argo/capture_error.py +73 -0
  140. metaflow/plugins/argo/conditional_input_paths.py +35 -0
  141. metaflow/plugins/argo/exit_hooks.py +209 -0
  142. metaflow/plugins/argo/jobset_input_paths.py +15 -0
  143. metaflow/plugins/argo/param_val.py +19 -0
  144. metaflow/plugins/aws/aws_client.py +10 -3
  145. metaflow/plugins/aws/aws_utils.py +55 -2
  146. metaflow/plugins/aws/batch/batch.py +72 -5
  147. metaflow/plugins/aws/batch/batch_cli.py +33 -10
  148. metaflow/plugins/aws/batch/batch_client.py +4 -3
  149. metaflow/plugins/aws/batch/batch_decorator.py +102 -35
  150. metaflow/plugins/aws/secrets_manager/aws_secrets_manager_secrets_provider.py +13 -10
  151. metaflow/plugins/aws/step_functions/dynamo_db_client.py +0 -3
  152. metaflow/plugins/aws/step_functions/production_token.py +1 -1
  153. metaflow/plugins/aws/step_functions/step_functions.py +65 -8
  154. metaflow/plugins/aws/step_functions/step_functions_cli.py +101 -7
  155. metaflow/plugins/aws/step_functions/step_functions_decorator.py +1 -2
  156. metaflow/plugins/aws/step_functions/step_functions_deployer.py +97 -0
  157. metaflow/plugins/aws/step_functions/step_functions_deployer_objects.py +264 -0
  158. metaflow/plugins/azure/azure_exceptions.py +1 -1
  159. metaflow/plugins/azure/azure_secret_manager_secrets_provider.py +240 -0
  160. metaflow/plugins/azure/azure_tail.py +1 -1
  161. metaflow/plugins/azure/includefile_support.py +2 -0
  162. metaflow/plugins/cards/card_cli.py +66 -30
  163. metaflow/plugins/cards/card_creator.py +25 -1
  164. metaflow/plugins/cards/card_datastore.py +21 -49
  165. metaflow/plugins/cards/card_decorator.py +132 -8
  166. metaflow/plugins/cards/card_modules/basic.py +112 -17
  167. metaflow/plugins/cards/card_modules/bundle.css +1 -1
  168. metaflow/plugins/cards/card_modules/card.py +16 -1
  169. metaflow/plugins/cards/card_modules/chevron/renderer.py +1 -1
  170. metaflow/plugins/cards/card_modules/components.py +665 -28
  171. metaflow/plugins/cards/card_modules/convert_to_native_type.py +36 -7
  172. metaflow/plugins/cards/card_modules/json_viewer.py +232 -0
  173. metaflow/plugins/cards/card_modules/main.css +1 -0
  174. metaflow/plugins/cards/card_modules/main.js +68 -49
  175. metaflow/plugins/cards/card_modules/renderer_tools.py +1 -0
  176. metaflow/plugins/cards/card_modules/test_cards.py +26 -12
  177. metaflow/plugins/cards/card_server.py +39 -14
  178. metaflow/plugins/cards/component_serializer.py +2 -9
  179. metaflow/plugins/cards/metadata.py +22 -0
  180. metaflow/plugins/catch_decorator.py +9 -0
  181. metaflow/plugins/datastores/azure_storage.py +10 -1
  182. metaflow/plugins/datastores/gs_storage.py +6 -2
  183. metaflow/plugins/datastores/local_storage.py +12 -6
  184. metaflow/plugins/datastores/spin_storage.py +12 -0
  185. metaflow/plugins/datatools/local.py +2 -0
  186. metaflow/plugins/datatools/s3/s3.py +126 -75
  187. metaflow/plugins/datatools/s3/s3op.py +254 -121
  188. metaflow/plugins/env_escape/__init__.py +3 -3
  189. metaflow/plugins/env_escape/client_modules.py +102 -72
  190. metaflow/plugins/env_escape/server.py +7 -0
  191. metaflow/plugins/env_escape/stub.py +24 -5
  192. metaflow/plugins/events_decorator.py +343 -185
  193. metaflow/plugins/exit_hook/__init__.py +0 -0
  194. metaflow/plugins/exit_hook/exit_hook_decorator.py +46 -0
  195. metaflow/plugins/exit_hook/exit_hook_script.py +52 -0
  196. metaflow/plugins/gcp/__init__.py +1 -1
  197. metaflow/plugins/gcp/gcp_secret_manager_secrets_provider.py +11 -6
  198. metaflow/plugins/gcp/gs_tail.py +10 -6
  199. metaflow/plugins/gcp/includefile_support.py +3 -0
  200. metaflow/plugins/kubernetes/kube_utils.py +108 -0
  201. metaflow/plugins/kubernetes/kubernetes.py +411 -130
  202. metaflow/plugins/kubernetes/kubernetes_cli.py +168 -36
  203. metaflow/plugins/kubernetes/kubernetes_client.py +104 -2
  204. metaflow/plugins/kubernetes/kubernetes_decorator.py +246 -88
  205. metaflow/plugins/kubernetes/kubernetes_job.py +253 -581
  206. metaflow/plugins/kubernetes/kubernetes_jobsets.py +1071 -0
  207. metaflow/plugins/kubernetes/spot_metadata_cli.py +69 -0
  208. metaflow/plugins/kubernetes/spot_monitor_sidecar.py +109 -0
  209. metaflow/plugins/logs_cli.py +359 -0
  210. metaflow/plugins/{metadata → metadata_providers}/local.py +144 -84
  211. metaflow/plugins/{metadata → metadata_providers}/service.py +103 -26
  212. metaflow/plugins/metadata_providers/spin.py +16 -0
  213. metaflow/plugins/package_cli.py +36 -24
  214. metaflow/plugins/parallel_decorator.py +128 -11
  215. metaflow/plugins/parsers.py +16 -0
  216. metaflow/plugins/project_decorator.py +51 -5
  217. metaflow/plugins/pypi/bootstrap.py +357 -105
  218. metaflow/plugins/pypi/conda_decorator.py +82 -81
  219. metaflow/plugins/pypi/conda_environment.py +187 -52
  220. metaflow/plugins/pypi/micromamba.py +157 -47
  221. metaflow/plugins/pypi/parsers.py +268 -0
  222. metaflow/plugins/pypi/pip.py +88 -13
  223. metaflow/plugins/pypi/pypi_decorator.py +37 -1
  224. metaflow/plugins/pypi/utils.py +48 -2
  225. metaflow/plugins/resources_decorator.py +2 -2
  226. metaflow/plugins/secrets/__init__.py +3 -0
  227. metaflow/plugins/secrets/secrets_decorator.py +26 -181
  228. metaflow/plugins/secrets/secrets_func.py +49 -0
  229. metaflow/plugins/secrets/secrets_spec.py +101 -0
  230. metaflow/plugins/secrets/utils.py +74 -0
  231. metaflow/plugins/tag_cli.py +4 -7
  232. metaflow/plugins/test_unbounded_foreach_decorator.py +41 -6
  233. metaflow/plugins/timeout_decorator.py +3 -3
  234. metaflow/plugins/uv/__init__.py +0 -0
  235. metaflow/plugins/uv/bootstrap.py +128 -0
  236. metaflow/plugins/uv/uv_environment.py +72 -0
  237. metaflow/procpoll.py +1 -1
  238. metaflow/pylint_wrapper.py +5 -1
  239. metaflow/runner/__init__.py +0 -0
  240. metaflow/runner/click_api.py +717 -0
  241. metaflow/runner/deployer.py +470 -0
  242. metaflow/runner/deployer_impl.py +201 -0
  243. metaflow/runner/metaflow_runner.py +714 -0
  244. metaflow/runner/nbdeploy.py +132 -0
  245. metaflow/runner/nbrun.py +225 -0
  246. metaflow/runner/subprocess_manager.py +650 -0
  247. metaflow/runner/utils.py +335 -0
  248. metaflow/runtime.py +1078 -260
  249. metaflow/sidecar/sidecar_worker.py +1 -1
  250. metaflow/system/__init__.py +5 -0
  251. metaflow/system/system_logger.py +85 -0
  252. metaflow/system/system_monitor.py +108 -0
  253. metaflow/system/system_utils.py +19 -0
  254. metaflow/task.py +521 -225
  255. metaflow/tracing/__init__.py +7 -7
  256. metaflow/tracing/span_exporter.py +31 -38
  257. metaflow/tracing/tracing_modules.py +38 -43
  258. metaflow/tuple_util.py +27 -0
  259. metaflow/user_configs/__init__.py +0 -0
  260. metaflow/user_configs/config_options.py +563 -0
  261. metaflow/user_configs/config_parameters.py +598 -0
  262. metaflow/user_decorators/__init__.py +0 -0
  263. metaflow/user_decorators/common.py +144 -0
  264. metaflow/user_decorators/mutable_flow.py +512 -0
  265. metaflow/user_decorators/mutable_step.py +424 -0
  266. metaflow/user_decorators/user_flow_decorator.py +264 -0
  267. metaflow/user_decorators/user_step_decorator.py +749 -0
  268. metaflow/util.py +243 -27
  269. metaflow/vendor.py +23 -7
  270. metaflow/version.py +1 -1
  271. ob_metaflow-2.19.7.1rc0.data/data/share/metaflow/devtools/Makefile +355 -0
  272. ob_metaflow-2.19.7.1rc0.data/data/share/metaflow/devtools/Tiltfile +726 -0
  273. ob_metaflow-2.19.7.1rc0.data/data/share/metaflow/devtools/pick_services.sh +105 -0
  274. ob_metaflow-2.19.7.1rc0.dist-info/METADATA +87 -0
  275. ob_metaflow-2.19.7.1rc0.dist-info/RECORD +445 -0
  276. {ob_metaflow-2.11.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/WHEEL +1 -1
  277. {ob_metaflow-2.11.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/entry_points.txt +1 -0
  278. metaflow/_vendor/v3_5/__init__.py +0 -1
  279. metaflow/_vendor/v3_5/importlib_metadata/__init__.py +0 -644
  280. metaflow/_vendor/v3_5/importlib_metadata/_compat.py +0 -152
  281. metaflow/package.py +0 -188
  282. ob_metaflow-2.11.13.1.dist-info/METADATA +0 -85
  283. ob_metaflow-2.11.13.1.dist-info/RECORD +0 -308
  284. /metaflow/_vendor/{v3_5/zipp.py → zipp.py} +0 -0
  285. /metaflow/{metadata → metadata_provider}/__init__.py +0 -0
  286. /metaflow/{metadata → metadata_provider}/util.py +0 -0
  287. /metaflow/plugins/{metadata → metadata_providers}/__init__.py +0 -0
  288. {ob_metaflow-2.11.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info/licenses}/LICENSE +0 -0
  289. {ob_metaflow-2.11.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/top_level.txt +0 -0
@@ -1,12 +1,8 @@
1
1
  import json
2
2
  import math
3
3
  import os
4
- import re
5
4
  import shlex
6
5
  import time
7
- import copy
8
- from typing import Dict, List, Optional
9
- import uuid
10
6
  from uuid import uuid4
11
7
 
12
8
  from metaflow import current, util
@@ -15,10 +11,13 @@ from metaflow.metaflow_config import (
15
11
  ARGO_EVENTS_EVENT,
16
12
  ARGO_EVENTS_EVENT_BUS,
17
13
  ARGO_EVENTS_EVENT_SOURCE,
18
- ARGO_EVENTS_SERVICE_ACCOUNT,
19
14
  ARGO_EVENTS_INTERNAL_WEBHOOK_URL,
20
- AWS_SECRETS_MANAGER_DEFAULT_REGION,
15
+ ARGO_EVENTS_SERVICE_ACCOUNT,
21
16
  ARGO_EVENTS_WEBHOOK_AUTH,
17
+ ARGO_WORKFLOWS_KUBERNETES_SECRETS,
18
+ ARGO_WORKFLOWS_ENV_VARS_TO_SKIP,
19
+ AWS_SECRETS_MANAGER_DEFAULT_REGION,
20
+ AZURE_KEY_VAULT_PREFIX,
22
21
  AZURE_STORAGE_BLOB_SERVICE_ENDPOINT,
23
22
  CARD_AZUREROOT,
24
23
  CARD_GSROOT,
@@ -33,16 +32,16 @@ from metaflow.metaflow_config import (
33
32
  DEFAULT_SECRETS_BACKEND_TYPE,
34
33
  GCP_SECRET_MANAGER_PREFIX,
35
34
  KUBERNETES_FETCH_EC2_METADATA,
36
- KUBERNETES_LABELS,
37
35
  KUBERNETES_SANDBOX_INIT_SCRIPT,
36
+ OTEL_ENDPOINT,
38
37
  S3_ENDPOINT_URL,
38
+ S3_SERVER_SIDE_ENCRYPTION,
39
39
  SERVICE_HEADERS,
40
+ KUBERNETES_SECRETS,
40
41
  SERVICE_INTERNAL_URL,
41
- S3_SERVER_SIDE_ENCRYPTION,
42
- OTEL_ENDPOINT,
43
42
  )
44
- from metaflow.metaflow_config_funcs import config_values
45
-
43
+ from metaflow.unbounded_foreach import UBF_CONTROL, UBF_TASK
44
+ from metaflow.metaflow_config_funcs import config_values, init_config
46
45
  from metaflow.mflog import (
47
46
  BASH_SAVE_LOGS,
48
47
  bash_capture_logs,
@@ -60,6 +59,10 @@ STDERR_FILE = "mflog_stderr"
60
59
  STDOUT_PATH = os.path.join(LOGS_DIR, STDOUT_FILE)
61
60
  STDERR_PATH = os.path.join(LOGS_DIR, STDERR_FILE)
62
61
 
62
+ METAFLOW_PARALLEL_STEP_CLI_OPTIONS_TEMPLATE = (
63
+ "{METAFLOW_PARALLEL_STEP_CLI_OPTIONS_TEMPLATE}"
64
+ )
65
+
63
66
 
64
67
  class KubernetesException(MetaflowException):
65
68
  headline = "Kubernetes error"
@@ -87,6 +90,7 @@ class Kubernetes(object):
87
90
  step_name,
88
91
  task_id,
89
92
  attempt,
93
+ code_package_metadata,
90
94
  code_package_url,
91
95
  step_cmds,
92
96
  ):
@@ -101,7 +105,7 @@ class Kubernetes(object):
101
105
  stderr_path=STDERR_PATH,
102
106
  )
103
107
  init_cmds = self._environment.get_package_commands(
104
- code_package_url, self._datastore.TYPE
108
+ code_package_url, self._datastore.TYPE, code_package_metadata
105
109
  )
106
110
  init_expr = " && ".join(init_cmds)
107
111
  step_expr = bash_capture_logs(
@@ -143,9 +147,335 @@ class Kubernetes(object):
143
147
  return shlex.split('bash -c "%s"' % cmd_str)
144
148
 
145
149
  def launch_job(self, **kwargs):
146
- self._job = self.create_job(**kwargs).execute()
150
+ if (
151
+ "num_parallel" in kwargs
152
+ and kwargs["num_parallel"]
153
+ and int(kwargs["num_parallel"]) > 0
154
+ ):
155
+ self._job = self.create_jobset(**kwargs).execute()
156
+ else:
157
+ kwargs.pop("num_parallel", None)
158
+ kwargs["name_pattern"] = "t-{uid}-".format(uid=str(uuid4())[:8])
159
+ self._job = self.create_job_object(**kwargs).create().execute()
160
+
161
+ def create_jobset(
162
+ self,
163
+ flow_name,
164
+ run_id,
165
+ step_name,
166
+ task_id,
167
+ attempt,
168
+ user,
169
+ code_package_metadata,
170
+ code_package_sha,
171
+ code_package_url,
172
+ code_package_ds,
173
+ docker_image,
174
+ docker_image_pull_policy,
175
+ image_pull_secrets=None,
176
+ step_cli=None,
177
+ service_account=None,
178
+ secrets=None,
179
+ node_selector=None,
180
+ namespace=None,
181
+ cpu=None,
182
+ gpu=None,
183
+ gpu_vendor=None,
184
+ disk=None,
185
+ memory=None,
186
+ use_tmpfs=None,
187
+ tmpfs_tempdir=None,
188
+ tmpfs_size=None,
189
+ tmpfs_path=None,
190
+ run_time_limit=None,
191
+ env=None,
192
+ persistent_volume_claims=None,
193
+ tolerations=None,
194
+ labels=None,
195
+ annotations=None,
196
+ shared_memory=None,
197
+ port=None,
198
+ num_parallel=None,
199
+ qos=None,
200
+ security_context=None,
201
+ ):
202
+ name = "js-%s" % str(uuid4())[:6]
203
+ jobset = (
204
+ KubernetesClient()
205
+ .jobset(
206
+ name=name,
207
+ namespace=namespace,
208
+ service_account=service_account,
209
+ node_selector=node_selector,
210
+ image=docker_image,
211
+ image_pull_policy=docker_image_pull_policy,
212
+ image_pull_secrets=image_pull_secrets,
213
+ cpu=cpu,
214
+ memory=memory,
215
+ disk=disk,
216
+ gpu=gpu,
217
+ gpu_vendor=gpu_vendor,
218
+ timeout_in_seconds=run_time_limit,
219
+ # Retries are handled by Metaflow runtime
220
+ retries=0,
221
+ step_name=step_name,
222
+ # We set the jobset name as the subdomain.
223
+ # todo: [final-refactor] ask @shri what was the motive when we did initial implementation
224
+ subdomain=name,
225
+ tolerations=tolerations,
226
+ use_tmpfs=use_tmpfs,
227
+ tmpfs_tempdir=tmpfs_tempdir,
228
+ tmpfs_size=tmpfs_size,
229
+ tmpfs_path=tmpfs_path,
230
+ persistent_volume_claims=persistent_volume_claims,
231
+ shared_memory=shared_memory,
232
+ port=port,
233
+ num_parallel=num_parallel,
234
+ qos=qos,
235
+ security_context=security_context,
236
+ )
237
+ .environment_variable("METAFLOW_CODE_METADATA", code_package_metadata)
238
+ .environment_variable("METAFLOW_CODE_SHA", code_package_sha)
239
+ .environment_variable("METAFLOW_CODE_URL", code_package_url)
240
+ .environment_variable("METAFLOW_CODE_DS", code_package_ds)
241
+ .environment_variable("METAFLOW_USER", user)
242
+ .environment_variable("METAFLOW_SERVICE_URL", SERVICE_INTERNAL_URL)
243
+ .environment_variable(
244
+ "METAFLOW_SERVICE_HEADERS",
245
+ json.dumps(SERVICE_HEADERS),
246
+ )
247
+ .environment_variable("METAFLOW_DATASTORE_SYSROOT_S3", DATASTORE_SYSROOT_S3)
248
+ .environment_variable("METAFLOW_DATATOOLS_S3ROOT", DATATOOLS_S3ROOT)
249
+ .environment_variable("METAFLOW_DEFAULT_DATASTORE", self._datastore.TYPE)
250
+ .environment_variable("METAFLOW_DEFAULT_METADATA", DEFAULT_METADATA)
251
+ .environment_variable("METAFLOW_KUBERNETES_WORKLOAD", 1)
252
+ .environment_variable(
253
+ "METAFLOW_KUBERNETES_FETCH_EC2_METADATA", KUBERNETES_FETCH_EC2_METADATA
254
+ )
255
+ .environment_variable("METAFLOW_RUNTIME_ENVIRONMENT", "kubernetes")
256
+ .environment_variable(
257
+ "METAFLOW_DEFAULT_SECRETS_BACKEND_TYPE", DEFAULT_SECRETS_BACKEND_TYPE
258
+ )
259
+ .environment_variable("METAFLOW_CARD_S3ROOT", CARD_S3ROOT)
260
+ .environment_variable(
261
+ "METAFLOW_DEFAULT_AWS_CLIENT_PROVIDER", DEFAULT_AWS_CLIENT_PROVIDER
262
+ )
263
+ .environment_variable(
264
+ "METAFLOW_DEFAULT_GCP_CLIENT_PROVIDER", DEFAULT_GCP_CLIENT_PROVIDER
265
+ )
266
+ .environment_variable(
267
+ "METAFLOW_AWS_SECRETS_MANAGER_DEFAULT_REGION",
268
+ AWS_SECRETS_MANAGER_DEFAULT_REGION,
269
+ )
270
+ .environment_variable(
271
+ "METAFLOW_GCP_SECRET_MANAGER_PREFIX", GCP_SECRET_MANAGER_PREFIX
272
+ )
273
+ .environment_variable(
274
+ "METAFLOW_AZURE_KEY_VAULT_PREFIX", AZURE_KEY_VAULT_PREFIX
275
+ )
276
+ .environment_variable("METAFLOW_S3_ENDPOINT_URL", S3_ENDPOINT_URL)
277
+ .environment_variable(
278
+ "METAFLOW_AZURE_STORAGE_BLOB_SERVICE_ENDPOINT",
279
+ AZURE_STORAGE_BLOB_SERVICE_ENDPOINT,
280
+ )
281
+ .environment_variable(
282
+ "METAFLOW_DATASTORE_SYSROOT_AZURE", DATASTORE_SYSROOT_AZURE
283
+ )
284
+ .environment_variable("METAFLOW_CARD_AZUREROOT", CARD_AZUREROOT)
285
+ .environment_variable("METAFLOW_DATASTORE_SYSROOT_GS", DATASTORE_SYSROOT_GS)
286
+ .environment_variable("METAFLOW_CARD_GSROOT", CARD_GSROOT)
287
+ # support Metaflow sandboxes
288
+ .environment_variable(
289
+ "METAFLOW_INIT_SCRIPT", KUBERNETES_SANDBOX_INIT_SCRIPT
290
+ )
291
+ .environment_variable(
292
+ "METAFLOW_KUBERNETES_SANDBOX_INIT_SCRIPT",
293
+ KUBERNETES_SANDBOX_INIT_SCRIPT,
294
+ )
295
+ .environment_variable(
296
+ "METAFLOW_ARGO_WORKFLOWS_KUBERNETES_SECRETS",
297
+ ARGO_WORKFLOWS_KUBERNETES_SECRETS,
298
+ )
299
+ .environment_variable(
300
+ "METAFLOW_ARGO_WORKFLOWS_ENV_VARS_TO_SKIP",
301
+ ARGO_WORKFLOWS_ENV_VARS_TO_SKIP,
302
+ )
303
+ .environment_variable("METAFLOW_OTEL_ENDPOINT", OTEL_ENDPOINT)
304
+ # Skip setting METAFLOW_DATASTORE_SYSROOT_LOCAL because metadata sync
305
+ # between the local user instance and the remote Kubernetes pod
306
+ # assumes metadata is stored in DATASTORE_LOCAL_DIR on the Kubernetes
307
+ # pod; this happens when METAFLOW_DATASTORE_SYSROOT_LOCAL is NOT set (
308
+ # see get_datastore_root_from_config in datastore/local.py).
309
+ )
310
+
311
+ for k in list(
312
+ [] if not secrets else [secrets] if isinstance(secrets, str) else secrets
313
+ ) + KUBERNETES_SECRETS.split(","):
314
+ jobset.secret(k)
315
+
316
+ initial_configs = init_config()
317
+ for entry in ["OBP_PERIMETER", "OBP_INTEGRATIONS_URL"]:
318
+ if entry not in initial_configs:
319
+ raise KubernetesException(
320
+ f"{entry} was not found in metaflow config. Please make sure to run `outerbounds configure <...>` command which can be found on the Ourebounds UI or reach out to your Outerbounds support team."
321
+ )
322
+
323
+ additional_obp_configs = {
324
+ "OBP_PERIMETER": initial_configs["OBP_PERIMETER"],
325
+ "OBP_INTEGRATIONS_URL": initial_configs[
326
+ "OBP_INTEGRATIONS_URL"
327
+ ],
328
+ }
329
+ for k, v in additional_obp_configs.items():
330
+ jobset.environment_variable(k, v)
331
+
332
+ jobset.environment_variables_from_selectors(
333
+ {
334
+ "METAFLOW_KUBERNETES_NAMESPACE": "metadata.namespace",
335
+ "METAFLOW_KUBERNETES_POD_NAMESPACE": "metadata.namespace",
336
+ "METAFLOW_KUBERNETES_POD_NAME": "metadata.name",
337
+ "METAFLOW_KUBERNETES_POD_ID": "metadata.uid",
338
+ "METAFLOW_KUBERNETES_SERVICE_ACCOUNT_NAME": "spec.serviceAccountName",
339
+ "METAFLOW_KUBERNETES_NODE_IP": "status.hostIP",
340
+ }
341
+ )
342
+
343
+ # Temporary passing of *some* environment variables. Do not rely on this
344
+ # mechanism as it will be removed in the near future
345
+ for k, v in config_values():
346
+ if k.startswith("METAFLOW_CONDA_") or k.startswith("METAFLOW_DEBUG_"):
347
+ jobset.environment_variable(k, v)
348
+
349
+ if S3_SERVER_SIDE_ENCRYPTION is not None:
350
+ jobset.environment_variable(
351
+ "METAFLOW_S3_SERVER_SIDE_ENCRYPTION", S3_SERVER_SIDE_ENCRYPTION
352
+ )
353
+
354
+ # Set environment variables to support metaflow.integrations.ArgoEvent
355
+ jobset.environment_variable(
356
+ "METAFLOW_ARGO_EVENTS_WEBHOOK_URL", ARGO_EVENTS_INTERNAL_WEBHOOK_URL
357
+ )
358
+ jobset.environment_variable("METAFLOW_ARGO_EVENTS_EVENT", ARGO_EVENTS_EVENT)
359
+ jobset.environment_variable(
360
+ "METAFLOW_ARGO_EVENTS_EVENT_BUS", ARGO_EVENTS_EVENT_BUS
361
+ )
362
+ jobset.environment_variable(
363
+ "METAFLOW_ARGO_EVENTS_EVENT_SOURCE", ARGO_EVENTS_EVENT_SOURCE
364
+ )
365
+ jobset.environment_variable(
366
+ "METAFLOW_ARGO_EVENTS_SERVICE_ACCOUNT", ARGO_EVENTS_SERVICE_ACCOUNT
367
+ )
368
+ jobset.environment_variable(
369
+ "METAFLOW_ARGO_EVENTS_WEBHOOK_AUTH",
370
+ ARGO_EVENTS_WEBHOOK_AUTH,
371
+ )
372
+
373
+ ## -----Jobset specific env vars START here-----
374
+ jobset.environment_variable("MF_MASTER_ADDR", jobset.jobset_control_addr)
375
+ jobset.environment_variable("MF_MASTER_PORT", str(port))
376
+ jobset.environment_variable("MF_WORLD_SIZE", str(num_parallel))
377
+ jobset.environment_variable_from_selector(
378
+ "JOBSET_RESTART_ATTEMPT",
379
+ "metadata.annotations['jobset.sigs.k8s.io/restart-attempt']",
380
+ )
381
+ jobset.environment_variable_from_selector(
382
+ "METAFLOW_KUBERNETES_JOBSET_NAME",
383
+ "metadata.annotations['jobset.sigs.k8s.io/jobset-name']",
384
+ )
385
+ jobset.environment_variable_from_selector(
386
+ "MF_WORKER_REPLICA_INDEX",
387
+ "metadata.annotations['jobset.sigs.k8s.io/job-index']",
388
+ )
389
+ ## -----Jobset specific env vars END here-----
390
+
391
+ tmpfs_enabled = use_tmpfs or (tmpfs_size and not use_tmpfs)
392
+ if tmpfs_enabled and tmpfs_tempdir:
393
+ jobset.environment_variable("METAFLOW_TEMPDIR", tmpfs_path)
394
+
395
+ for name, value in env.items():
396
+ jobset.environment_variable(name, value)
397
+
398
+ system_annotations = {
399
+ "metaflow/user": user,
400
+ "metaflow/flow_name": flow_name,
401
+ "metaflow/control-task-id": task_id,
402
+ "metaflow/run_id": run_id,
403
+ "metaflow/step_name": step_name,
404
+ "metaflow/attempt": attempt,
405
+ }
406
+ if current.get("project_name"):
407
+ system_annotations.update(
408
+ {
409
+ "metaflow/project_name": current.project_name,
410
+ "metaflow/branch_name": current.branch_name,
411
+ "metaflow/project_flow_name": current.project_flow_name,
412
+ }
413
+ )
414
+
415
+ system_labels = {
416
+ "app.kubernetes.io/name": "metaflow-task",
417
+ "app.kubernetes.io/part-of": "metaflow",
418
+ }
419
+
420
+ jobset.labels({**({} if not labels else labels), **system_labels})
421
+
422
+ jobset.annotations(
423
+ {**({} if not annotations else annotations), **system_annotations}
424
+ )
425
+ # We need this task-id set so that all the nodes are aware of the control
426
+ # task's task-id. These "MF_" variables populate the `current.parallel` namedtuple
427
+ jobset.environment_variable("MF_PARALLEL_CONTROL_TASK_ID", str(task_id))
428
+
429
+ ## ----------- control/worker specific values START here -----------
430
+ # We will now set the appropriate command for the control/worker job
431
+ _get_command = lambda index, _tskid: self._command(
432
+ flow_name=flow_name,
433
+ run_id=run_id,
434
+ step_name=step_name,
435
+ task_id=_tskid,
436
+ attempt=attempt,
437
+ code_package_metadata=code_package_metadata,
438
+ code_package_url=code_package_url,
439
+ step_cmds=[
440
+ step_cli.replace(
441
+ METAFLOW_PARALLEL_STEP_CLI_OPTIONS_TEMPLATE,
442
+ "--ubf-context $UBF_CONTEXT --split-index %s --task-id %s"
443
+ % (index, _tskid),
444
+ )
445
+ ],
446
+ )
447
+ jobset.control.replicas(1)
448
+ jobset.worker.replicas(num_parallel - 1)
449
+
450
+ # We set the appropriate command for the control/worker job
451
+ # and also set the task-id/split-index for the control/worker job
452
+ # appropriately.
453
+ jobset.control.command(_get_command("0", str(task_id)))
454
+ jobset.worker.command(
455
+ _get_command(
456
+ "`expr $[MF_WORKER_REPLICA_INDEX] + 1`",
457
+ "-".join(
458
+ [
459
+ str(task_id),
460
+ "worker",
461
+ "$MF_WORKER_REPLICA_INDEX",
462
+ ]
463
+ ),
464
+ )
465
+ )
147
466
 
148
- def create_job(
467
+ jobset.control.environment_variable("UBF_CONTEXT", UBF_CONTROL)
468
+ jobset.worker.environment_variable("UBF_CONTEXT", UBF_TASK)
469
+ # Every control job requires an environment variable of MF_CONTROL_INDEX
470
+ # set to 0 so that we can derive the MF_PARALLEL_NODE_INDEX correctly.
471
+ # Since only the control job has MF_CONTROL_INDEX set to 0, all worker nodes
472
+ # will use MF_WORKER_REPLICA_INDEX
473
+ jobset.control.environment_variable("MF_CONTROL_INDEX", "0")
474
+ ## ----------- control/worker specific values END here -----------
475
+
476
+ return jobset
477
+
478
+ def create_job_object(
149
479
  self,
150
480
  flow_name,
151
481
  run_id,
@@ -153,12 +483,14 @@ class Kubernetes(object):
153
483
  task_id,
154
484
  attempt,
155
485
  user,
486
+ code_package_metadata,
156
487
  code_package_sha,
157
488
  code_package_url,
158
489
  code_package_ds,
159
490
  step_cli,
160
491
  docker_image,
161
492
  docker_image_pull_policy,
493
+ image_pull_secrets=None,
162
494
  service_account=None,
163
495
  secrets=None,
164
496
  node_selector=None,
@@ -177,19 +509,19 @@ class Kubernetes(object):
177
509
  persistent_volume_claims=None,
178
510
  tolerations=None,
179
511
  labels=None,
180
- annotations=None,
181
- num_parallel=0,
182
- attrs={},
183
512
  shared_memory=None,
184
513
  port=None,
514
+ name_pattern=None,
515
+ qos=None,
516
+ annotations=None,
517
+ security_context=None,
185
518
  ):
186
519
  if env is None:
187
520
  env = {}
188
-
189
521
  job = (
190
522
  KubernetesClient()
191
523
  .job(
192
- generate_name="t-{uid}-".format(uid=str(uuid4())[:8]),
524
+ generate_name=name_pattern,
193
525
  namespace=namespace,
194
526
  service_account=service_account,
195
527
  secrets=secrets,
@@ -200,11 +532,13 @@ class Kubernetes(object):
200
532
  step_name=step_name,
201
533
  task_id=task_id,
202
534
  attempt=attempt,
535
+ code_package_metadata=code_package_metadata,
203
536
  code_package_url=code_package_url,
204
537
  step_cmds=[step_cli],
205
538
  ),
206
539
  image=docker_image,
207
540
  image_pull_policy=docker_image_pull_policy,
541
+ image_pull_secrets=image_pull_secrets,
208
542
  cpu=cpu,
209
543
  memory=memory,
210
544
  disk=disk,
@@ -215,17 +549,19 @@ class Kubernetes(object):
215
549
  retries=0,
216
550
  step_name=step_name,
217
551
  tolerations=tolerations,
218
- labels=self._get_labels(labels),
552
+ labels=labels,
553
+ annotations=annotations,
219
554
  use_tmpfs=use_tmpfs,
220
555
  tmpfs_tempdir=tmpfs_tempdir,
221
556
  tmpfs_size=tmpfs_size,
222
557
  tmpfs_path=tmpfs_path,
223
558
  persistent_volume_claims=persistent_volume_claims,
224
- num_parallel=num_parallel,
225
- attrs=attrs,
226
559
  shared_memory=shared_memory,
227
560
  port=port,
561
+ qos=qos,
562
+ security_context=security_context,
228
563
  )
564
+ .environment_variable("METAFLOW_CODE_METADATA", code_package_metadata)
229
565
  .environment_variable("METAFLOW_CODE_SHA", code_package_sha)
230
566
  .environment_variable("METAFLOW_CODE_URL", code_package_url)
231
567
  .environment_variable("METAFLOW_CODE_DS", code_package_ds)
@@ -261,6 +597,9 @@ class Kubernetes(object):
261
597
  .environment_variable(
262
598
  "METAFLOW_GCP_SECRET_MANAGER_PREFIX", GCP_SECRET_MANAGER_PREFIX
263
599
  )
600
+ .environment_variable(
601
+ "METAFLOW_AZURE_KEY_VAULT_PREFIX", AZURE_KEY_VAULT_PREFIX
602
+ )
264
603
  .environment_variable("METAFLOW_S3_ENDPOINT_URL", S3_ENDPOINT_URL)
265
604
  .environment_variable(
266
605
  "METAFLOW_AZURE_STORAGE_BLOB_SERVICE_ENDPOINT",
@@ -276,6 +615,18 @@ class Kubernetes(object):
276
615
  .environment_variable(
277
616
  "METAFLOW_INIT_SCRIPT", KUBERNETES_SANDBOX_INIT_SCRIPT
278
617
  )
618
+ .environment_variable(
619
+ "METAFLOW_KUBERNETES_SANDBOX_INIT_SCRIPT",
620
+ KUBERNETES_SANDBOX_INIT_SCRIPT,
621
+ )
622
+ .environment_variable(
623
+ "METAFLOW_ARGO_WORKFLOWS_KUBERNETES_SECRETS",
624
+ ARGO_WORKFLOWS_KUBERNETES_SECRETS,
625
+ )
626
+ .environment_variable(
627
+ "METAFLOW_ARGO_WORKFLOWS_ENV_VARS_TO_SKIP",
628
+ ARGO_WORKFLOWS_ENV_VARS_TO_SKIP,
629
+ )
279
630
  .environment_variable("METAFLOW_OTEL_ENDPOINT", OTEL_ENDPOINT)
280
631
  # Skip setting METAFLOW_DATASTORE_SYSROOT_LOCAL because metadata sync
281
632
  # between the local user instance and the remote Kubernetes pod
@@ -284,7 +635,6 @@ class Kubernetes(object):
284
635
  # see get_datastore_root_from_config in datastore/local.py).
285
636
  )
286
637
 
287
- self.num_parallel = num_parallel
288
638
  # Temporary passing of *some* environment variables. Do not rely on this
289
639
  # mechanism as it will be removed in the near future
290
640
  for k, v in config_values():
@@ -321,13 +671,25 @@ class Kubernetes(object):
321
671
 
322
672
  for name, value in env.items():
323
673
  job.environment_variable(name, value)
674
+ # Add job specific labels
675
+ system_labels = {
676
+ "app.kubernetes.io/name": "metaflow-task",
677
+ "app.kubernetes.io/part-of": "metaflow",
678
+ }
679
+ for name, value in system_labels.items():
680
+ job.label(name, value)
324
681
 
325
- annotations = {
326
- "metaflow/user": user,
682
+ # Add job specific annotations not set in the decorator.
683
+ system_annotations = {
327
684
  "metaflow/flow_name": flow_name,
685
+ "metaflow/run_id": run_id,
686
+ "metaflow/step_name": step_name,
687
+ "metaflow/task_id": task_id,
688
+ "metaflow/attempt": attempt,
689
+ "metaflow/user": user,
328
690
  }
329
691
  if current.get("project_name"):
330
- annotations.update(
692
+ system_annotations.update(
331
693
  {
332
694
  "metaflow/project_name": current.project_name,
333
695
  "metaflow/branch_name": current.branch_name,
@@ -335,18 +697,12 @@ class Kubernetes(object):
335
697
  }
336
698
  )
337
699
 
338
- for name, value in annotations.items():
700
+ for name, value in system_annotations.items():
339
701
  job.annotation(name, value)
340
702
 
341
- (
342
- job.annotation("metaflow/run_id", run_id)
343
- .annotation("metaflow/step_name", step_name)
344
- .annotation("metaflow/task_id", task_id)
345
- .annotation("metaflow/attempt", attempt)
346
- .label("app.kubernetes.io/name", "metaflow-task")
347
- .label("app.kubernetes.io/part-of", "metaflow")
348
- )
703
+ return job
349
704
 
705
+ def create_k8sjob(self, job):
350
706
  return job.create()
351
707
 
352
708
  def wait(self, stdout_location, stderr_location, echo=None):
@@ -360,7 +716,7 @@ class Kubernetes(object):
360
716
  sigmoid = 1.0 / (1.0 + math.exp(-0.01 * secs_since_start + 9.0))
361
717
  return 0.5 + sigmoid * 30.0
362
718
 
363
- def wait_for_launch(job, child_jobs):
719
+ def wait_for_launch(job):
364
720
  status = job.status
365
721
  echo(
366
722
  "Task is starting (%s)..." % status,
@@ -370,60 +726,43 @@ class Kubernetes(object):
370
726
  t = time.time()
371
727
  start_time = time.time()
372
728
  while job.is_waiting:
373
- # new_status = job.status
374
- if status != job.status or (time.time() - t) > 30:
375
- if not child_jobs:
376
- child_statuses = ""
377
- else:
378
- status_keys = set(
379
- [child_job.status for child_job in child_jobs]
380
- )
381
- status_counts = [
382
- (
383
- status,
384
- len(
385
- [
386
- child_job.status == status
387
- for child_job in child_jobs
388
- ]
389
- ),
390
- )
391
- for status in status_keys
392
- ]
393
- child_statuses = " (parallel node status: [{}])".format(
394
- ", ".join(
395
- [
396
- "{}:{}".format(status, num)
397
- for (status, num) in sorted(status_counts)
398
- ]
399
- )
400
- )
401
-
402
- status = job.status
729
+ new_status = job.status
730
+ if status != new_status or (time.time() - t) > 30:
731
+ status = new_status
403
732
  echo(
404
- "Task is starting (status %s)... %s" % (status, child_statuses),
733
+ "Task is starting (%s)..." % status,
405
734
  "stderr",
406
735
  job_id=job.id,
407
736
  )
408
737
  t = time.time()
409
738
  time.sleep(update_delay(time.time() - start_time))
410
739
 
411
- prefix = b"[%s] " % util.to_bytes(self._job.id)
740
+ prefix = lambda: b"[%s] " % util.to_bytes(self._job.id)
412
741
 
413
742
  stdout_tail = get_log_tailer(stdout_location, self._datastore.TYPE)
414
743
  stderr_tail = get_log_tailer(stderr_location, self._datastore.TYPE)
415
744
 
416
- child_jobs = []
417
745
  # 1) Loop until the job has started
418
- wait_for_launch(self._job, child_jobs)
746
+ wait_for_launch(self._job)
419
747
 
420
748
  # 2) Tail logs until the job has finished
749
+ self._output_final_logs = False
750
+
751
+ def _has_updates():
752
+ if self._job.is_running:
753
+ return True
754
+ # Make sure to output final tail for a job that has finished.
755
+ if not self._output_final_logs:
756
+ self._output_final_logs = True
757
+ return True
758
+ return False
759
+
421
760
  tail_logs(
422
- prefix=prefix,
761
+ prefix=prefix(),
423
762
  stdout_tail=stdout_tail,
424
763
  stderr_tail=stderr_tail,
425
764
  echo=echo,
426
- has_log_updates=lambda: self._job.is_running,
765
+ has_log_updates=_has_updates,
427
766
  )
428
767
  # 3) Fetch remaining logs
429
768
  #
@@ -435,7 +774,6 @@ class Kubernetes(object):
435
774
  # exists prior to calling S3Tail and note the user about
436
775
  # truncated logs if it doesn't.
437
776
  # TODO : For hard crashes, we can fetch logs from the pod.
438
-
439
777
  if self._job.has_failed:
440
778
  exit_code, reason = self._job.reason
441
779
  msg = next(
@@ -469,60 +807,3 @@ class Kubernetes(object):
469
807
  "stderr",
470
808
  job_id=self._job.id,
471
809
  )
472
-
473
- @staticmethod
474
- def _get_labels(extra_labels=None):
475
- if extra_labels is None:
476
- extra_labels = {}
477
- env_labels = KUBERNETES_LABELS.split(",") if KUBERNETES_LABELS else []
478
- env_labels = parse_kube_keyvalue_list(env_labels, False)
479
- labels = {**env_labels, **extra_labels}
480
- validate_kube_labels(labels)
481
- return labels
482
-
483
-
484
- def validate_kube_labels(
485
- labels: Optional[Dict[str, Optional[str]]],
486
- ) -> bool:
487
- """Validate label values.
488
-
489
- This validates the kubernetes label values. It does not validate the keys.
490
- Ideally, keys should be static and also the validation rules for keys are
491
- more complex than those for values. For full validation rules, see:
492
-
493
- https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#syntax-and-character-set
494
- """
495
-
496
- def validate_label(s: Optional[str]):
497
- regex_match = r"^(([A-Za-z0-9][-A-Za-z0-9_.]{0,61})?[A-Za-z0-9])?$"
498
- if not s:
499
- # allow empty label
500
- return True
501
- if not re.search(regex_match, s):
502
- raise KubernetesException(
503
- 'Invalid value: "%s"\n'
504
- "A valid label must be an empty string or one that\n"
505
- " - Consist of alphanumeric, '-', '_' or '.' characters\n"
506
- " - Begins and ends with an alphanumeric character\n"
507
- " - Is at most 63 characters" % s
508
- )
509
- return True
510
-
511
- return all([validate_label(v) for v in labels.values()]) if labels else True
512
-
513
-
514
- def parse_kube_keyvalue_list(items: List[str], requires_both: bool = True):
515
- try:
516
- ret = {}
517
- for item_str in items:
518
- item = item_str.split("=", 1)
519
- if requires_both:
520
- item[1] # raise IndexError
521
- if str(item[0]) in ret:
522
- raise KubernetesException("Duplicate key found: %s" % str(item[0]))
523
- ret[str(item[0])] = str(item[1]) if len(item) > 1 else None
524
- return ret
525
- except KubernetesException as e:
526
- raise e
527
- except (AttributeError, IndexError):
528
- raise KubernetesException("Unable to parse kubernetes list: %s" % items)