ob-metaflow 2.11.15.3__py2.py3-none-any.whl → 2.11.16.2__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of ob-metaflow might be problematic.
Files changed (28)
  1. metaflow/__init__.py +3 -0
  2. metaflow/clone_util.py +6 -0
  3. metaflow/extension_support/plugins.py +1 -1
  4. metaflow/metaflow_config.py +5 -3
  5. metaflow/metaflow_environment.py +3 -3
  6. metaflow/plugins/__init__.py +4 -4
  7. metaflow/plugins/argo/argo_workflows.py +8 -0
  8. metaflow/plugins/azure/azure_secret_manager_secrets_provider.py +18 -14
  9. metaflow/plugins/datatools/s3/s3.py +1 -1
  10. metaflow/plugins/gcp/__init__.py +1 -1
  11. metaflow/plugins/gcp/gcp_secret_manager_secrets_provider.py +11 -6
  12. metaflow/plugins/kubernetes/kubernetes.py +79 -49
  13. metaflow/plugins/kubernetes/kubernetes_cli.py +20 -33
  14. metaflow/plugins/kubernetes/kubernetes_client.py +4 -1
  15. metaflow/plugins/kubernetes/kubernetes_decorator.py +44 -61
  16. metaflow/plugins/kubernetes/kubernetes_job.py +217 -584
  17. metaflow/plugins/kubernetes/kubernetes_jobsets.py +784 -0
  18. metaflow/plugins/timeout_decorator.py +2 -1
  19. metaflow/task.py +1 -12
  20. metaflow/tuple_util.py +27 -0
  21. metaflow/util.py +0 -15
  22. metaflow/version.py +1 -1
  23. {ob_metaflow-2.11.15.3.dist-info → ob_metaflow-2.11.16.2.dist-info}/METADATA +2 -2
  24. {ob_metaflow-2.11.15.3.dist-info → ob_metaflow-2.11.16.2.dist-info}/RECORD +28 -26
  25. {ob_metaflow-2.11.15.3.dist-info → ob_metaflow-2.11.16.2.dist-info}/LICENSE +0 -0
  26. {ob_metaflow-2.11.15.3.dist-info → ob_metaflow-2.11.16.2.dist-info}/WHEEL +0 -0
  27. {ob_metaflow-2.11.15.3.dist-info → ob_metaflow-2.11.16.2.dist-info}/entry_points.txt +0 -0
  28. {ob_metaflow-2.11.15.3.dist-info → ob_metaflow-2.11.16.2.dist-info}/top_level.txt +0 -0
@@ -2,18 +2,23 @@ import json
  import math
  import random
  import time
- import os
- import socket
  import copy
-
+ import sys
+ from metaflow.tracing import inject_tracing_vars
  from metaflow.exception import MetaflowException
  from metaflow.metaflow_config import KUBERNETES_SECRETS
+ from metaflow.unbounded_foreach import UBF_CONTROL, UBF_TASK

  CLIENT_REFRESH_INTERVAL_SECONDS = 300
+ from .kubernetes_jobsets import (
+ KubernetesJobSet, # We need this import for Kubernetes Client.
+ )
+

  class KubernetesJobException(MetaflowException):
  headline = "Kubernetes job error"

+
  # Implements truncated exponential backoff from
  # https://cloud.google.com/storage/docs/retry-strategy#exponential-backoff
  def k8s_retry(deadline_seconds=60, max_backoff=32):
@@ -56,19 +61,7 @@ class KubernetesJob(object):
  self._client = client
  self._kwargs = kwargs

- def create(self):
- # A discerning eye would notice and question the choice of using the
- # V1Job construct over the V1Pod construct given that we don't rely much
- # on any of the V1Job semantics. The major reasons at the moment are -
- # 1. It makes the Kubernetes UIs (Octant, Lens) a bit easier on
- # the eyes, although even that can be questioned.
- # 2. AWS Step Functions, at the moment (Apr' 22) only supports
- # executing Jobs and not Pods as part of it's publicly declared
- # API. When we ship the AWS Step Functions integration with EKS,
- # it will hopefully lessen our workload.
- #
- # Note: This implementation ensures that there is only one unique Pod
- # (unique UID) per Metaflow task attempt.
+ def create_job_spec(self):
  client = self._client.get()

  # tmpfs variables
@@ -80,529 +73,103 @@ class KubernetesJob(object):
  if self._kwargs["shared_memory"]
  else None
  )
-
- jobset_name = "js-%s" % self._kwargs["attrs"]["metaflow.task_id"].split('-')[-1]
- main_job_name = "control"
- main_job_index = 0
- main_pod_index = 0
- subdomain = jobset_name
- master_port = int(self._kwargs['port']) if self._kwargs['port'] else None
- shared_memory = int(self._kwargs['shared_memory']) if self._kwargs['shared_memory'] else None
-
- passwordless_ssh = self._kwargs["attrs"]["requires_passwordless_ssh"]
- if passwordless_ssh:
- passwordless_ssh_service_name = subdomain
- passwordless_ssh_service_selector = {
- "passwordless-ssh-jobset": "true"
- }
- else:
- passwordless_ssh_service_name = None
- passwordless_ssh_service_selector = {}
-
- fqdn_suffix = "%s.svc.cluster.local" % self._kwargs["namespace"]
- jobset_main_addr = "%s-%s-%s-%s.%s.%s" % (
- jobset_name,
- main_job_name,
- main_job_index,
- main_pod_index,
- subdomain,
- fqdn_suffix,
- )
-
- def _install_jobset(
- repo_url="https://github.com/kubernetes-sigs/jobset",
- python_sdk_path="jobset/sdk/python",
- ):
-
- # TODO (Eddie): Remove this and suggest to user.
-
- import subprocess
- import tempfile
- import shutil
- import os
-
- with open(os.devnull, "wb") as devnull:
- cwd = os.getcwd()
- tmp_dir = tempfile.mkdtemp()
- os.chdir(tmp_dir)
- subprocess.check_call(
- ["git", "clone", repo_url], stdout=devnull, stderr=subprocess.STDOUT
- )
- tmp_python_sdk_path = os.path.join(tmp_dir, python_sdk_path)
- os.chdir(tmp_python_sdk_path)
- subprocess.check_call(
- ["pip", "install", "."], stdout=devnull, stderr=subprocess.STDOUT
- )
- os.chdir(cwd)
- shutil.rmtree(tmp_dir)
-
- def _get_passwordless_ssh_service():
-
- return client.V1Service(
- api_version="v1",
- kind="Service",
- metadata=client.V1ObjectMeta(
- name=passwordless_ssh_service_name,
- namespace=self._kwargs["namespace"]
- ),
- spec=client.V1ServiceSpec(
- cluster_ip="None",
- internal_traffic_policy="Cluster",
- ip_families=["IPv4"],
- ip_family_policy="SingleStack",
- selector=passwordless_ssh_service_selector,
- session_affinity="None",
- type="ClusterIP",
- ports=[
- client.V1ServicePort(
- name="control",
- port=22,
- protocol="TCP",
- target_port=22
- )
- ]
- )
- )
-
- def _get_replicated_job(job_name, parallelism, command):
- return jobset.models.jobset_v1alpha2_replicated_job.JobsetV1alpha2ReplicatedJob(
- name=job_name,
- template=client.V1JobTemplateSpec(
- metadata=client.V1ObjectMeta(
- annotations=self._kwargs.get("annotations", {}),
- labels=self._kwargs.get("labels", {}),
- namespace=self._kwargs["namespace"],
- ),
- spec=client.V1JobSpec(
- parallelism=parallelism, # how many jobs can run at once
- completions=parallelism, # how many Pods the JobSet creates in total
- backoff_limit=0,
- ttl_seconds_after_finished=7
- * 60
- * 60
- * 24,
- template=client.V1PodTemplateSpec(
- metadata=client.V1ObjectMeta(
- annotations=self._kwargs.get("annotations", {}),
- labels={
- **self._kwargs.get("labels", {}),
- **passwordless_ssh_service_selector, # TODO: necessary?
- # TODO: cluster-name, app.kubernetes.io/name necessary?
- },
- namespace=self._kwargs["namespace"],
- ),
- spec=client.V1PodSpec(
- active_deadline_seconds=self._kwargs[
- "timeout_in_seconds"
- ],
- containers=[
- client.V1Container(
- command=command,
- ports=[client.V1ContainerPort(container_port=master_port)] if master_port and job_name=="control" else [],
- env=[
- client.V1EnvVar(name=k, value=str(v))
- for k, v in self._kwargs.get(
- "environment_variables", {}
- ).items()
- ]
- + [
- client.V1EnvVar(
- name=k,
- value_from=client.V1EnvVarSource(
- field_ref=client.V1ObjectFieldSelector(
- field_path=str(v)
- )
- ),
- )
- for k, v in {
- "METAFLOW_KUBERNETES_POD_NAMESPACE": "metadata.namespace",
- "METAFLOW_KUBERNETES_POD_NAME": "metadata.name",
- "METAFLOW_KUBERNETES_POD_ID": "metadata.uid",
- "METAFLOW_KUBERNETES_SERVICE_ACCOUNT_NAME": "spec.serviceAccountName",
- "METAFLOW_KUBERNETES_NODE_IP": "status.hostIP",
- }.items()
- ]
- # Mimicking the AWS Batch Multinode env vars.
- + [
- client.V1EnvVar(
- name="MASTER_ADDR",
- value=jobset_main_addr,
- ),
- client.V1EnvVar(
- name="MASTER_PORT",
- value=str(master_port),
- ),
- client.V1EnvVar(
- name="RANK",
- value_from=client.V1EnvVarSource(
- field_ref=client.V1ObjectFieldSelector(
- field_path="metadata.annotations['batch.kubernetes.io/job-completion-index']"
- )
- ),
- ),
- client.V1EnvVar(
- name="WORLD_SIZE",
- value=str(self._kwargs["num_parallel"]),
- ),
- client.V1EnvVar(
- name="PYTHONUNBUFFERED",
- value="0",
- ),
- ],
- env_from=[
- client.V1EnvFromSource(
- secret_ref=client.V1SecretEnvSource(
- name=str(k),
- # optional=True
- )
- )
- for k in list(
- self._kwargs.get("secrets", [])
- )
- + KUBERNETES_SECRETS.split(",")
- if k
- ],
- image=self._kwargs["image"],
- image_pull_policy=self._kwargs[
- "image_pull_policy"
- ],
- name=self._kwargs["step_name"].replace(
- "_", "-"
- ),
- resources=client.V1ResourceRequirements(
- requests={
- "cpu": str(self._kwargs["cpu"]),
- "memory": "%sM"
- % str(self._kwargs["memory"]),
- "ephemeral-storage": "%sM"
- % str(self._kwargs["disk"]),
- },
- limits={
- "%s.com/gpu".lower()
- % self._kwargs["gpu_vendor"]: str(
- self._kwargs["gpu"]
- )
- for k in [0]
- # Don't set GPU limits if gpu isn't specified.
- if self._kwargs["gpu"] is not None
- },
- ),
- volume_mounts=(
- [
- client.V1VolumeMount(
- mount_path=self._kwargs.get(
- "tmpfs_path"
- ),
- name="tmpfs-ephemeral-volume",
- )
- ]
- if tmpfs_enabled
- else []
- )
- + (
- [
- client.V1VolumeMount(
- mount_path="/dev/shm",
- name="dhsm"
- )
- ]
- if shared_memory else []
- )
- + (
- [
- client.V1VolumeMount(
- mount_path=path, name=claim
- )
- for claim, path in self._kwargs[
- "persistent_volume_claims"
- ].items()
- ]
- if self._kwargs["persistent_volume_claims"]
- is not None
- else []
- ),
- )
- ],
- node_selector=self._kwargs.get("node_selector"),
- restart_policy="Never",
-
- set_hostname_as_fqdn=True, # configure pod hostname as pod's FQDN
- share_process_namespace=False, # default
- subdomain=subdomain, # FQDN = <hostname>.<subdomain>.<pod namespace>.svc.<cluster domain>
-
- service_account_name=self._kwargs["service_account"],
- termination_grace_period_seconds=0,
- tolerations=[
- client.V1Toleration(**toleration)
- for toleration in self._kwargs.get("tolerations")
- or []
- ],
- volumes=(
- [
- client.V1Volume(
- name="tmpfs-ephemeral-volume",
- empty_dir=client.V1EmptyDirVolumeSource(
- medium="Memory",
- size_limit="{}Mi".format(tmpfs_size),
- ),
- )
- ]
- if tmpfs_enabled
- else []
- )
- + (
- [
- client.V1Volume(
- name="dhsm",
- empty_dir=client.V1EmptyDirVolumeSource(
- medium="Memory",
- size_limit="{}Mi".format(shared_memory),
- )
- )
- ]
- if shared_memory else []
- )
- + (
- [
- client.V1Volume(
- name=claim,
- persistent_volume_claim=client.V1PersistentVolumeClaimVolumeSource(
- claim_name=claim
- ),
- )
- for claim in self._kwargs[
- "persistent_volume_claims"
- ].keys()
- ]
- if self._kwargs["persistent_volume_claims"]
- is not None
- else []
- ),
- ),
- ),
- ),
- ),
- )
-
- if "num_parallel" in self._kwargs and self._kwargs["num_parallel"] >= 1:
-
- try:
- import jobset
- except ImportError:
- _install_jobset()
- import jobset
-
- main_commands = copy.copy(self._kwargs["command"])
- main_commands[-1] = main_commands[-1].replace(
- "[multinode-args]", "--split-index 0"
- )
-
- task_id = self._kwargs["attrs"]["metaflow.task_id"]
- secondary_commands = copy.copy(self._kwargs["command"])
- # RANK needs +1 because control node is not in the worker index group, yet we want global nodes.
- # Technically, control and worker could be same replicated job type, but cleaner to separate for future use cases.
- secondary_commands[-1] = secondary_commands[-1].replace(
- "[multinode-args]", "--split-index `expr $RANK + 1`"
- )
- secondary_commands[-1] = secondary_commands[-1].replace(
- "ubf_control", "ubf_task"
- )
- secondary_commands[-1] = secondary_commands[-1].replace(
- task_id,
- task_id.replace("control-", "") + "-node-`expr $RANK + 1`",
- )
-
- if passwordless_ssh:
- if not os.path.exists("/usr/sbin/sshd"):
- raise KubernetesJobException(
- "This @parallel decorator requires sshd to be installed in the container image."
- "Please install OpenSSH."
- )
-
- # run sshd in background
- main_commands[-1] = "/usr/sbin/sshd -D & %s" % main_commands[-1]
- secondary_commands[-1] = "/usr/sbin/sshd -D & %s" % secondary_commands[-1]
-
- replicated_jobs = [_get_replicated_job("control", 1, main_commands)]
- if self._kwargs["num_parallel"] > 1:
- replicated_jobs.append(
- _get_replicated_job("worker", self._kwargs["num_parallel"] - 1, secondary_commands)
- )
-
- self._jobset = jobset.models.jobset_v1alpha2_job_set.JobsetV1alpha2JobSet(
- api_version="jobset.x-k8s.io/v1alpha2",
- kind="JobSet",
+ return client.V1JobSpec(
+ # Retries are handled by Metaflow when it is responsible for
+ # executing the flow. The responsibility is moved to Kubernetes
+ # when Argo Workflows is responsible for the execution.
+ backoff_limit=self._kwargs.get("retries", 0),
+ completions=self._kwargs.get("completions", 1),
+ ttl_seconds_after_finished=7
+ * 60
+ * 60 # Remove job after a week. TODO: Make this configurable
+ * 24,
+ template=client.V1PodTemplateSpec(
  metadata=client.V1ObjectMeta(
  annotations=self._kwargs.get("annotations", {}),
  labels=self._kwargs.get("labels", {}),
- name=jobset_name,
  namespace=self._kwargs["namespace"],
  ),
- spec=jobset.models.jobset_v1alpha2_job_set_spec.JobsetV1alpha2JobSetSpec(
- network=jobset.models.jobset_v1alpha2_network.JobsetV1alpha2Network(
- enable_dns_hostnames=True if not self._kwargs['attrs']['requires_passwordless_ssh'] else False,
- subdomain=subdomain
- ),
- replicated_jobs=replicated_jobs
- ),
- )
- self._passwordless_ssh_service = _get_passwordless_ssh_service()
- else:
- self._job = client.V1Job(
- api_version="batch/v1",
- kind="Job",
- metadata=client.V1ObjectMeta(
- # Annotations are for humans
- annotations=self._kwargs.get("annotations", {}),
- # While labels are for Kubernetes
- labels=self._kwargs.get("labels", {}),
- generate_name=self._kwargs["generate_name"],
- namespace=self._kwargs["namespace"], # Defaults to `default`
- ),
- spec=client.V1JobSpec(
- # Retries are handled by Metaflow when it is responsible for
- # executing the flow. The responsibility is moved to Kubernetes
- # when Argo Workflows is responsible for the execution.
- backoff_limit=self._kwargs.get("retries", 0),
- completions=1, # A single non-indexed pod job
- ttl_seconds_after_finished=7
- * 60
- * 60 # Remove job after a week. TODO: Make this configurable
- * 24,
- template=client.V1PodTemplateSpec(
- metadata=client.V1ObjectMeta(
- annotations=self._kwargs.get("annotations", {}),
- labels=self._kwargs.get("labels", {}),
- namespace=self._kwargs["namespace"],
- ),
- spec=client.V1PodSpec(
- # Timeout is set on the pod and not the job (important!)
- active_deadline_seconds=self._kwargs["timeout_in_seconds"],
- # TODO (savin): Enable affinities for GPU scheduling.
- # affinity=?,
- containers=[
- client.V1Container(
- command=self._kwargs["command"],
- ports=[
- client.V1ContainerPort(
- container_port=int(self._kwargs["port"])
- )
- ]
- if "port" in self._kwargs and self._kwargs["port"]
- else None,
- env=[
- client.V1EnvVar(name=k, value=str(v))
- for k, v in self._kwargs.get(
- "environment_variables", {}
- ).items()
- ]
- # And some downward API magic. Add (key, value)
- # pairs below to make pod metadata available
- # within Kubernetes container.
- + [
- client.V1EnvVar(
- name=k,
- value_from=client.V1EnvVarSource(
- field_ref=client.V1ObjectFieldSelector(
- field_path=str(v)
- )
- ),
- )
- for k, v in {
- "METAFLOW_KUBERNETES_POD_NAMESPACE": "metadata.namespace",
- "METAFLOW_KUBERNETES_POD_NAME": "metadata.name",
- "METAFLOW_KUBERNETES_POD_ID": "metadata.uid",
- "METAFLOW_KUBERNETES_SERVICE_ACCOUNT_NAME": "spec.serviceAccountName",
- "METAFLOW_KUBERNETES_NODE_IP": "status.hostIP",
- }.items()
- ],
- env_from=[
- client.V1EnvFromSource(
- secret_ref=client.V1SecretEnvSource(
- name=str(k),
- # optional=True
- )
+ spec=client.V1PodSpec(
+ # Timeout is set on the pod and not the job (important!)
+ active_deadline_seconds=self._kwargs["timeout_in_seconds"],
+ # TODO (savin): Enable affinities for GPU scheduling.
+ # affinity=?,
+ containers=[
+ client.V1Container(
+ command=self._kwargs["command"],
+ ports=[]
+ if self._kwargs["port"] is None
+ else [
+ client.V1ContainerPort(
+ container_port=int(self._kwargs["port"])
+ )
+ ],
+ env=[
+ client.V1EnvVar(name=k, value=str(v))
+ for k, v in self._kwargs.get(
+ "environment_variables", {}
+ ).items()
+ ]
+ # And some downward API magic. Add (key, value)
+ # pairs below to make pod metadata available
+ # within Kubernetes container.
+ + [
+ client.V1EnvVar(
+ name=k,
+ value_from=client.V1EnvVarSource(
+ field_ref=client.V1ObjectFieldSelector(
+ field_path=str(v)
  )
- for k in list(self._kwargs.get("secrets", []))
- + KUBERNETES_SECRETS.split(",")
- if k
- ],
- image=self._kwargs["image"],
- image_pull_policy=self._kwargs["image_pull_policy"],
- name=self._kwargs["step_name"].replace("_", "-"),
- resources=client.V1ResourceRequirements(
- requests={
- "cpu": str(self._kwargs["cpu"]),
- "memory": "%sM"
- % str(self._kwargs["memory"]),
- "ephemeral-storage": "%sM"
- % str(self._kwargs["disk"]),
- },
- limits={
- "%s.com/gpu".lower()
- % self._kwargs["gpu_vendor"]: str(
- self._kwargs["gpu"]
- )
- for k in [0]
- # Don't set GPU limits if gpu isn't specified.
- if self._kwargs["gpu"] is not None
- },
- ),
- volume_mounts=(
- [
- client.V1VolumeMount(
- mount_path=self._kwargs.get(
- "tmpfs_path"
- ),
- name="tmpfs-ephemeral-volume",
- )
- ]
- if tmpfs_enabled
- else []
- )
- + (
- [
- client.V1VolumeMount(
- mount_path=path, name=claim
- )
- for claim, path in self._kwargs[
- "persistent_volume_claims"
- ].items()
- ]
- if self._kwargs["persistent_volume_claims"]
- is not None
- else []
  ),
  )
+ for k, v in {
+ "METAFLOW_KUBERNETES_POD_NAMESPACE": "metadata.namespace",
+ "METAFLOW_KUBERNETES_POD_NAME": "metadata.name",
+ "METAFLOW_KUBERNETES_POD_ID": "metadata.uid",
+ "METAFLOW_KUBERNETES_SERVICE_ACCOUNT_NAME": "spec.serviceAccountName",
+ "METAFLOW_KUBERNETES_NODE_IP": "status.hostIP",
+ }.items()
+ ]
+ + [
+ client.V1EnvVar(name=k, value=str(v))
+ for k, v in inject_tracing_vars({}).items()
  ],
- node_selector=self._kwargs.get("node_selector"),
- # TODO (savin): Support image_pull_secrets
- # image_pull_secrets=?,
- # TODO (savin): Support preemption policies
- # preemption_policy=?,
- #
- # A Container in a Pod may fail for a number of
- # reasons, such as because the process in it exited
- # with a non-zero exit code, or the Container was
- # killed due to OOM etc. If this happens, fail the pod
- # and let Metaflow handle the retries.
- restart_policy="Never",
- service_account_name=self._kwargs["service_account"],
- # Terminate the container immediately on SIGTERM
- termination_grace_period_seconds=0,
- tolerations=[
- client.V1Toleration(**toleration)
- for toleration in self._kwargs.get("tolerations") or []
+ env_from=[
+ client.V1EnvFromSource(
+ secret_ref=client.V1SecretEnvSource(
+ name=str(k),
+ # optional=True
+ )
+ )
+ for k in list(self._kwargs.get("secrets", []))
+ + KUBERNETES_SECRETS.split(",")
+ if k
  ],
- volumes=(
+ image=self._kwargs["image"],
+ image_pull_policy=self._kwargs["image_pull_policy"],
+ name=self._kwargs["step_name"].replace("_", "-"),
+ resources=client.V1ResourceRequirements(
+ requests={
+ "cpu": str(self._kwargs["cpu"]),
+ "memory": "%sM" % str(self._kwargs["memory"]),
+ "ephemeral-storage": "%sM"
+ % str(self._kwargs["disk"]),
+ },
+ limits={
+ "%s.com/gpu".lower()
+ % self._kwargs["gpu_vendor"]: str(
+ self._kwargs["gpu"]
+ )
+ for k in [0]
+ # Don't set GPU limits if gpu isn't specified.
+ if self._kwargs["gpu"] is not None
+ },
+ ),
+ volume_mounts=(
  [
- client.V1Volume(
+ client.V1VolumeMount(
+ mount_path=self._kwargs.get("tmpfs_path"),
  name="tmpfs-ephemeral-volume",
- empty_dir=client.V1EmptyDirVolumeSource(
- medium="Memory",
- # Add default unit as ours differs from Kubernetes default.
- size_limit="{}Mi".format(tmpfs_size),
- ),
  )
  ]
  if tmpfs_enabled
@@ -610,24 +177,122 @@ class KubernetesJob(object):
  )
  + (
  [
- client.V1Volume(
- name=claim,
- persistent_volume_claim=client.V1PersistentVolumeClaimVolumeSource(
- claim_name=claim
- ),
+ client.V1VolumeMount(
+ mount_path="/dev/shm", name="dhsm"
  )
- for claim in self._kwargs[
+ ]
+ if shared_memory
+ else []
+ )
+ + (
+ [
+ client.V1VolumeMount(mount_path=path, name=claim)
+ for claim, path in self._kwargs[
  "persistent_volume_claims"
- ].keys()
+ ].items()
  ]
  if self._kwargs["persistent_volume_claims"] is not None
  else []
  ),
- # TODO (savin): Set termination_message_policy
- ),
+ )
+ ],
+ node_selector=self._kwargs.get("node_selector"),
+ # TODO (savin): Support image_pull_secrets
+ # image_pull_secrets=?,
+ # TODO (savin): Support preemption policies
+ # preemption_policy=?,
+ #
+ # A Container in a Pod may fail for a number of
+ # reasons, such as because the process in it exited
+ # with a non-zero exit code, or the Container was
+ # killed due to OOM etc. If this happens, fail the pod
+ # and let Metaflow handle the retries.
+ restart_policy="Never",
+ service_account_name=self._kwargs["service_account"],
+ # Terminate the container immediately on SIGTERM
+ termination_grace_period_seconds=0,
+ tolerations=[
+ client.V1Toleration(**toleration)
+ for toleration in self._kwargs.get("tolerations") or []
+ ],
+ volumes=(
+ [
+ client.V1Volume(
+ name="tmpfs-ephemeral-volume",
+ empty_dir=client.V1EmptyDirVolumeSource(
+ medium="Memory",
+ # Add default unit as ours differs from Kubernetes default.
+ size_limit="{}Mi".format(tmpfs_size),
+ ),
+ )
+ ]
+ if tmpfs_enabled
+ else []
+ )
+ + (
+ [
+ client.V1Volume(
+ name="dhsm",
+ empty_dir=client.V1EmptyDirVolumeSource(
+ medium="Memory",
+ size_limit="{}Mi".format(shared_memory),
+ ),
+ )
+ ]
+ if shared_memory
+ else []
+ )
+ + (
+ [
+ client.V1Volume(
+ name=claim,
+ persistent_volume_claim=client.V1PersistentVolumeClaimVolumeSource(
+ claim_name=claim
+ ),
+ )
+ for claim in self._kwargs["persistent_volume_claims"].keys()
+ ]
+ if self._kwargs["persistent_volume_claims"] is not None
+ else []
  ),
+ # TODO (savin): Set termination_message_policy
  ),
- )
+ ),
+ )
+
+ def create(self):
+ # A discerning eye would notice and question the choice of using the
+ # V1Job construct over the V1Pod construct given that we don't rely much
+ # on any of the V1Job semantics. The major reasons at the moment are -
+ # 1. It makes the Kubernetes UIs (Octant, Lens) a bit easier on
+ # the eyes, although even that can be questioned.
+ # 2. AWS Step Functions, at the moment (Apr' 22) only supports
+ # executing Jobs and not Pods as part of it's publicly declared
+ # API. When we ship the AWS Step Functions integration with EKS,
+ # it will hopefully lessen our workload.
+ #
+ # Note: This implementation ensures that there is only one unique Pod
+ # (unique UID) per Metaflow task attempt.
+ client = self._client.get()
+
+ # tmpfs variables
+ use_tmpfs = self._kwargs["use_tmpfs"]
+ tmpfs_size = self._kwargs["tmpfs_size"]
+ tmpfs_enabled = use_tmpfs or (tmpfs_size and not use_tmpfs)
+
+ self._job = client.V1Job(
+ api_version="batch/v1",
+ kind="Job",
+ metadata=client.V1ObjectMeta(
+ # Annotations are for humans
+ annotations=self._kwargs.get("annotations", {}),
+ # While labels are for Kubernetes
+ labels=self._kwargs.get("labels", {}),
+ generate_name=self._kwargs["generate_name"],
+ namespace=self._kwargs["namespace"], # Defaults to `default`
+ ),
+ spec=self.create_job_spec(),
+ )
  return self

  def execute(self):
@@ -638,53 +303,19 @@ class KubernetesJob(object):
  # achieve the guarantees that we are seeking.
  # https://github.com/kubernetes/enhancements/issues/1040
  # Hopefully, we will be able to get creative with kube-batch
-
- if "num_parallel" in self._kwargs and self._kwargs["num_parallel"] >= 1:
- # TODO (Eddie): this is kinda gross. fix it.
- if self._kwargs["attrs"]["requires_passwordless_ssh"]:
- api_instance = client.CoreV1Api()
- api_response = api_instance.create_namespaced_service(namespace=self._kwargs['namespace'], body=self._passwordless_ssh_service)
-
- with client.ApiClient() as api_client:
- api_instance = client.CustomObjectsApi(api_client)
-
- response = api_instance.create_namespaced_custom_object(
- body=self._jobset,
- group="jobset.x-k8s.io",
- version="v1alpha2",
- namespace=self._kwargs["namespace"],
- plural="jobsets",
- )
-
- # HACK: Give K8s some time to actually create the job
- time.sleep(10)
-
- # TODO (Eddie): Remove hack and make RunningJobSet.
- # There are many jobs running that should be monitored.
- job_name = "%s-control-0" % response["metadata"]["name"]
- fake_id = 123
- return RunningJob(
- client=self._client,
- name=job_name,
- uid=fake_id,
- namespace=response["metadata"]["namespace"],
- )
-
- else:
- response = (
- client.BatchV1Api()
- .create_namespaced_job(
- body=self._job, namespace=self._kwargs["namespace"]
- )
- .to_dict()
- )
- return RunningJob(
- client=self._client,
- name=response["metadata"]["name"],
- uid=response["metadata"]["uid"],
- namespace=response["metadata"]["namespace"],
+ response = (
+ client.BatchV1Api()
+ .create_namespaced_job(
+ body=self._job, namespace=self._kwargs["namespace"]
  )
-
+ .to_dict()
+ )
+ return RunningJob(
+ client=self._client,
+ name=response["metadata"]["name"],
+ uid=response["metadata"]["uid"],
+ namespace=response["metadata"]["namespace"],
+ )
  except client.rest.ApiException as e:
  raise KubernetesJobException(
  "Unable to launch Kubernetes job.\n %s"
@@ -740,6 +371,7 @@ class KubernetesJob(object):


  class RunningJob(object):
+
  # State Machine implementation for the lifecycle behavior documented in
  # https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/
  #
@@ -793,7 +425,7 @@ class RunningJob(object):
  def best_effort_kill():
  try:
  self.kill()
- except:
+ except Exception as ex:
  pass

  atexit.register(best_effort_kill)
@@ -857,6 +489,7 @@ class RunningJob(object):
  # 3. If the pod object hasn't shown up yet, we set the parallelism to 0
  # to preempt it.
  client = self._client.get()
+
  if not self.is_done:
  if self.is_running:
  # Case 1.