ob-metaflow 2.10.11.1__py2.py3-none-any.whl → 2.11.0.2__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of ob-metaflow might be problematic.

metaflow/metaflow_config.py CHANGED
@@ -296,6 +296,8 @@ KUBERNETES_CONTAINER_REGISTRY = from_conf(
296
296
  )
297
297
  # Toggle for trying to fetch EC2 instance metadata
298
298
  KUBERNETES_FETCH_EC2_METADATA = from_conf("KUBERNETES_FETCH_EC2_METADATA", False)
299
+ # Default port number to open on the pods
300
+ KUBERNETES_PORT = from_conf("KUBERNETES_PORT", None)
299
301
 
300
302
  ARGO_WORKFLOWS_KUBERNETES_SECRETS = from_conf("ARGO_WORKFLOWS_KUBERNETES_SECRETS", "")
301
303
  ARGO_WORKFLOWS_ENV_VARS_TO_SKIP = from_conf("ARGO_WORKFLOWS_ENV_VARS_TO_SKIP", "")
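
The new KUBERNETES_PORT setting follows the same from_conf pattern as the other knobs in this file, so it can presumably be set through the METAFLOW_KUBERNETES_PORT environment variable or the Metaflow config file, with the step-level `port` attribute (added to @kubernetes later in this diff) taking precedence. A minimal sketch, where the port value and step are illustrative:

```python
import os

# Assumption: from_conf-backed settings fall back to the METAFLOW_-prefixed
# environment variable when no config-file entry is present.
os.environ["METAFLOW_KUBERNETES_PORT"] = "3339"

# Per-step override via the decorator attribute introduced in this release:
# @kubernetes(port=3339)
# @step
# def train(self):
#     ...
```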
metaflow/plugins/argo/argo_workflows.py CHANGED
@@ -836,6 +836,11 @@ class ArgoWorkflows(object):
836
836
  # Visit every node and yield the uber DAGTemplate(s).
837
837
  def _dag_templates(self):
838
838
  def _visit(node, exit_node=None, templates=None, dag_tasks=None):
839
+ if node.parallel_foreach:
840
+ raise ArgoWorkflowsException(
841
+ "Deploying flows with @parallel decorator(s) "
842
+ "as Argo Workflows is not supported currently."
843
+ )
839
844
  # Every for-each node results in a separate subDAG and an equivalent
840
845
  # DAGTemplate rooted at the child of the for-each node. Each DAGTemplate
841
846
  # has a unique name - the top-level DAGTemplate is named as the name of
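
For context, the guard above rejects flows that use Metaflow's parallel (unbounded-foreach) pattern when they are deployed to Argo Workflows. A hedged sketch of such a flow, assuming the usual num_parallel / @parallel idiom; running `argo-workflows create` on it would now raise the exception:

```python
from metaflow import FlowSpec, step, parallel

class MultiNodeFlow(FlowSpec):
    @step
    def start(self):
        # num_parallel marks this transition as a parallel (unbounded) foreach.
        self.next(self.train, num_parallel=4)

    @parallel
    @step
    def train(self):
        self.next(self.join)

    @step
    def join(self, inputs):
        self.next(self.end)

    @step
    def end(self):
        pass

if __name__ == "__main__":
    MultiNodeFlow()
```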
metaflow/plugins/kubernetes/kubernetes.py CHANGED
@@ -4,6 +4,7 @@ import os
4
4
  import re
5
5
  import shlex
6
6
  import time
7
+ import copy
7
8
  from typing import Dict, List, Optional
8
9
  import uuid
9
10
  from uuid import uuid4
@@ -174,6 +175,10 @@ class Kubernetes(object):
174
175
  persistent_volume_claims=None,
175
176
  tolerations=None,
176
177
  labels=None,
178
+ annotations=None,
179
+ num_parallel=0,
180
+ attrs={},
181
+ port=None,
177
182
  ):
178
183
  if env is None:
179
184
  env = {}
@@ -213,6 +218,9 @@ class Kubernetes(object):
213
218
  tmpfs_size=tmpfs_size,
214
219
  tmpfs_path=tmpfs_path,
215
220
  persistent_volume_claims=persistent_volume_claims,
221
+ num_parallel=num_parallel,
222
+ attrs=attrs,
223
+ port=port,
216
224
  )
217
225
  .environment_variable("METAFLOW_CODE_SHA", code_package_sha)
218
226
  .environment_variable("METAFLOW_CODE_URL", code_package_url)
@@ -266,6 +274,7 @@ class Kubernetes(object):
266
274
  # see get_datastore_root_from_config in datastore/local.py).
267
275
  )
268
276
 
277
+ self.num_parallel = num_parallel
269
278
  # Temporary passing of *some* environment variables. Do not rely on this
270
279
  # mechanism as it will be removed in the near future
271
280
  for k, v in config_values():
@@ -341,7 +350,7 @@ class Kubernetes(object):
341
350
  sigmoid = 1.0 / (1.0 + math.exp(-0.01 * secs_since_start + 9.0))
342
351
  return 0.5 + sigmoid * 30.0
343
352
 
344
- def wait_for_launch(job):
353
+ def wait_for_launch(job, child_jobs):
345
354
  status = job.status
346
355
  echo(
347
356
  "Task is starting (%s)..." % status,
@@ -351,11 +360,38 @@ class Kubernetes(object):
351
360
  t = time.time()
352
361
  start_time = time.time()
353
362
  while job.is_waiting:
354
- new_status = job.status
355
- if status != new_status or (time.time() - t) > 30:
356
- status = new_status
363
+ # new_status = job.status
364
+ if status != job.status or (time.time() - t) > 30:
365
+ if not child_jobs:
366
+ child_statuses = ""
367
+ else:
368
+ status_keys = set(
369
+ [child_job.status for child_job in child_jobs]
370
+ )
371
+ status_counts = [
372
+ (
373
+ status,
374
+ len(
375
+ [
376
+ child_job.status == status
377
+ for child_job in child_jobs
378
+ ]
379
+ ),
380
+ )
381
+ for status in status_keys
382
+ ]
383
+ child_statuses = " (parallel node status: [{}])".format(
384
+ ", ".join(
385
+ [
386
+ "{}:{}".format(status, num)
387
+ for (status, num) in sorted(status_counts)
388
+ ]
389
+ )
390
+ )
391
+
392
+ status = job.status
357
393
  echo(
358
- "Task is starting (%s)..." % status,
394
+ "Task is starting (status %s)... %s" % (status, child_statuses),
359
395
  "stderr",
360
396
  job_id=job.id,
361
397
  )
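
One note on the aggregation above: the inner list comprehension has no filter, so its length is the total number of child jobs for every status key rather than a per-status count. A Counter-based tally, sketched here under the same child_jobs assumption, is one way to express per-status counts:

```python
from collections import Counter

def summarize_child_statuses(child_jobs):
    # Illustrative helper: count how many parallel pods are in each status.
    if not child_jobs:
        return ""
    counts = Counter(child_job.status for child_job in child_jobs)
    return " (parallel node status: [{}])".format(
        ", ".join(
            "{}:{}".format(status, num) for status, num in sorted(counts.items())
        )
    )
```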
@@ -367,8 +403,9 @@ class Kubernetes(object):
367
403
  stdout_tail = get_log_tailer(stdout_location, self._datastore.TYPE)
368
404
  stderr_tail = get_log_tailer(stderr_location, self._datastore.TYPE)
369
405
 
406
+ child_jobs = []
370
407
  # 1) Loop until the job has started
371
- wait_for_launch(self._job)
408
+ wait_for_launch(self._job, child_jobs)
372
409
 
373
410
  # 2) Tail logs until the job has finished
374
411
  tail_logs(
metaflow/plugins/kubernetes/kubernetes_cli.py CHANGED
@@ -107,6 +107,26 @@ def kubernetes():
107
107
  type=JSONTypeClass(),
108
108
  multiple=False,
109
109
  )
110
+ @click.option(
111
+ "--labels",
112
+ default=None,
113
+ type=JSONTypeClass(),
114
+ multiple=False,
115
+ )
116
+ @click.option(
117
+ "--annotations",
118
+ default=None,
119
+ type=JSONTypeClass(),
120
+ multiple=False,
121
+ )
122
+ @click.option("--ubf-context", default=None, type=click.Choice([None, "ubf_control"]))
123
+ @click.option(
124
+ "--num-parallel",
125
+ default=0,
126
+ type=int,
127
+ help="Number of parallel nodes to run as a multi-node job.",
128
+ )
129
+ @click.option("--port", default=None, help="port number")
110
130
  @click.pass_context
111
131
  def step(
112
132
  ctx,
@@ -132,6 +152,10 @@ def step(
132
152
  run_time_limit=None,
133
153
  persistent_volume_claims=None,
134
154
  tolerations=None,
155
+ labels=None,
156
+ annotations=None,
157
+ num_parallel=None,
158
+ port=None,
135
159
  **kwargs
136
160
  ):
137
161
  def echo(msg, stream="stderr", job_id=None, **kwargs):
@@ -177,11 +201,17 @@ def step(
177
201
  )
178
202
  time.sleep(minutes_between_retries * 60)
179
203
 
204
+ step_args = " ".join(util.dict_to_cli_options(kwargs))
205
+ num_parallel = num_parallel or 0
206
+ if num_parallel and num_parallel > 1:
207
+ # For multinode, we need to add a placeholder that can be mutated by the caller
208
+ step_args += " [multinode-args]"
209
+
180
210
  step_cli = "{entrypoint} {top_args} step {step} {step_args}".format(
181
211
  entrypoint="%s -u %s" % (executable, os.path.basename(sys.argv[0])),
182
212
  top_args=" ".join(util.dict_to_cli_options(ctx.parent.parent.params)),
183
213
  step=step_name,
184
- step_args=" ".join(util.dict_to_cli_options(kwargs)),
214
+ step_args=step_args,
185
215
  )
186
216
 
187
217
  # Set log tailing.
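
The `[multinode-args]` placeholder added here is substituted later in kubernetes_job.py, once per replicated job: the control command receives `--split-index 0` and each worker command receives a shell expression based on its RANK. A rough illustration of that substitution (the command string is simplified):

```python
import copy

command = ["bash", "-c", "... step train --ubf-context ubf_control [multinode-args]"]

control_cmd = copy.copy(command)
control_cmd[-1] = control_cmd[-1].replace("[multinode-args]", "--split-index 0")

worker_cmd = copy.copy(command)
# RANK is the job-completion-index injected into each worker pod; the +1
# keeps split-index 0 reserved for the control task.
worker_cmd[-1] = worker_cmd[-1].replace(
    "[multinode-args]", "--split-index `expr $RANK + 1`"
)
```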
@@ -207,6 +237,10 @@ def step(
207
237
  ),
208
238
  )
209
239
 
240
+ attrs = {
241
+ "metaflow.task_id": kwargs["task_id"],
242
+ "requires_passwordless_ssh": any([getattr(deco, "requires_passwordless_ssh", False) for deco in node.decorators]),
243
+ }
210
244
  try:
211
245
  kubernetes = Kubernetes(
212
246
  datastore=ctx.obj.flow_datastore,
@@ -245,6 +279,11 @@ def step(
245
279
  env=env,
246
280
  persistent_volume_claims=persistent_volume_claims,
247
281
  tolerations=tolerations,
282
+ labels=labels,
283
+ annotations=annotations,
284
+ num_parallel=num_parallel,
285
+ port=port,
286
+ attrs=attrs,
248
287
  )
249
288
  except Exception as e:
250
289
  traceback.print_exc(chain=False)
metaflow/plugins/kubernetes/kubernetes_decorator.py CHANGED
@@ -2,6 +2,7 @@ import json
2
2
  import os
3
3
  import platform
4
4
  import sys
5
+ import time
5
6
 
6
7
  from metaflow import current
7
8
  from metaflow.decorators import StepDecorator
@@ -20,10 +21,12 @@ from metaflow.metaflow_config import (
20
21
  KUBERNETES_PERSISTENT_VOLUME_CLAIMS,
21
22
  KUBERNETES_TOLERATIONS,
22
23
  KUBERNETES_SERVICE_ACCOUNT,
24
+ KUBERNETES_PORT,
23
25
  )
24
26
  from metaflow.plugins.resources_decorator import ResourcesDecorator
25
27
  from metaflow.plugins.timeout_decorator import get_run_time_limit_for_task
26
28
  from metaflow.sidecar import Sidecar
29
+ from metaflow.unbounded_foreach import UBF_CONTROL
27
30
 
28
31
  from ..aws.aws_utils import get_docker_registry, get_ec2_instance_metadata
29
32
  from .kubernetes import KubernetesException, parse_kube_keyvalue_list
@@ -88,6 +91,8 @@ class KubernetesDecorator(StepDecorator):
88
91
  persistent_volume_claims: Dict[str, str], optional
89
92
  A map (dictionary) of persistent volumes to be mounted to the pod for this step. The map is from persistent
90
93
  volumes to the path to which the volume is to be mounted, e.g., `{'pvc-name': '/path/to/mount/on'}`.
94
+ port: int, optional
95
+ Number of the port to specify in the Kubernetes job object
91
96
  """
92
97
 
93
98
  name = "kubernetes"
@@ -110,6 +115,7 @@ class KubernetesDecorator(StepDecorator):
110
115
  "tmpfs_size": None,
111
116
  "tmpfs_path": "/metaflow_temp",
112
117
  "persistent_volume_claims": None, # e.g., {"pvc-name": "/mnt/vol", "another-pvc": "/mnt/vol2"}
118
+ "port": None,
113
119
  }
114
120
  package_url = None
115
121
  package_sha = None
@@ -195,6 +201,8 @@ class KubernetesDecorator(StepDecorator):
195
201
  if not self.attributes["tmpfs_size"]:
196
202
  # default tmpfs behavior - https://man7.org/linux/man-pages/man5/tmpfs.5.html
197
203
  self.attributes["tmpfs_size"] = int(self.attributes["memory"]) // 2
204
+ if not self.attributes["port"]:
205
+ self.attributes["port"] = KUBERNETES_PORT
198
206
 
199
207
  # Refer https://github.com/Netflix/metaflow/blob/master/docs/lifecycle.png
200
208
  def step_init(self, flow, graph, step, decos, environment, flow_datastore, logger):
@@ -216,12 +224,6 @@ class KubernetesDecorator(StepDecorator):
216
224
  "Kubernetes. Please use one or the other.".format(step=step)
217
225
  )
218
226
 
219
- for deco in decos:
220
- if getattr(deco, "IS_PARALLEL", False):
221
- raise KubernetesException(
222
- "@kubernetes does not support parallel execution currently."
223
- )
224
-
225
227
  # Set run time limit for the Kubernetes job.
226
228
  self.run_time_limit = get_run_time_limit_for_task(decos)
227
229
  if self.run_time_limit < 60:
@@ -432,6 +434,27 @@ class KubernetesDecorator(StepDecorator):
432
434
  self._save_logs_sidecar = Sidecar("save_logs_periodically")
433
435
  self._save_logs_sidecar.start()
434
436
 
437
+ num_parallel = int(os.environ.get("WORLD_SIZE", 0))
438
+ if num_parallel >= 1:
439
+ if ubf_context == UBF_CONTROL:
440
+ control_task_id = current.task_id
441
+ top_task_id = control_task_id.replace("control-", "")
442
+ mapper_task_ids = [control_task_id] + [
443
+ "%s-node-%d" % (top_task_id, node_idx)
444
+ for node_idx in range(1, num_parallel)
445
+ ]
446
+ flow._control_mapper_tasks = [
447
+ "%s/%s/%s" % (run_id, step_name, mapper_task_id)
448
+ for mapper_task_id in mapper_task_ids
449
+ ]
450
+ flow._control_task_is_mapper_zero = True
451
+ else:
452
+ worker_job_rank = int(os.environ["RANK"])
453
+ os.environ["RANK"] = str(worker_job_rank + 1)
454
+
455
+ if num_parallel >= 1:
456
+ _setup_multinode_environment()
457
+
435
458
  def task_finished(
436
459
  self, step_name, flow, graph, is_task_ok, retry_count, max_retries
437
460
  ):
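
To make the naming scheme above concrete: for a control task id of `control-123` and a WORLD_SIZE of 3, the control task records itself plus two worker task ids derived from the stripped id (values are illustrative):

```python
control_task_id = "control-123"
num_parallel = 3

top_task_id = control_task_id.replace("control-", "")
mapper_task_ids = [control_task_id] + [
    "%s-node-%d" % (top_task_id, node_idx) for node_idx in range(1, num_parallel)
]
# ['control-123', '123-node-1', '123-node-2']
```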
@@ -459,9 +482,53 @@ class KubernetesDecorator(StepDecorator):
459
482
  # Best effort kill
460
483
  pass
461
484
 
485
+ if is_task_ok and len(getattr(flow, "_control_mapper_tasks", [])) > 1:
486
+ self._wait_for_mapper_tasks(flow, step_name)
487
+
488
+ def _wait_for_mapper_tasks(self, flow, step_name):
489
+ """
490
+ When launching multinode task with UBF, need to wait for the secondary
491
+ tasks to finish cleanly and produce their output before exiting the
492
+ main task. Otherwise, the main task finishing will cause secondary nodes
493
+ to terminate immediately, and possibly prematurely.
494
+ """
495
+ from metaflow import Step # avoid circular dependency
496
+
497
+ TIMEOUT = 600
498
+ last_completion_timeout = time.time() + TIMEOUT
499
+ print("Waiting for batch secondary tasks to finish")
500
+ while last_completion_timeout > time.time():
501
+ time.sleep(2)
502
+ try:
503
+ step_path = "%s/%s/%s" % (flow.name, current.run_id, step_name)
504
+ tasks = [task for task in Step(step_path)]
505
+ if len(tasks) == len(flow._control_mapper_tasks):
506
+ if all(
507
+ task.finished_at is not None for task in tasks
508
+ ): # for some reason task.finished fails
509
+ return True
510
+ else:
511
+ print(
512
+ "Waiting for all parallel tasks to finish. Finished: {}/{}".format(
513
+ len(tasks),
514
+ len(flow._control_mapper_tasks),
515
+ )
516
+ )
517
+ except Exception as e:
518
+ pass
519
+ raise Exception(
520
+ "Batch secondary workers did not finish in %s seconds" % TIMEOUT
521
+ )
522
+
462
523
  @classmethod
463
524
  def _save_package_once(cls, flow_datastore, package):
464
525
  if cls.package_url is None:
465
526
  cls.package_url, cls.package_sha = flow_datastore.save_data(
466
527
  [package.blob], len_hint=1
467
528
  )[0]
529
+
530
+ def _setup_multinode_environment():
531
+ import socket
532
+ os.environ["MF_PARALLEL_MAIN_IP"] = socket.gethostbyname(os.environ["MASTER_ADDR"])
533
+ os.environ["MF_PARALLEL_NUM_NODES"] = os.environ["WORLD_SIZE"]
534
+ os.environ["MF_PARALLEL_NODE_INDEX"] = os.environ["RANK"]
metaflow/plugins/kubernetes/kubernetes_job.py CHANGED
@@ -2,20 +2,18 @@ import json
2
2
  import math
3
3
  import random
4
4
  import time
5
-
6
- from metaflow.tracing import inject_tracing_vars
7
-
5
+ import os
6
+ import socket
7
+ import copy
8
8
 
9
9
  from metaflow.exception import MetaflowException
10
10
  from metaflow.metaflow_config import KUBERNETES_SECRETS
11
11
 
12
12
  CLIENT_REFRESH_INTERVAL_SECONDS = 300
13
13
 
14
-
15
14
  class KubernetesJobException(MetaflowException):
16
15
  headline = "Kubernetes job error"
17
16
 
18
-
19
17
  # Implements truncated exponential backoff from
20
18
  # https://cloud.google.com/storage/docs/retry-strategy#exponential-backoff
21
19
  def k8s_retry(deadline_seconds=60, max_backoff=32):
@@ -78,107 +76,260 @@ class KubernetesJob(object):
78
76
  tmpfs_size = self._kwargs["tmpfs_size"]
79
77
  tmpfs_enabled = use_tmpfs or (tmpfs_size and not use_tmpfs)
80
78
 
81
- self._job = client.V1Job(
82
- api_version="batch/v1",
83
- kind="Job",
84
- metadata=client.V1ObjectMeta(
85
- # Annotations are for humans
86
- annotations=self._kwargs.get("annotations", {}),
87
- # While labels are for Kubernetes
88
- labels=self._kwargs.get("labels", {}),
89
- generate_name=self._kwargs["generate_name"],
90
- namespace=self._kwargs["namespace"], # Defaults to `default`
91
- ),
92
- spec=client.V1JobSpec(
93
- # Retries are handled by Metaflow when it is responsible for
94
- # executing the flow. The responsibility is moved to Kubernetes
95
- # when Argo Workflows is responsible for the execution.
96
- backoff_limit=self._kwargs.get("retries", 0),
97
- completions=1, # A single non-indexed pod job
98
- ttl_seconds_after_finished=7
99
- * 60
100
- * 60 # Remove job after a week. TODO: Make this configurable
101
- * 24,
102
- template=client.V1PodTemplateSpec(
79
+ jobset_name = "js-%s" % self._kwargs["attrs"]["metaflow.task_id"].split('-')[-1]
80
+ main_job_name = "control"
81
+ main_job_index = 0
82
+ main_pod_index = 0
83
+ subdomain = jobset_name
84
+ master_port = int(self._kwargs['port']) if self._kwargs['port'] else None
85
+
86
+ passwordless_ssh = self._kwargs["attrs"]["requires_passwordless_ssh"]
87
+ if passwordless_ssh:
88
+ passwordless_ssh_service_name = subdomain
89
+ passwordless_ssh_service_selector = {
90
+ "passwordless-ssh-jobset": "true"
91
+ }
92
+ else:
93
+ passwordless_ssh_service_name = None
94
+ passwordless_ssh_service_selector = {}
95
+
96
+ fqdn_suffix = "%s.svc.cluster.local" % self._kwargs["namespace"]
97
+ jobset_main_addr = "%s-%s-%s-%s.%s.%s" % (
98
+ jobset_name,
99
+ main_job_name,
100
+ main_job_index,
101
+ main_pod_index,
102
+ subdomain,
103
+ fqdn_suffix,
104
+ )
105
+
106
+ def _install_jobset(
107
+ repo_url="https://github.com/kubernetes-sigs/jobset",
108
+ python_sdk_path="jobset/sdk/python",
109
+ ):
110
+
111
+ # TODO (Eddie): Remove this and suggest to user.
112
+
113
+ import subprocess
114
+ import tempfile
115
+ import shutil
116
+ import os
117
+
118
+ with open(os.devnull, "wb") as devnull:
119
+ cwd = os.getcwd()
120
+ tmp_dir = tempfile.mkdtemp()
121
+ os.chdir(tmp_dir)
122
+ subprocess.check_call(
123
+ ["git", "clone", repo_url], stdout=devnull, stderr=subprocess.STDOUT
124
+ )
125
+ tmp_python_sdk_path = os.path.join(tmp_dir, python_sdk_path)
126
+ os.chdir(tmp_python_sdk_path)
127
+ subprocess.check_call(
128
+ ["pip", "install", "."], stdout=devnull, stderr=subprocess.STDOUT
129
+ )
130
+ os.chdir(cwd)
131
+ shutil.rmtree(tmp_dir)
132
+
133
+ def _get_passwordless_ssh_service():
134
+
135
+ return client.V1Service(
136
+ api_version="v1",
137
+ kind="Service",
138
+ metadata=client.V1ObjectMeta(
139
+ name=passwordless_ssh_service_name,
140
+ namespace=self._kwargs["namespace"]
141
+ ),
142
+ spec=client.V1ServiceSpec(
143
+ cluster_ip="None",
144
+ internal_traffic_policy="Cluster",
145
+ ip_families=["IPv4"],
146
+ ip_family_policy="SingleStack",
147
+ selector=passwordless_ssh_service_selector,
148
+ session_affinity="None",
149
+ type="ClusterIP",
150
+ ports=[
151
+ client.V1ServicePort(
152
+ name="control",
153
+ port=22,
154
+ protocol="TCP",
155
+ target_port=22
156
+ )
157
+ ]
158
+ )
159
+ )
160
+
161
+ def _get_replicated_job(job_name, parallelism, command):
162
+ return jobset.models.jobset_v1alpha2_replicated_job.JobsetV1alpha2ReplicatedJob(
163
+ name=job_name,
164
+ template=client.V1JobTemplateSpec(
103
165
  metadata=client.V1ObjectMeta(
104
166
  annotations=self._kwargs.get("annotations", {}),
105
167
  labels=self._kwargs.get("labels", {}),
106
168
  namespace=self._kwargs["namespace"],
107
169
  ),
108
- spec=client.V1PodSpec(
109
- # Timeout is set on the pod and not the job (important!)
110
- active_deadline_seconds=self._kwargs["timeout_in_seconds"],
111
- # TODO (savin): Enable affinities for GPU scheduling.
112
- # affinity=?,
113
- containers=[
114
- client.V1Container(
115
- command=self._kwargs["command"],
116
- env=[
117
- client.V1EnvVar(name=k, value=str(v))
118
- for k, v in self._kwargs.get(
119
- "environment_variables", {}
120
- ).items()
121
- ]
122
- # And some downward API magic. Add (key, value)
123
- # pairs below to make pod metadata available
124
- # within Kubernetes container.
125
- + [
126
- client.V1EnvVar(
127
- name=k,
128
- value_from=client.V1EnvVarSource(
129
- field_ref=client.V1ObjectFieldSelector(
130
- field_path=str(v)
170
+ spec=client.V1JobSpec(
171
+ parallelism=parallelism, # how many jobs can run at once
172
+ completions=parallelism, # how many Pods the JobSet creates in total
173
+ backoff_limit=0,
174
+ ttl_seconds_after_finished=7
175
+ * 60
176
+ * 60
177
+ * 24,
178
+ template=client.V1PodTemplateSpec(
179
+ metadata=client.V1ObjectMeta(
180
+ annotations=self._kwargs.get("annotations", {}),
181
+ labels={
182
+ **self._kwargs.get("labels", {}),
183
+ **passwordless_ssh_service_selector, # TODO: necessary?
184
+ # TODO: cluster-name, app.kubernetes.io/name necessary?
185
+ },
186
+ namespace=self._kwargs["namespace"],
187
+ ),
188
+ spec=client.V1PodSpec(
189
+ active_deadline_seconds=self._kwargs[
190
+ "timeout_in_seconds"
191
+ ],
192
+ containers=[
193
+ client.V1Container(
194
+ command=command,
195
+ ports=[client.V1ContainerPort(container_port=master_port)] if master_port and job_name=="control" else [],
196
+ env=[
197
+ client.V1EnvVar(name=k, value=str(v))
198
+ for k, v in self._kwargs.get(
199
+ "environment_variables", {}
200
+ ).items()
201
+ ]
202
+ + [
203
+ client.V1EnvVar(
204
+ name=k,
205
+ value_from=client.V1EnvVarSource(
206
+ field_ref=client.V1ObjectFieldSelector(
207
+ field_path=str(v)
208
+ )
209
+ ),
210
+ )
211
+ for k, v in {
212
+ "METAFLOW_KUBERNETES_POD_NAMESPACE": "metadata.namespace",
213
+ "METAFLOW_KUBERNETES_POD_NAME": "metadata.name",
214
+ "METAFLOW_KUBERNETES_POD_ID": "metadata.uid",
215
+ "METAFLOW_KUBERNETES_SERVICE_ACCOUNT_NAME": "spec.serviceAccountName",
216
+ "METAFLOW_KUBERNETES_NODE_IP": "status.hostIP",
217
+ }.items()
218
+ ]
219
+ # Mimicking the AWS Batch Multinode env vars.
220
+ + [
221
+ client.V1EnvVar(
222
+ name="MASTER_ADDR",
223
+ value=jobset_main_addr,
224
+ ),
225
+ client.V1EnvVar(
226
+ name="MASTER_PORT",
227
+ value=str(master_port),
228
+ ),
229
+ client.V1EnvVar(
230
+ name="RANK",
231
+ value_from=client.V1EnvVarSource(
232
+ field_ref=client.V1ObjectFieldSelector(
233
+ field_path="metadata.annotations['batch.kubernetes.io/job-completion-index']"
234
+ )
235
+ ),
236
+ ),
237
+ client.V1EnvVar(
238
+ name="WORLD_SIZE",
239
+ value=str(self._kwargs["num_parallel"]),
240
+ ),
241
+ client.V1EnvVar(
242
+ name="PYTHONUNBUFFERED",
243
+ value="0",
244
+ ),
245
+ ],
246
+ env_from=[
247
+ client.V1EnvFromSource(
248
+ secret_ref=client.V1SecretEnvSource(
249
+ name=str(k),
250
+ # optional=True
251
+ )
131
252
  )
253
+ for k in list(
254
+ self._kwargs.get("secrets", [])
255
+ )
256
+ + KUBERNETES_SECRETS.split(",")
257
+ if k
258
+ ],
259
+ image=self._kwargs["image"],
260
+ image_pull_policy=self._kwargs[
261
+ "image_pull_policy"
262
+ ],
263
+ name=self._kwargs["step_name"].replace(
264
+ "_", "-"
132
265
  ),
133
- )
134
- for k, v in {
135
- "METAFLOW_KUBERNETES_POD_NAMESPACE": "metadata.namespace",
136
- "METAFLOW_KUBERNETES_POD_NAME": "metadata.name",
137
- "METAFLOW_KUBERNETES_POD_ID": "metadata.uid",
138
- "METAFLOW_KUBERNETES_SERVICE_ACCOUNT_NAME": "spec.serviceAccountName",
139
- "METAFLOW_KUBERNETES_NODE_IP": "status.hostIP",
140
- }.items()
141
- ]
142
- + [
143
- client.V1EnvVar(name=k, value=str(v))
144
- for k, v in inject_tracing_vars({}).items()
145
- ],
146
- env_from=[
147
- client.V1EnvFromSource(
148
- secret_ref=client.V1SecretEnvSource(
149
- name=str(k),
150
- # optional=True
266
+ resources=client.V1ResourceRequirements(
267
+ requests={
268
+ "cpu": str(self._kwargs["cpu"]),
269
+ "memory": "%sM"
270
+ % str(self._kwargs["memory"]),
271
+ "ephemeral-storage": "%sM"
272
+ % str(self._kwargs["disk"]),
273
+ },
274
+ limits={
275
+ "%s.com/gpu".lower()
276
+ % self._kwargs["gpu_vendor"]: str(
277
+ self._kwargs["gpu"]
278
+ )
279
+ for k in [0]
280
+ # Don't set GPU limits if gpu isn't specified.
281
+ if self._kwargs["gpu"] is not None
282
+ },
283
+ ),
284
+ volume_mounts=(
285
+ [
286
+ client.V1VolumeMount(
287
+ mount_path=self._kwargs.get(
288
+ "tmpfs_path"
289
+ ),
290
+ name="tmpfs-ephemeral-volume",
291
+ )
292
+ ]
293
+ if tmpfs_enabled
294
+ else []
151
295
  )
296
+ + (
297
+ [
298
+ client.V1VolumeMount(
299
+ mount_path=path, name=claim
300
+ )
301
+ for claim, path in self._kwargs[
302
+ "persistent_volume_claims"
303
+ ].items()
304
+ ]
305
+ if self._kwargs["persistent_volume_claims"]
306
+ is not None
307
+ else []
308
+ ),
152
309
  )
153
- for k in list(self._kwargs.get("secrets", []))
154
- + KUBERNETES_SECRETS.split(",")
155
- if k
156
310
  ],
157
- image=self._kwargs["image"],
158
- image_pull_policy=self._kwargs["image_pull_policy"],
159
- name=self._kwargs["step_name"].replace("_", "-"),
160
- resources=client.V1ResourceRequirements(
161
- requests={
162
- "cpu": str(self._kwargs["cpu"]),
163
- "memory": "%sM" % str(self._kwargs["memory"]),
164
- "ephemeral-storage": "%sM"
165
- % str(self._kwargs["disk"]),
166
- },
167
- limits={
168
- "%s.com/gpu".lower()
169
- % self._kwargs["gpu_vendor"]: str(
170
- self._kwargs["gpu"]
171
- )
172
- for k in [0]
173
- # Don't set GPU limits if gpu isn't specified.
174
- if self._kwargs["gpu"] is not None
175
- },
176
- ),
177
- volume_mounts=(
311
+ node_selector=self._kwargs.get("node_selector"),
312
+ restart_policy="Never",
313
+
314
+ set_hostname_as_fqdn=True, # configure pod hostname as pod's FQDN
315
+ share_process_namespace=False, # default
316
+ subdomain=subdomain, # FQDN = <hostname>.<subdomain>.<pod namespace>.svc.<cluster domain>
317
+
318
+ service_account_name=self._kwargs["service_account"],
319
+ termination_grace_period_seconds=0,
320
+ tolerations=[
321
+ client.V1Toleration(**toleration)
322
+ for toleration in self._kwargs.get("tolerations")
323
+ or []
324
+ ],
325
+ volumes=(
178
326
  [
179
- client.V1VolumeMount(
180
- mount_path=self._kwargs.get("tmpfs_path"),
327
+ client.V1Volume(
181
328
  name="tmpfs-ephemeral-volume",
329
+ empty_dir=client.V1EmptyDirVolumeSource(
330
+ medium="Memory",
331
+ size_limit="{}Mi".format(tmpfs_size),
332
+ ),
182
333
  )
183
334
  ]
184
335
  if tmpfs_enabled
@@ -186,72 +337,264 @@ class KubernetesJob(object):
186
337
  )
187
338
  + (
188
339
  [
189
- client.V1VolumeMount(
190
- mount_path=path, name=claim
340
+ client.V1Volume(
341
+ name=claim,
342
+ persistent_volume_claim=client.V1PersistentVolumeClaimVolumeSource(
343
+ claim_name=claim
344
+ ),
191
345
  )
192
- for claim, path in self._kwargs[
346
+ for claim in self._kwargs[
193
347
  "persistent_volume_claims"
194
- ].items()
348
+ ].keys()
195
349
  ]
196
350
  if self._kwargs["persistent_volume_claims"]
197
351
  is not None
198
352
  else []
199
353
  ),
200
- )
201
- ],
202
- node_selector=self._kwargs.get("node_selector"),
203
- # TODO (savin): Support image_pull_secrets
204
- # image_pull_secrets=?,
205
- # TODO (savin): Support preemption policies
206
- # preemption_policy=?,
207
- #
208
- # A Container in a Pod may fail for a number of
209
- # reasons, such as because the process in it exited
210
- # with a non-zero exit code, or the Container was
211
- # killed due to OOM etc. If this happens, fail the pod
212
- # and let Metaflow handle the retries.
213
- restart_policy="Never",
214
- service_account_name=self._kwargs["service_account"],
215
- # Terminate the container immediately on SIGTERM
216
- termination_grace_period_seconds=0,
217
- tolerations=[
218
- client.V1Toleration(**toleration)
219
- for toleration in self._kwargs.get("tolerations") or []
220
- ],
221
- volumes=(
222
- [
223
- client.V1Volume(
224
- name="tmpfs-ephemeral-volume",
225
- empty_dir=client.V1EmptyDirVolumeSource(
226
- medium="Memory",
227
- # Add default unit as ours differs from Kubernetes default.
228
- size_limit="{}Mi".format(tmpfs_size),
354
+ ),
355
+ ),
356
+ ),
357
+ ),
358
+ )
359
+
360
+ if "num_parallel" in self._kwargs and self._kwargs["num_parallel"] >= 1:
361
+
362
+ try:
363
+ import jobset
364
+ except ImportError:
365
+ _install_jobset()
366
+ import jobset
367
+
368
+ main_commands = copy.copy(self._kwargs["command"])
369
+ main_commands[-1] = main_commands[-1].replace(
370
+ "[multinode-args]", "--split-index 0"
371
+ )
372
+
373
+ task_id = self._kwargs["attrs"]["metaflow.task_id"]
374
+ secondary_commands = copy.copy(self._kwargs["command"])
375
+ # RANK needs +1 because control node is not in the worker index group, yet we want global nodes.
376
+ # Technically, control and worker could be same replicated job type, but cleaner to separate for future use cases.
377
+ secondary_commands[-1] = secondary_commands[-1].replace(
378
+ "[multinode-args]", "--split-index `expr $RANK + 1`"
379
+ )
380
+ secondary_commands[-1] = secondary_commands[-1].replace(
381
+ "ubf_control", "ubf_task"
382
+ )
383
+ secondary_commands[-1] = secondary_commands[-1].replace(
384
+ task_id,
385
+ task_id.replace("control-", "") + "-node-`expr $RANK + 1`",
386
+ )
387
+
388
+ if passwordless_ssh:
389
+ if not os.path.exists("/usr/sbin/sshd"):
390
+ raise KubernetesJobException(
391
+ "This @parallel decorator requires sshd to be installed in the container image."
392
+ "Please install OpenSSH."
393
+ )
394
+
395
+ # run sshd in background
396
+ main_commands[-1] = "/usr/sbin/sshd -D & %s" % main_commands[-1]
397
+ secondary_commands[-1] = "/usr/sbin/sshd -D & %s" % secondary_commands[-1]
398
+
399
+ self._jobset = jobset.models.jobset_v1alpha2_job_set.JobsetV1alpha2JobSet(
400
+ api_version="jobset.x-k8s.io/v1alpha2",
401
+ kind="JobSet",
402
+ metadata=client.V1ObjectMeta(
403
+ annotations=self._kwargs.get("annotations", {}),
404
+ labels=self._kwargs.get("labels", {}),
405
+ name=jobset_name,
406
+ namespace=self._kwargs["namespace"],
407
+ ),
408
+ spec=jobset.models.jobset_v1alpha2_job_set_spec.JobsetV1alpha2JobSetSpec(
409
+ network=jobset.models.jobset_v1alpha2_network.JobsetV1alpha2Network(
410
+ enable_dns_hostnames=True if not self._kwargs['attrs']['requires_passwordless_ssh'] else False,
411
+ subdomain=subdomain
412
+ ),
413
+ replicated_jobs=[
414
+ _get_replicated_job("control", 1, main_commands),
415
+ _get_replicated_job(
416
+ "worker",
417
+ self._kwargs["num_parallel"] - 1,
418
+ secondary_commands,
419
+ ),
420
+ ],
421
+ ),
422
+ )
423
+ self._passwordless_ssh_service = _get_passwordless_ssh_service()
424
+ else:
425
+ self._job = client.V1Job(
426
+ api_version="batch/v1",
427
+ kind="Job",
428
+ metadata=client.V1ObjectMeta(
429
+ # Annotations are for humans
430
+ annotations=self._kwargs.get("annotations", {}),
431
+ # While labels are for Kubernetes
432
+ labels=self._kwargs.get("labels", {}),
433
+ generate_name=self._kwargs["generate_name"],
434
+ namespace=self._kwargs["namespace"], # Defaults to `default`
435
+ ),
436
+ spec=client.V1JobSpec(
437
+ # Retries are handled by Metaflow when it is responsible for
438
+ # executing the flow. The responsibility is moved to Kubernetes
439
+ # when Argo Workflows is responsible for the execution.
440
+ backoff_limit=self._kwargs.get("retries", 0),
441
+ completions=1, # A single non-indexed pod job
442
+ ttl_seconds_after_finished=7
443
+ * 60
444
+ * 60 # Remove job after a week. TODO: Make this configurable
445
+ * 24,
446
+ template=client.V1PodTemplateSpec(
447
+ metadata=client.V1ObjectMeta(
448
+ annotations=self._kwargs.get("annotations", {}),
449
+ labels=self._kwargs.get("labels", {}),
450
+ namespace=self._kwargs["namespace"],
451
+ ),
452
+ spec=client.V1PodSpec(
453
+ # Timeout is set on the pod and not the job (important!)
454
+ active_deadline_seconds=self._kwargs["timeout_in_seconds"],
455
+ # TODO (savin): Enable affinities for GPU scheduling.
456
+ # affinity=?,
457
+ containers=[
458
+ client.V1Container(
459
+ command=self._kwargs["command"],
460
+ env=[
461
+ client.V1EnvVar(name=k, value=str(v))
462
+ for k, v in self._kwargs.get(
463
+ "environment_variables", {}
464
+ ).items()
465
+ ]
466
+ # And some downward API magic. Add (key, value)
467
+ # pairs below to make pod metadata available
468
+ # within Kubernetes container.
469
+ + [
470
+ client.V1EnvVar(
471
+ name=k,
472
+ value_from=client.V1EnvVarSource(
473
+ field_ref=client.V1ObjectFieldSelector(
474
+ field_path=str(v)
475
+ )
476
+ ),
477
+ )
478
+ for k, v in {
479
+ "METAFLOW_KUBERNETES_POD_NAMESPACE": "metadata.namespace",
480
+ "METAFLOW_KUBERNETES_POD_NAME": "metadata.name",
481
+ "METAFLOW_KUBERNETES_POD_ID": "metadata.uid",
482
+ "METAFLOW_KUBERNETES_SERVICE_ACCOUNT_NAME": "spec.serviceAccountName",
483
+ "METAFLOW_KUBERNETES_NODE_IP": "status.hostIP",
484
+ }.items()
485
+ ],
486
+ env_from=[
487
+ client.V1EnvFromSource(
488
+ secret_ref=client.V1SecretEnvSource(
489
+ name=str(k),
490
+ # optional=True
491
+ )
492
+ )
493
+ for k in list(self._kwargs.get("secrets", []))
494
+ + KUBERNETES_SECRETS.split(",")
495
+ if k
496
+ ],
497
+ image=self._kwargs["image"],
498
+ image_pull_policy=self._kwargs["image_pull_policy"],
499
+ name=self._kwargs["step_name"].replace("_", "-"),
500
+ resources=client.V1ResourceRequirements(
501
+ requests={
502
+ "cpu": str(self._kwargs["cpu"]),
503
+ "memory": "%sM"
504
+ % str(self._kwargs["memory"]),
505
+ "ephemeral-storage": "%sM"
506
+ % str(self._kwargs["disk"]),
507
+ },
508
+ limits={
509
+ "%s.com/gpu".lower()
510
+ % self._kwargs["gpu_vendor"]: str(
511
+ self._kwargs["gpu"]
512
+ )
513
+ for k in [0]
514
+ # Don't set GPU limits if gpu isn't specified.
515
+ if self._kwargs["gpu"] is not None
516
+ },
229
517
  ),
230
- )
231
- ]
232
- if tmpfs_enabled
233
- else []
234
- )
235
- + (
236
- [
237
- client.V1Volume(
238
- name=claim,
239
- persistent_volume_claim=client.V1PersistentVolumeClaimVolumeSource(
240
- claim_name=claim
518
+ volume_mounts=(
519
+ [
520
+ client.V1VolumeMount(
521
+ mount_path=self._kwargs.get(
522
+ "tmpfs_path"
523
+ ),
524
+ name="tmpfs-ephemeral-volume",
525
+ )
526
+ ]
527
+ if tmpfs_enabled
528
+ else []
529
+ )
530
+ + (
531
+ [
532
+ client.V1VolumeMount(
533
+ mount_path=path, name=claim
534
+ )
535
+ for claim, path in self._kwargs[
536
+ "persistent_volume_claims"
537
+ ].items()
538
+ ]
539
+ if self._kwargs["persistent_volume_claims"]
540
+ is not None
541
+ else []
241
542
  ),
242
543
  )
243
- for claim in self._kwargs[
244
- "persistent_volume_claims"
245
- ].keys()
246
- ]
247
- if self._kwargs["persistent_volume_claims"] is not None
248
- else []
544
+ ],
545
+ node_selector=self._kwargs.get("node_selector"),
546
+ # TODO (savin): Support image_pull_secrets
547
+ # image_pull_secrets=?,
548
+ # TODO (savin): Support preemption policies
549
+ # preemption_policy=?,
550
+ #
551
+ # A Container in a Pod may fail for a number of
552
+ # reasons, such as because the process in it exited
553
+ # with a non-zero exit code, or the Container was
554
+ # killed due to OOM etc. If this happens, fail the pod
555
+ # and let Metaflow handle the retries.
556
+ restart_policy="Never",
557
+ service_account_name=self._kwargs["service_account"],
558
+ # Terminate the container immediately on SIGTERM
559
+ termination_grace_period_seconds=0,
560
+ tolerations=[
561
+ client.V1Toleration(**toleration)
562
+ for toleration in self._kwargs.get("tolerations") or []
563
+ ],
564
+ volumes=(
565
+ [
566
+ client.V1Volume(
567
+ name="tmpfs-ephemeral-volume",
568
+ empty_dir=client.V1EmptyDirVolumeSource(
569
+ medium="Memory",
570
+ # Add default unit as ours differs from Kubernetes default.
571
+ size_limit="{}Mi".format(tmpfs_size),
572
+ ),
573
+ )
574
+ ]
575
+ if tmpfs_enabled
576
+ else []
577
+ )
578
+ + (
579
+ [
580
+ client.V1Volume(
581
+ name=claim,
582
+ persistent_volume_claim=client.V1PersistentVolumeClaimVolumeSource(
583
+ claim_name=claim
584
+ ),
585
+ )
586
+ for claim in self._kwargs[
587
+ "persistent_volume_claims"
588
+ ].keys()
589
+ ]
590
+ if self._kwargs["persistent_volume_claims"] is not None
591
+ else []
592
+ ),
593
+ # TODO (savin): Set termination_message_policy
249
594
  ),
250
- # TODO (savin): Set termination_message_policy
251
595
  ),
252
596
  ),
253
- ),
254
- )
597
+ )
255
598
  return self
256
599
 
257
600
  def execute(self):
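
The control pod's stable address is assembled from the JobSet name, the replicated-job name, the job and pod indices, and the headless-service subdomain. A quick illustration of the MASTER_ADDR this produces (namespace and task-id suffix are made up):

```python
jobset_name = "js-abc123"   # "js-" plus the last segment of the task id
main_job_name = "control"
main_job_index = 0
main_pod_index = 0
subdomain = jobset_name
fqdn_suffix = "%s.svc.cluster.local" % "default"   # namespace assumed to be "default"

jobset_main_addr = "%s-%s-%s-%s.%s.%s" % (
    jobset_name, main_job_name, main_job_index, main_pod_index, subdomain, fqdn_suffix
)
# -> "js-abc123-control-0-0.js-abc123.default.svc.cluster.local"
```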
@@ -262,19 +605,53 @@ class KubernetesJob(object):
262
605
  # achieve the guarantees that we are seeking.
263
606
  # https://github.com/kubernetes/enhancements/issues/1040
264
607
  # Hopefully, we will be able to get creative with kube-batch
265
- response = (
266
- client.BatchV1Api()
267
- .create_namespaced_job(
268
- body=self._job, namespace=self._kwargs["namespace"]
608
+
609
+ if "num_parallel" in self._kwargs and self._kwargs["num_parallel"] >= 1:
610
+ # TODO (Eddie): this is kinda gross. fix it.
611
+ if self._kwargs["attrs"]["requires_passwordless_ssh"]:
612
+ api_instance = client.CoreV1Api()
613
+ api_response = api_instance.create_namespaced_service(namespace=self._kwargs['namespace'], body=self._passwordless_ssh_service)
614
+
615
+ with client.ApiClient() as api_client:
616
+ api_instance = client.CustomObjectsApi(api_client)
617
+
618
+ response = api_instance.create_namespaced_custom_object(
619
+ body=self._jobset,
620
+ group="jobset.x-k8s.io",
621
+ version="v1alpha2",
622
+ namespace=self._kwargs["namespace"],
623
+ plural="jobsets",
269
624
  )
270
- .to_dict()
271
- )
272
- return RunningJob(
273
- client=self._client,
274
- name=response["metadata"]["name"],
275
- uid=response["metadata"]["uid"],
276
- namespace=response["metadata"]["namespace"],
277
- )
625
+
626
+ # HACK: Give K8s some time to actually create the job
627
+ time.sleep(10)
628
+
629
+ # TODO (Eddie): Remove hack and make RunningJobSet.
630
+ # There are many jobs running that should be monitored.
631
+ job_name = "%s-control-0" % response["metadata"]["name"]
632
+ fake_id = 123
633
+ return RunningJob(
634
+ client=self._client,
635
+ name=job_name,
636
+ uid=fake_id,
637
+ namespace=response["metadata"]["namespace"],
638
+ )
639
+
640
+ else:
641
+ response = (
642
+ client.BatchV1Api()
643
+ .create_namespaced_job(
644
+ body=self._job, namespace=self._kwargs["namespace"]
645
+ )
646
+ .to_dict()
647
+ )
648
+ return RunningJob(
649
+ client=self._client,
650
+ name=response["metadata"]["name"],
651
+ uid=response["metadata"]["uid"],
652
+ namespace=response["metadata"]["namespace"],
653
+ )
654
+
278
655
  except client.rest.ApiException as e:
279
656
  raise KubernetesJobException(
280
657
  "Unable to launch Kubernetes job.\n %s"
@@ -330,7 +707,6 @@ class KubernetesJob(object):
330
707
 
331
708
 
332
709
  class RunningJob(object):
333
-
334
710
  # State Machine implementation for the lifecycle behavior documented in
335
711
  # https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/
336
712
  #
@@ -450,7 +826,6 @@ class RunningJob(object):
450
826
  client = self._client.get()
451
827
  if not self.is_done:
452
828
  if self.is_running:
453
-
454
829
  # Case 1.
455
830
  from kubernetes.stream import stream
456
831
 
metaflow/version.py CHANGED
@@ -1 +1 @@
1
- metaflow_version = "2.10.11.1"
1
+ metaflow_version = "2.11.0.2"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ob-metaflow
3
- Version: 2.10.11.1
3
+ Version: 2.11.0.2
4
4
  Summary: Metaflow: More Data Science, Less Engineering
5
5
  Author: Netflix, Outerbounds & the Metaflow Community
6
6
  Author-email: help@outerbounds.co
@@ -15,7 +15,7 @@ metaflow/graph.py,sha256=ZPxyG8uwVMk5YYgX4pQEQaPZtZM5Wy-G4NtJK73IEuA,11818
15
15
  metaflow/includefile.py,sha256=BVQLYTLZN7m3ibFnsTU70dPj9YskxZeQb1FosV3k4-o,19721
16
16
  metaflow/integrations.py,sha256=LlsaoePRg03DjENnmLxZDYto3NwWc9z_PtU6nJxLldg,1480
17
17
  metaflow/lint.py,sha256=_kYAbAtsP7IG1Rd0FqNbo8I8Zs66_0WXbaZJFARO3dE,10394
18
- metaflow/metaflow_config.py,sha256=XZMXv79h60-yP1c6GD78tcwRq6FX5yLpt2ALRRtoCj4,18986
18
+ metaflow/metaflow_config.py,sha256=LBEDdQskwtstZxhtSP9ONInccjZAjB7nWBrBce_Fpg0,19081
19
19
  metaflow/metaflow_config_funcs.py,sha256=pCaiQ2ez9wXixJI3ehmf3QiW9lUqFrZnBZx1my_0wIg,4874
20
20
  metaflow/metaflow_environment.py,sha256=JdsmQsYp1SDQniQ0-q1mKRrmzSFfYuzrf6jLEHmyaiM,7352
21
21
  metaflow/metaflow_profile.py,sha256=jKPEW-hmAQO-htSxb9hXaeloLacAh41A35rMZH6G8pA,418
@@ -33,7 +33,7 @@ metaflow/task.py,sha256=yGNU3T3giKiG--vE0DUj_K-8jur2TclCS45XjPVLcq4,25314
33
33
  metaflow/unbounded_foreach.py,sha256=p184WMbrMJ3xKYHwewj27ZhRUsSj_kw1jlye5gA9xJk,387
34
34
  metaflow/util.py,sha256=jbMJ17rK-dFTBCjimWqxkfcr3v__bHa3tZtX0g8iS2c,13257
35
35
  metaflow/vendor.py,sha256=LZgXrh7ZSDmD32D1T5jj3OKKpXIqqxKzdMAOc5V0SD4,5162
36
- metaflow/version.py,sha256=we5fgY997QHteYdrr1V0ZYnlRhuIe9yUxyP2LejQuv8,31
36
+ metaflow/version.py,sha256=gXS_wIDHs2sEK4Lt7UOfOM6t13X5UUPilPOmvUUcpgA,30
37
37
  metaflow/_vendor/__init__.py,sha256=y_CiwUD3l4eAKvTVDZeqgVujMy31cAM1qjAB-HfI-9s,353
38
38
  metaflow/_vendor/click/__init__.py,sha256=FkyGDQ-cbiQxP_lxgUspyFYS48f2S_pTcfKPz-d_RMo,2463
39
39
  metaflow/_vendor/click/_bashcomplete.py,sha256=9J98IHQYmCAr2Jup6TDshUr5FJEen-AoQCZR0K5nKxQ,12309
@@ -145,7 +145,7 @@ metaflow/plugins/airflow/sensors/s3_sensor.py,sha256=zym4mUm_f_gBsvHHVqGtX_OOxRj
145
145
  metaflow/plugins/argo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
146
146
  metaflow/plugins/argo/argo_client.py,sha256=MKKhMCbWOPzf6z5zQQiyDRHHkAXcO7ipboDZDqAAvOk,15849
147
147
  metaflow/plugins/argo/argo_events.py,sha256=TIEOHrvUQ079YzzpzpFFtZjeU2x7hoofyZ6ytCorm2c,5911
148
- metaflow/plugins/argo/argo_workflows.py,sha256=du8R0vI2R0_Z2c-MOAdLldOCkoHl2c-G2mDbARKysV4,119741
148
+ metaflow/plugins/argo/argo_workflows.py,sha256=h-zXFauJce-44eKA8vh9UaW5kIdaN7irz4QwfrfFCNQ,119978
149
149
  metaflow/plugins/argo/argo_workflows_cli.py,sha256=sZTpgfmc50eT3e0qIxpVqUgWhTcYlO1HM4gU6Oaya8g,33259
150
150
  metaflow/plugins/argo/argo_workflows_decorator.py,sha256=CfKVoHCOsCCQMghhPE30xw15gacwp3hR23HCo9ZZFVg,6580
151
151
  metaflow/plugins/argo/process_input_paths.py,sha256=LjUSP8PVU-DRGEPxjas99nzyAO-fI82Bxxbr_QETE88,565
@@ -243,11 +243,11 @@ metaflow/plugins/gcp/gs_tail.py,sha256=Jl_wvnzU7dub07A-DOAuP5FeccNIrPM-CeL1xKFs1
243
243
  metaflow/plugins/gcp/gs_utils.py,sha256=ZmIGFse1qYyvAVrwga23PQUzF6dXEDLLsZ2F-YRmvow,2030
244
244
  metaflow/plugins/gcp/includefile_support.py,sha256=vIDeR-MiJuUh-2S2pV7Z7FBkhIWwtHXaRrj76MWGRiY,3869
245
245
  metaflow/plugins/kubernetes/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
246
- metaflow/plugins/kubernetes/kubernetes.py,sha256=khwVy1r0bPRjcj5t7S9RwhHm_ACmO5qU0-T_gMxGopw,17371
247
- metaflow/plugins/kubernetes/kubernetes_cli.py,sha256=Y8XNVTumxGkYLs_pFE04w8X0zCEvCSNaIBQx21zETC8,8942
246
+ metaflow/plugins/kubernetes/kubernetes.py,sha256=ePh4vzHDJFkooJKT75zJgipjQlwslyRs1VFcZVYkabE,18834
247
+ metaflow/plugins/kubernetes/kubernetes_cli.py,sha256=RugVe3UHWFGd03OM76fSzxSt3QYAT8KHQ5-iiKzQrGA,10092
248
248
  metaflow/plugins/kubernetes/kubernetes_client.py,sha256=dV3TEGQMBbljmv6Gs1EKfmHTorKt21lhSiYsNx0To08,1901
249
- metaflow/plugins/kubernetes/kubernetes_decorator.py,sha256=HTDM5Z-04nQK75vFXFLdW1a4dWSEHEJerKqKc65khMA,21184
250
- metaflow/plugins/kubernetes/kubernetes_job.py,sha256=XF8_dXhoEyCys-aIZliCdnnwmkXfXQudFJNAp7NM9Oc,30723
249
+ metaflow/plugins/kubernetes/kubernetes_decorator.py,sha256=Rs2KGy0yInQmMq9W2jEockiq2eOrrnd1TAMmpu1Q9pA,24103
250
+ metaflow/plugins/kubernetes/kubernetes_job.py,sha256=8LNMwZSz1afbQXrPNJnDo_nTMIA0SQza6yjxkf2N2_k,50853
251
251
  metaflow/plugins/metadata/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
252
252
  metaflow/plugins/metadata/local.py,sha256=YhLJC5zjVJrvQFIyQ92ZBByiUmhCC762RUX7ITX12O8,22428
253
253
  metaflow/plugins/metadata/service.py,sha256=ihq5F7KQZlxvYwzH_-jyP2aWN_I96i2vp92j_d697s8,20204
@@ -295,9 +295,9 @@ metaflow/tutorials/07-worldview/README.md,sha256=5vQTrFqulJ7rWN6r20dhot9lI2sVj9W
295
295
  metaflow/tutorials/07-worldview/worldview.ipynb,sha256=ztPZPI9BXxvW1QdS2Tfe7LBuVzvFvv0AToDnsDJhLdE,2237
296
296
  metaflow/tutorials/08-autopilot/README.md,sha256=GnePFp_q76jPs991lMUqfIIh5zSorIeWznyiUxzeUVE,1039
297
297
  metaflow/tutorials/08-autopilot/autopilot.ipynb,sha256=DQoJlILV7Mq9vfPBGW-QV_kNhWPjS5n6SJLqePjFYLY,3191
298
- ob_metaflow-2.10.11.1.dist-info/LICENSE,sha256=nl_Lt5v9VvJ-5lWJDT4ddKAG-VZ-2IaLmbzpgYDz2hU,11343
299
- ob_metaflow-2.10.11.1.dist-info/METADATA,sha256=aLS4SeD3av7VHhDb1fY4m36TlGjoDqvEY2Bb3TmLqIo,5062
300
- ob_metaflow-2.10.11.1.dist-info/WHEEL,sha256=-G_t0oGuE7UD0DrSpVZnq1hHMBV9DD2XkS5v7XpmTnk,110
301
- ob_metaflow-2.10.11.1.dist-info/entry_points.txt,sha256=IKwTN1T3I5eJL3uo_vnkyxVffcgnRdFbKwlghZfn27k,57
302
- ob_metaflow-2.10.11.1.dist-info/top_level.txt,sha256=v1pDHoWaSaKeuc5fKTRSfsXCKSdW1zvNVmvA-i0if3o,9
303
- ob_metaflow-2.10.11.1.dist-info/RECORD,,
298
+ ob_metaflow-2.11.0.2.dist-info/LICENSE,sha256=nl_Lt5v9VvJ-5lWJDT4ddKAG-VZ-2IaLmbzpgYDz2hU,11343
299
+ ob_metaflow-2.11.0.2.dist-info/METADATA,sha256=K4SO4xxndoBOLUCcxWPSyonbeAL6FnLqqTLCOq0CSZU,5061
300
+ ob_metaflow-2.11.0.2.dist-info/WHEEL,sha256=-G_t0oGuE7UD0DrSpVZnq1hHMBV9DD2XkS5v7XpmTnk,110
301
+ ob_metaflow-2.11.0.2.dist-info/entry_points.txt,sha256=IKwTN1T3I5eJL3uo_vnkyxVffcgnRdFbKwlghZfn27k,57
302
+ ob_metaflow-2.11.0.2.dist-info/top_level.txt,sha256=v1pDHoWaSaKeuc5fKTRSfsXCKSdW1zvNVmvA-i0if3o,9
303
+ ob_metaflow-2.11.0.2.dist-info/RECORD,,