ob-metaflow-extensions 1.1.88__tar.gz → 1.1.89__tar.gz

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ob-metaflow-extensions might be problematic. Click here for more details.

Files changed (45) hide show
  1. {ob-metaflow-extensions-1.1.88 → ob-metaflow-extensions-1.1.89}/PKG-INFO +1 -1
  2. {ob-metaflow-extensions-1.1.88 → ob-metaflow-extensions-1.1.89}/metaflow_extensions/outerbounds/plugins/__init__.py +1 -1
  3. {ob-metaflow-extensions-1.1.88 → ob-metaflow-extensions-1.1.89}/metaflow_extensions/outerbounds/plugins/fast_bakery/docker_environment.py +76 -48
  4. {ob-metaflow-extensions-1.1.88 → ob-metaflow-extensions-1.1.89}/metaflow_extensions/outerbounds/plugins/fast_bakery/fast_bakery.py +2 -0
  5. {ob-metaflow-extensions-1.1.88 → ob-metaflow-extensions-1.1.89}/metaflow_extensions/outerbounds/plugins/nvcf/heartbeat_store.py +19 -3
  6. {ob-metaflow-extensions-1.1.88 → ob-metaflow-extensions-1.1.89}/metaflow_extensions/outerbounds/plugins/nvcf/nvcf_cli.py +22 -12
  7. {ob-metaflow-extensions-1.1.88 → ob-metaflow-extensions-1.1.89}/metaflow_extensions/outerbounds/plugins/nvcf/nvcf_decorator.py +1 -1
  8. {ob-metaflow-extensions-1.1.88 → ob-metaflow-extensions-1.1.89}/ob_metaflow_extensions.egg-info/PKG-INFO +1 -1
  9. ob-metaflow-extensions-1.1.89/ob_metaflow_extensions.egg-info/requires.txt +3 -0
  10. {ob-metaflow-extensions-1.1.88 → ob-metaflow-extensions-1.1.89}/setup.py +2 -2
  11. ob-metaflow-extensions-1.1.88/ob_metaflow_extensions.egg-info/requires.txt +0 -3
  12. {ob-metaflow-extensions-1.1.88 → ob-metaflow-extensions-1.1.89}/README.md +0 -0
  13. {ob-metaflow-extensions-1.1.88 → ob-metaflow-extensions-1.1.89}/metaflow_extensions/outerbounds/__init__.py +0 -0
  14. {ob-metaflow-extensions-1.1.88 → ob-metaflow-extensions-1.1.89}/metaflow_extensions/outerbounds/config/__init__.py +0 -0
  15. {ob-metaflow-extensions-1.1.88 → ob-metaflow-extensions-1.1.89}/metaflow_extensions/outerbounds/plugins/auth_server.py +0 -0
  16. {ob-metaflow-extensions-1.1.88 → ob-metaflow-extensions-1.1.89}/metaflow_extensions/outerbounds/plugins/fast_bakery/__init__.py +0 -0
  17. {ob-metaflow-extensions-1.1.88 → ob-metaflow-extensions-1.1.89}/metaflow_extensions/outerbounds/plugins/fast_bakery/fast_bakery_cli.py +0 -0
  18. {ob-metaflow-extensions-1.1.88 → ob-metaflow-extensions-1.1.89}/metaflow_extensions/outerbounds/plugins/fast_bakery/fast_bakery_decorator.py +0 -0
  19. {ob-metaflow-extensions-1.1.88 → ob-metaflow-extensions-1.1.89}/metaflow_extensions/outerbounds/plugins/kubernetes/__init__.py +0 -0
  20. {ob-metaflow-extensions-1.1.88 → ob-metaflow-extensions-1.1.89}/metaflow_extensions/outerbounds/plugins/kubernetes/kubernetes_client.py +0 -0
  21. {ob-metaflow-extensions-1.1.88 → ob-metaflow-extensions-1.1.89}/metaflow_extensions/outerbounds/plugins/nim/__init__.py +0 -0
  22. {ob-metaflow-extensions-1.1.88 → ob-metaflow-extensions-1.1.89}/metaflow_extensions/outerbounds/plugins/nim/nim_manager.py +0 -0
  23. {ob-metaflow-extensions-1.1.88 → ob-metaflow-extensions-1.1.89}/metaflow_extensions/outerbounds/plugins/nvcf/__init__.py +0 -0
  24. {ob-metaflow-extensions-1.1.88 → ob-metaflow-extensions-1.1.89}/metaflow_extensions/outerbounds/plugins/nvcf/nvcf.py +0 -0
  25. {ob-metaflow-extensions-1.1.88 → ob-metaflow-extensions-1.1.89}/metaflow_extensions/outerbounds/plugins/perimeters.py +0 -0
  26. {ob-metaflow-extensions-1.1.88 → ob-metaflow-extensions-1.1.89}/metaflow_extensions/outerbounds/plugins/snowpark/__init__.py +0 -0
  27. {ob-metaflow-extensions-1.1.88 → ob-metaflow-extensions-1.1.89}/metaflow_extensions/outerbounds/plugins/snowpark/snowpark.py +0 -0
  28. {ob-metaflow-extensions-1.1.88 → ob-metaflow-extensions-1.1.89}/metaflow_extensions/outerbounds/plugins/snowpark/snowpark_cli.py +0 -0
  29. {ob-metaflow-extensions-1.1.88 → ob-metaflow-extensions-1.1.89}/metaflow_extensions/outerbounds/plugins/snowpark/snowpark_client.py +0 -0
  30. {ob-metaflow-extensions-1.1.88 → ob-metaflow-extensions-1.1.89}/metaflow_extensions/outerbounds/plugins/snowpark/snowpark_decorator.py +0 -0
  31. {ob-metaflow-extensions-1.1.88 → ob-metaflow-extensions-1.1.89}/metaflow_extensions/outerbounds/plugins/snowpark/snowpark_exceptions.py +0 -0
  32. {ob-metaflow-extensions-1.1.88 → ob-metaflow-extensions-1.1.89}/metaflow_extensions/outerbounds/plugins/snowpark/snowpark_job.py +0 -0
  33. {ob-metaflow-extensions-1.1.88 → ob-metaflow-extensions-1.1.89}/metaflow_extensions/outerbounds/plugins/snowpark/snowpark_service_spec.py +0 -0
  34. {ob-metaflow-extensions-1.1.88 → ob-metaflow-extensions-1.1.89}/metaflow_extensions/outerbounds/profilers/__init__.py +0 -0
  35. {ob-metaflow-extensions-1.1.88 → ob-metaflow-extensions-1.1.89}/metaflow_extensions/outerbounds/profilers/gpu.py +0 -0
  36. {ob-metaflow-extensions-1.1.88 → ob-metaflow-extensions-1.1.89}/metaflow_extensions/outerbounds/remote_config.py +0 -0
  37. {ob-metaflow-extensions-1.1.88 → ob-metaflow-extensions-1.1.89}/metaflow_extensions/outerbounds/toplevel/__init__.py +0 -0
  38. {ob-metaflow-extensions-1.1.88 → ob-metaflow-extensions-1.1.89}/metaflow_extensions/outerbounds/toplevel/global_aliases_for_metaflow_package.py +0 -0
  39. {ob-metaflow-extensions-1.1.88 → ob-metaflow-extensions-1.1.89}/metaflow_extensions/outerbounds/toplevel/plugins/azure/__init__.py +0 -0
  40. {ob-metaflow-extensions-1.1.88 → ob-metaflow-extensions-1.1.89}/metaflow_extensions/outerbounds/toplevel/plugins/gcp/__init__.py +0 -0
  41. {ob-metaflow-extensions-1.1.88 → ob-metaflow-extensions-1.1.89}/metaflow_extensions/outerbounds/toplevel/plugins/kubernetes/__init__.py +0 -0
  42. {ob-metaflow-extensions-1.1.88 → ob-metaflow-extensions-1.1.89}/ob_metaflow_extensions.egg-info/SOURCES.txt +0 -0
  43. {ob-metaflow-extensions-1.1.88 → ob-metaflow-extensions-1.1.89}/ob_metaflow_extensions.egg-info/dependency_links.txt +0 -0
  44. {ob-metaflow-extensions-1.1.88 → ob-metaflow-extensions-1.1.89}/ob_metaflow_extensions.egg-info/top_level.txt +0 -0
  45. {ob-metaflow-extensions-1.1.88 → ob-metaflow-extensions-1.1.89}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ob-metaflow-extensions
3
- Version: 1.1.88
3
+ Version: 1.1.89
4
4
  Summary: Outerbounds Platform Extensions for Metaflow
5
5
  Author: Outerbounds, Inc.
6
6
  License: Commercial
@@ -306,7 +306,7 @@ class ObpGcpAuthProvider(object):
306
306
 
307
307
  GCP_CLIENT_PROVIDERS_DESC = [("obp", ".ObpGcpAuthProvider")]
308
308
  CLIS_DESC = [
309
- ("nvcf", ".nvcf.nvcf_cli.cli"),
309
+ ("nvidia", ".nvcf.nvcf_cli.cli"),
310
310
  ("fast-bakery", ".fast_bakery.fast_bakery_cli.cli"),
311
311
  ("snowpark", ".snowpark.snowpark_cli.cli"),
312
312
  ]
@@ -15,6 +15,7 @@ from metaflow.plugins.kubernetes.kubernetes_decorator import KubernetesDecorator
15
15
  from metaflow.plugins.pypi.conda_decorator import CondaStepDecorator
16
16
  from metaflow.plugins.pypi.conda_environment import CondaEnvironment
17
17
  from metaflow.plugins.pypi.pypi_decorator import PyPIStepDecorator
18
+ from metaflow import decorators
18
19
 
19
20
  from .fast_bakery import FastBakery, FastBakeryApiResponse, FastBakeryException
20
21
 
@@ -26,8 +27,6 @@ import os
26
27
  from concurrent.futures import ThreadPoolExecutor
27
28
  from functools import wraps
28
29
 
29
- # TODO - ensure that both @conda/@pypi are not assigned to the same step
30
-
31
30
 
32
31
  def cache_request(cache_file):
33
32
  def decorator(func):
@@ -103,7 +102,10 @@ class DockerEnvironment(MetaflowEnvironment):
103
102
  self.local_root = local_root
104
103
 
105
104
  def decospecs(self):
106
- return ("conda", "fast_bakery_internal") + super().decospecs()
105
+ # Due to conflicts with the CondaEnvironment fallback and bakery,
106
+ # we can not simply attach 'conda' or 'pypi' to all steps here.
107
+ # Instead we do this on a per-step basis in init_environment
108
+ return ("fast_bakery_internal",) + super().decospecs()
107
109
 
108
110
  def validate_environment(self, logger, datastore_type):
109
111
  self.datastore_type = datastore_type
@@ -116,14 +118,7 @@ class DockerEnvironment(MetaflowEnvironment):
116
118
 
117
119
  # Mixing @pypi/@conda in a single step is not supported yet
118
120
  for step in self.flow:
119
- if (
120
- sum(
121
- 1
122
- for deco in step.decorators
123
- if isinstance(deco, (PyPIStepDecorator, CondaStepDecorator))
124
- )
125
- > 1
126
- ):
121
+ if sum(1 for deco in step.decorators if _is_env_deco(deco)) > 1:
127
122
  raise MetaflowException(
128
123
  "Mixing and matching PyPI packages and Conda packages within a\n"
129
124
  "step is not yet supported. Use one of @pypi or @conda only for the *%s* step."
@@ -132,13 +127,31 @@ class DockerEnvironment(MetaflowEnvironment):
132
127
 
133
128
  def init_environment(self, echo):
134
129
  self.skipped_steps = {
135
- step.name
136
- for step in self.flow
137
- if not any(
138
- isinstance(deco, (BatchDecorator, KubernetesDecorator))
139
- for deco in step.decorators
140
- )
130
+ step.name for step in self.flow if not _step_executes_remotely(step)
141
131
  }
132
+ # Attach environment decorator as needed. This is done on a step-by-step basis
133
+ # as we require a conda decorator for fallback steps, but prefer pypi for the baked ones.
134
+ for step in self.flow:
135
+ if not _step_has_environment_deco(step):
136
+ if step.name in self.skipped_steps:
137
+ # Conda fallback requires a conda decorator as the default for a step
138
+ decorators._attach_decorators_to_step(step, ["conda"])
139
+ else:
140
+ # We default to PyPI for steps that are going to be baked.
141
+ decorators._attach_decorators_to_step(step, ["pypi"])
142
+ # Initialize the decorator we attached.
143
+ # This is crucial for the conda decorator to work properly in the fallback environment
144
+ for deco in step.decorators:
145
+ if _is_env_deco(deco):
146
+ deco.step_init(
147
+ self.flow,
148
+ None, # not passing graph as it is not available, and not required by conda/pypi decorators
149
+ step.name,
150
+ step.decorators,
151
+ self,
152
+ self.datastore,
153
+ echo,
154
+ )
142
155
 
143
156
  steps_to_bake = [
144
157
  step for step in self.flow if step.name not in self.skipped_steps
@@ -149,7 +162,7 @@ class DockerEnvironment(MetaflowEnvironment):
149
162
  self.results = self._bake(steps_to_bake)
150
163
  for step in self.flow:
151
164
  for d in step.decorators:
152
- if isinstance(d, (BatchDecorator, KubernetesDecorator)):
165
+ if _is_remote_deco(d):
153
166
  d.attributes["image"] = self.results[step.name].container_image
154
167
  d.attributes["executable"] = self.results[step.name].python_path
155
168
  if self.images_baked > 0:
@@ -178,32 +191,32 @@ class DockerEnvironment(MetaflowEnvironment):
178
191
  conda_packages=None,
179
192
  base_image=None,
180
193
  ):
181
- bakery = FastBakery(url=FAST_BAKERY_URL)
182
- bakery._reset_payload()
183
- bakery.python_version(python)
184
- bakery.pypi_packages(pypi_packages)
185
- bakery.conda_packages(conda_packages)
186
- bakery.base_image(base_image)
187
- # bakery.ignore_cache()
188
-
189
- with logger_lock:
190
- self.logger(f"🍳 Baking [{ref}] ...")
191
- self.logger(f" 🐍 Python: {python}")
192
-
193
- if pypi_packages:
194
- self.logger(f" 📦 PyPI packages:")
195
- for package, version in pypi_packages.items():
196
- self.logger(f" 🔧 {package}: {version}")
197
-
198
- if conda_packages:
199
- self.logger(f" 📦 Conda packages:")
200
- for package, version in conda_packages.items():
201
- self.logger(f" 🔧 {package}: {version}")
202
-
203
- self.logger(f" 🏗️ Base image: {base_image}")
204
-
205
- start_time = time.time()
206
194
  try:
195
+ bakery = FastBakery(url=FAST_BAKERY_URL)
196
+ bakery._reset_payload()
197
+ bakery.python_version(python)
198
+ bakery.pypi_packages(pypi_packages)
199
+ bakery.conda_packages(conda_packages)
200
+ bakery.base_image(base_image)
201
+ # bakery.ignore_cache()
202
+
203
+ with logger_lock:
204
+ self.logger(f"🍳 Baking [{ref}] ...")
205
+ self.logger(f" 🐍 Python: {python}")
206
+
207
+ if pypi_packages:
208
+ self.logger(f" 📦 PyPI packages:")
209
+ for package, version in pypi_packages.items():
210
+ self.logger(f" 🔧 {package}: {version}")
211
+
212
+ if conda_packages:
213
+ self.logger(f" 📦 Conda packages:")
214
+ for package, version in conda_packages.items():
215
+ self.logger(f" 🔧 {package}: {version}")
216
+
217
+ self.logger(f" 🏗️ Base image: {base_image}")
218
+
219
+ start_time = time.time()
207
220
  res = bakery.bake()
208
221
  # TODO: Get actual bake time from bakery
209
222
  bake_time = time.time() - start_time
@@ -225,11 +238,7 @@ class DockerEnvironment(MetaflowEnvironment):
225
238
  None,
226
239
  )
227
240
  dependencies = next(
228
- (
229
- d
230
- for d in step.decorators
231
- if isinstance(d, (CondaStepDecorator, PyPIStepDecorator))
232
- ),
241
+ (d for d in step.decorators if _is_env_deco(d)),
233
242
  None,
234
243
  )
235
244
  python = next(
@@ -322,3 +331,22 @@ class DockerEnvironment(MetaflowEnvironment):
322
331
 
323
332
  def get_fastbakery_metafile_path(local_root, flow_name):
324
333
  return os.path.join(local_root, flow_name, BAKERY_METAFILE)
334
+
335
+
336
+ def _is_remote_deco(deco):
337
+ return isinstance(deco, (BatchDecorator, KubernetesDecorator))
338
+
339
+
340
+ def _step_executes_remotely(step):
341
+ "Check if a step is going to execute remotely or locally"
342
+ return any(_is_remote_deco(deco) for deco in step.decorators)
343
+
344
+
345
+ def _is_env_deco(deco):
346
+ "Check if a decorator is a known environment decorator (conda/pypi)"
347
+ return isinstance(deco, (PyPIStepDecorator, CondaStepDecorator))
348
+
349
+
350
+ def _step_has_environment_deco(step):
351
+ "Check if a step has a virtual environment decorator"
352
+ return any(_is_env_deco(deco) for deco in step.decorators)
@@ -73,6 +73,8 @@ class FastBakeryApiResponse:
73
73
 
74
74
  class FastBakery:
75
75
  def __init__(self, url: str):
76
+ if not url:
77
+ raise FastBakeryException("Specifying a url is required.")
76
78
  self.url = url
77
79
  self.headers = {"Content-Type": "application/json", "Connection": "keep-alive"}
78
80
  self._reset_payload()
@@ -1,13 +1,29 @@
1
1
  import os
2
2
  import sys
3
3
  import time
4
- import signal
4
+ import subprocess
5
5
  from io import BytesIO
6
6
  from datetime import datetime, timezone
7
7
 
8
8
  from metaflow.exception import MetaflowException
9
9
 
10
10
 
11
+ def kill_process_and_descendants(pid, termination_timeout=5):
12
+ try:
13
+ subprocess.check_call(["pkill", "-TERM", "-P", str(pid)])
14
+ subprocess.check_call(["kill", "-TERM", str(pid)])
15
+ except subprocess.CalledProcessError:
16
+ pass
17
+
18
+ time.sleep(termination_timeout)
19
+
20
+ try:
21
+ subprocess.check_call(["pkill", "-KILL", "-P", str(pid)])
22
+ subprocess.check_call(["kill", "-KILL", str(pid)])
23
+ except subprocess.CalledProcessError:
24
+ pass
25
+
26
+
11
27
  class HeartbeatStore(object):
12
28
  def __init__(
13
29
  self,
@@ -67,7 +83,7 @@ class HeartbeatStore(object):
67
83
  contents = f.read()
68
84
  if "tombstone" in contents:
69
85
  print("[Outerbounds] Tombstone detected. Terminating the task..")
70
- os.kill(self.main_pid, signal.SIGTERM)
86
+ kill_process_and_descendants(self.main_pid)
71
87
  sys.exit(1)
72
88
 
73
89
  def __handle_heartbeat(self, path):
@@ -86,7 +102,7 @@ class HeartbeatStore(object):
86
102
  print(
87
103
  f"[Outerbounds] Missed {self.max_missed_heartbeats} consecutive heartbeats. Terminating the task.."
88
104
  )
89
- os.kill(self.main_pid, signal.SIGTERM)
105
+ kill_process_and_descendants(self.main_pid)
90
106
  sys.exit(1)
91
107
 
92
108
  def is_main_process_running(self):
@@ -39,13 +39,18 @@ def cli():
39
39
  pass
40
40
 
41
41
 
42
- @cli.group(help="Commands related to NVCF.")
43
- def nvcf():
42
+ @cli.group(help="Commands related to nvidia.")
43
+ def nvidia():
44
44
  pass
45
45
 
46
46
 
47
- @nvcf.command(help="List steps / tasks running as an NVCF job.")
48
- @click.argument("run-id")
47
+ @nvidia.command(help="List steps / tasks running as an nvidia job.")
48
+ @click.option(
49
+ "--run-id",
50
+ default=None,
51
+ required=True,
52
+ help="List unfinished tasks corresponding to the run id.",
53
+ )
49
54
  @click.pass_context
50
55
  def list(ctx, run_id):
51
56
  flow_name = ctx.obj.flow.name
@@ -65,13 +70,18 @@ def list(ctx, run_id):
65
70
 
66
71
  if running_invocations:
67
72
  for each_invocation in running_invocations:
68
- print(each_invocation)
73
+ ctx.obj.echo(each_invocation)
69
74
  else:
70
- print("No running NVCF invocations for Run ID: %s" % run_id)
75
+ ctx.obj.echo("No running @nvidia invocations for Run ID: %s" % run_id)
71
76
 
72
77
 
73
- @nvcf.command(help="Kill steps / tasks running as an NVCF job.")
74
- @click.argument("run-id")
78
+ @nvidia.command(help="Kill steps / tasks running as an nvidia job.")
79
+ @click.option(
80
+ "--run-id",
81
+ default=None,
82
+ required=True,
83
+ help="Terminate unfinished tasks corresponding to the run id.",
84
+ )
75
85
  @click.pass_context
76
86
  def kill(ctx, run_id):
77
87
  from metaflow_extensions.outerbounds.plugins.nvcf.heartbeat_store import (
@@ -100,12 +110,12 @@ def kill(ctx, run_id):
100
110
  )
101
111
  store.emit_tombstone(folder_name="nvcf_heartbeats")
102
112
  else:
103
- print("No running NVCF invocations for Run ID: %s" % run_id)
113
+ ctx.obj.echo("No running @nvidia invocations for Run ID: %s" % run_id)
104
114
 
105
115
 
106
- @nvcf.command(
107
- help="Execute a single task using NVCF. This command calls the "
108
- "top-level step command inside a NVCF job with the given options. "
116
+ @nvidia.command(
117
+ help="Execute a single task using @nvidia. This command calls the "
118
+ "top-level step command inside an nvidia job with the given options. "
109
119
  "Typically you do not call this command directly; it is used internally by "
110
120
  "Metaflow."
111
121
  )
@@ -72,7 +72,7 @@ class NvcfDecorator(StepDecorator):
72
72
  # after all attempts to run the user code have failed, we don't need
73
73
  # to execute on NVCF anymore. We can execute possible fallback
74
74
  # code locally.
75
- cli_args.commands = ["nvcf", "step"]
75
+ cli_args.commands = ["nvidia", "step"]
76
76
  cli_args.command_args.append(self.package_sha)
77
77
  cli_args.command_args.append(self.package_url)
78
78
  cli_args.command_options.update(self.attributes)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ob-metaflow-extensions
3
- Version: 1.1.88
3
+ Version: 1.1.89
4
4
  Summary: Outerbounds Platform Extensions for Metaflow
5
5
  Author: Outerbounds, Inc.
6
6
  License: Commercial
@@ -0,0 +1,3 @@
1
+ boto3
2
+ kubernetes
3
+ ob-metaflow==2.12.19.1
@@ -2,7 +2,7 @@ from setuptools import setup, find_namespace_packages
2
2
  from pathlib import Path
3
3
 
4
4
 
5
- version = "1.1.88"
5
+ version = "1.1.89"
6
6
  this_directory = Path(__file__).parent
7
7
  long_description = (this_directory / "README.md").read_text()
8
8
 
@@ -18,6 +18,6 @@ setup(
18
18
  install_requires=[
19
19
  "boto3",
20
20
  "kubernetes",
21
- "ob-metaflow == 2.12.18.2",
21
+ "ob-metaflow == 2.12.19.1",
22
22
  ],
23
23
  )
@@ -1,3 +0,0 @@
1
- boto3
2
- kubernetes
3
- ob-metaflow==2.12.18.2