ob-metaflow-extensions 1.1.96__tar.gz → 1.1.98__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ob-metaflow-extensions might be problematic. Click here for more details.
- {ob-metaflow-extensions-1.1.96 → ob-metaflow-extensions-1.1.98}/PKG-INFO +1 -1
- {ob-metaflow-extensions-1.1.96 → ob-metaflow-extensions-1.1.98}/metaflow_extensions/outerbounds/plugins/__init__.py +1 -0
- {ob-metaflow-extensions-1.1.96 → ob-metaflow-extensions-1.1.98}/metaflow_extensions/outerbounds/plugins/nvcf/nvcf.py +13 -10
- ob-metaflow-extensions-1.1.98/metaflow_extensions/outerbounds/plugins/tensorboard/__init__.py +50 -0
- {ob-metaflow-extensions-1.1.96 → ob-metaflow-extensions-1.1.98}/ob_metaflow_extensions.egg-info/PKG-INFO +1 -1
- {ob-metaflow-extensions-1.1.96 → ob-metaflow-extensions-1.1.98}/ob_metaflow_extensions.egg-info/SOURCES.txt +1 -0
- ob-metaflow-extensions-1.1.98/ob_metaflow_extensions.egg-info/requires.txt +3 -0
- {ob-metaflow-extensions-1.1.96 → ob-metaflow-extensions-1.1.98}/setup.py +2 -2
- ob-metaflow-extensions-1.1.96/ob_metaflow_extensions.egg-info/requires.txt +0 -3
- {ob-metaflow-extensions-1.1.96 → ob-metaflow-extensions-1.1.98}/README.md +0 -0
- {ob-metaflow-extensions-1.1.96 → ob-metaflow-extensions-1.1.98}/metaflow_extensions/outerbounds/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.96 → ob-metaflow-extensions-1.1.98}/metaflow_extensions/outerbounds/config/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.96 → ob-metaflow-extensions-1.1.98}/metaflow_extensions/outerbounds/plugins/auth_server.py +0 -0
- {ob-metaflow-extensions-1.1.96 → ob-metaflow-extensions-1.1.98}/metaflow_extensions/outerbounds/plugins/fast_bakery/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.96 → ob-metaflow-extensions-1.1.98}/metaflow_extensions/outerbounds/plugins/fast_bakery/docker_environment.py +0 -0
- {ob-metaflow-extensions-1.1.96 → ob-metaflow-extensions-1.1.98}/metaflow_extensions/outerbounds/plugins/fast_bakery/fast_bakery.py +0 -0
- {ob-metaflow-extensions-1.1.96 → ob-metaflow-extensions-1.1.98}/metaflow_extensions/outerbounds/plugins/fast_bakery/fast_bakery_cli.py +0 -0
- {ob-metaflow-extensions-1.1.96 → ob-metaflow-extensions-1.1.98}/metaflow_extensions/outerbounds/plugins/fast_bakery/fast_bakery_decorator.py +0 -0
- {ob-metaflow-extensions-1.1.96 → ob-metaflow-extensions-1.1.98}/metaflow_extensions/outerbounds/plugins/kubernetes/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.96 → ob-metaflow-extensions-1.1.98}/metaflow_extensions/outerbounds/plugins/kubernetes/kubernetes_client.py +0 -0
- {ob-metaflow-extensions-1.1.96 → ob-metaflow-extensions-1.1.98}/metaflow_extensions/outerbounds/plugins/nim/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.96 → ob-metaflow-extensions-1.1.98}/metaflow_extensions/outerbounds/plugins/nim/nim_manager.py +0 -0
- {ob-metaflow-extensions-1.1.96 → ob-metaflow-extensions-1.1.98}/metaflow_extensions/outerbounds/plugins/nvcf/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.96 → ob-metaflow-extensions-1.1.98}/metaflow_extensions/outerbounds/plugins/nvcf/heartbeat_store.py +0 -0
- {ob-metaflow-extensions-1.1.96 → ob-metaflow-extensions-1.1.98}/metaflow_extensions/outerbounds/plugins/nvcf/nvcf_cli.py +0 -0
- {ob-metaflow-extensions-1.1.96 → ob-metaflow-extensions-1.1.98}/metaflow_extensions/outerbounds/plugins/nvcf/nvcf_decorator.py +0 -0
- {ob-metaflow-extensions-1.1.96 → ob-metaflow-extensions-1.1.98}/metaflow_extensions/outerbounds/plugins/perimeters.py +0 -0
- {ob-metaflow-extensions-1.1.96 → ob-metaflow-extensions-1.1.98}/metaflow_extensions/outerbounds/plugins/snowpark/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.96 → ob-metaflow-extensions-1.1.98}/metaflow_extensions/outerbounds/plugins/snowpark/snowpark.py +0 -0
- {ob-metaflow-extensions-1.1.96 → ob-metaflow-extensions-1.1.98}/metaflow_extensions/outerbounds/plugins/snowpark/snowpark_cli.py +0 -0
- {ob-metaflow-extensions-1.1.96 → ob-metaflow-extensions-1.1.98}/metaflow_extensions/outerbounds/plugins/snowpark/snowpark_client.py +0 -0
- {ob-metaflow-extensions-1.1.96 → ob-metaflow-extensions-1.1.98}/metaflow_extensions/outerbounds/plugins/snowpark/snowpark_decorator.py +0 -0
- {ob-metaflow-extensions-1.1.96 → ob-metaflow-extensions-1.1.98}/metaflow_extensions/outerbounds/plugins/snowpark/snowpark_exceptions.py +0 -0
- {ob-metaflow-extensions-1.1.96 → ob-metaflow-extensions-1.1.98}/metaflow_extensions/outerbounds/plugins/snowpark/snowpark_job.py +0 -0
- {ob-metaflow-extensions-1.1.96 → ob-metaflow-extensions-1.1.98}/metaflow_extensions/outerbounds/plugins/snowpark/snowpark_service_spec.py +0 -0
- {ob-metaflow-extensions-1.1.96 → ob-metaflow-extensions-1.1.98}/metaflow_extensions/outerbounds/profilers/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.96 → ob-metaflow-extensions-1.1.98}/metaflow_extensions/outerbounds/profilers/gpu.py +0 -0
- {ob-metaflow-extensions-1.1.96 → ob-metaflow-extensions-1.1.98}/metaflow_extensions/outerbounds/remote_config.py +0 -0
- {ob-metaflow-extensions-1.1.96 → ob-metaflow-extensions-1.1.98}/metaflow_extensions/outerbounds/toplevel/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.96 → ob-metaflow-extensions-1.1.98}/metaflow_extensions/outerbounds/toplevel/global_aliases_for_metaflow_package.py +0 -0
- {ob-metaflow-extensions-1.1.96 → ob-metaflow-extensions-1.1.98}/metaflow_extensions/outerbounds/toplevel/plugins/azure/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.96 → ob-metaflow-extensions-1.1.98}/metaflow_extensions/outerbounds/toplevel/plugins/gcp/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.96 → ob-metaflow-extensions-1.1.98}/metaflow_extensions/outerbounds/toplevel/plugins/kubernetes/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.96 → ob-metaflow-extensions-1.1.98}/ob_metaflow_extensions.egg-info/dependency_links.txt +0 -0
- {ob-metaflow-extensions-1.1.96 → ob-metaflow-extensions-1.1.98}/ob_metaflow_extensions.egg-info/top_level.txt +0 -0
- {ob-metaflow-extensions-1.1.96 → ob-metaflow-extensions-1.1.98}/setup.cfg +0 -0
|
@@ -317,6 +317,7 @@ STEP_DECORATORS_DESC = [
|
|
|
317
317
|
".fast_bakery.fast_bakery_decorator.InternalFastBakeryDecorator",
|
|
318
318
|
),
|
|
319
319
|
("snowpark", ".snowpark.snowpark_decorator.SnowparkDecorator"),
|
|
320
|
+
("tensorboard", ".tensorboard.TensorboardDecorator"),
|
|
320
321
|
]
|
|
321
322
|
FLOW_DECORATORS_DESC = [("nim", ".nim.NimDecorator")]
|
|
322
323
|
TOGGLE_STEP_DECORATOR = [
|
|
@@ -59,11 +59,11 @@ class Nvcf(object):
|
|
|
59
59
|
code_package_url, code_package_ds
|
|
60
60
|
)
|
|
61
61
|
init_expr = " && ".join(init_cmds)
|
|
62
|
-
heartbeat_expr = f'python -m metaflow_extensions.outerbounds.plugins.nvcf.heartbeat_store "$MAIN_PID" {code_package_ds} nvcf_heartbeats'
|
|
62
|
+
heartbeat_expr = f'python -m metaflow_extensions.outerbounds.plugins.nvcf.heartbeat_store "$MAIN_PID" {code_package_ds} nvcf_heartbeats & HEARTBEAT_PID=$!;'
|
|
63
63
|
step_expr = bash_capture_logs(
|
|
64
64
|
" && ".join(
|
|
65
65
|
self.environment.bootstrap_commands(step_name, code_package_ds)
|
|
66
|
-
+ [step_cli + " & MAIN_PID=$!; " + heartbeat_expr]
|
|
66
|
+
+ [step_cli + " & MAIN_PID=$!; " + heartbeat_expr + " wait $MAIN_PID"]
|
|
67
67
|
)
|
|
68
68
|
)
|
|
69
69
|
|
|
@@ -84,7 +84,10 @@ class Nvcf(object):
|
|
|
84
84
|
#
|
|
85
85
|
# Note that if step_expr OOMs, this tail expression is never executed.
|
|
86
86
|
# We lose the last logs in this scenario.
|
|
87
|
-
cmd_str +=
|
|
87
|
+
cmd_str += (
|
|
88
|
+
"c=$?; kill $HEARTBEAT_PID; wait $HEARTBEAT_PID; %s; exit $c"
|
|
89
|
+
% BASH_SAVE_LOGS
|
|
90
|
+
)
|
|
88
91
|
cmd_str = (
|
|
89
92
|
'${METAFLOW_INIT_SCRIPT:+eval \\"${METAFLOW_INIT_SCRIPT}\\"} && %s'
|
|
90
93
|
% cmd_str
|
|
@@ -222,10 +225,10 @@ class Job(object):
|
|
|
222
225
|
self._invocation_id = response.headers.get("NVCF-REQID")
|
|
223
226
|
if response.getcode() == 200:
|
|
224
227
|
data = json.loads(response.read())
|
|
225
|
-
if data
|
|
226
|
-
self._status = JobStatus.FAILED
|
|
227
|
-
else:
|
|
228
|
+
if data.get("exit_code") == 0:
|
|
228
229
|
self._status = JobStatus.SUCCESSFUL
|
|
230
|
+
else:
|
|
231
|
+
self._status = JobStatus.FAILED
|
|
229
232
|
self._result = data
|
|
230
233
|
elif response.getcode() == 202:
|
|
231
234
|
self._status = JobStatus.SUBMITTED
|
|
@@ -270,11 +273,11 @@ class Job(object):
|
|
|
270
273
|
response = urlopen(request)
|
|
271
274
|
if response.getcode() == 200:
|
|
272
275
|
data = json.loads(response.read())
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
self._status = JobStatus.FAILED
|
|
276
|
-
else:
|
|
276
|
+
# TODO: Propagate the internal error forward
|
|
277
|
+
if data.get("exit_code") == 0:
|
|
277
278
|
self._status = JobStatus.SUCCESSFUL
|
|
279
|
+
else:
|
|
280
|
+
self._status = JobStatus.FAILED
|
|
278
281
|
self._result = data
|
|
279
282
|
elif response.getcode() in [400, 500]:
|
|
280
283
|
self._status = JobStatus.FAILED
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
import functools
|
|
2
|
+
from metaflow.decorators import StepDecorator
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class TensorboardDecorator(StepDecorator):
|
|
6
|
+
name = "tensorboard"
|
|
7
|
+
defaults = {}
|
|
8
|
+
|
|
9
|
+
def task_decorate(
|
|
10
|
+
self, step_func, flow, graph, retry_count, max_user_code_retries, ubf_context
|
|
11
|
+
):
|
|
12
|
+
@functools.wraps(step_func)
|
|
13
|
+
def tb_wrapper():
|
|
14
|
+
import sys, os
|
|
15
|
+
from metaflow import metaflow_config, current
|
|
16
|
+
|
|
17
|
+
try:
|
|
18
|
+
from torch.utils.tensorboard import SummaryWriter
|
|
19
|
+
except:
|
|
20
|
+
print(
|
|
21
|
+
"[@tensorboard] Torch and tensorboard not found - logging disabled!",
|
|
22
|
+
file=sys.stderr,
|
|
23
|
+
)
|
|
24
|
+
step_func()
|
|
25
|
+
else:
|
|
26
|
+
tb_root = os.path.join(metaflow_config.DATATOOLS_S3ROOT, "tb")
|
|
27
|
+
pathspec = current.pathspec
|
|
28
|
+
try:
|
|
29
|
+
log_dir = os.path.join(tb_root, current.project_flow_name, pathspec)
|
|
30
|
+
except:
|
|
31
|
+
log_dir = os.path.join(tb_root, pathspec)
|
|
32
|
+
comps = log_dir[len(tb_root) + 1 :].split("/")
|
|
33
|
+
run_level = "/".join(comps[:-2])
|
|
34
|
+
flow_level = "/".join(comps[:-3])
|
|
35
|
+
|
|
36
|
+
print("[@tensorboard] -- INSPECTING RESULTS")
|
|
37
|
+
print(
|
|
38
|
+
"[@tensorboard] -- Execute one of these commands on your workstation:"
|
|
39
|
+
)
|
|
40
|
+
print(f"[@tensorboard] Compare tasks of this run: obtb {run_level}")
|
|
41
|
+
print(f"[@tensorboard] Compare across runs: obtb {flow_level}")
|
|
42
|
+
writer = SummaryWriter(log_dir=log_dir)
|
|
43
|
+
setattr(flow, "obtb", writer)
|
|
44
|
+
try:
|
|
45
|
+
step_func()
|
|
46
|
+
finally:
|
|
47
|
+
writer.flush()
|
|
48
|
+
delattr(flow, "obtb")
|
|
49
|
+
|
|
50
|
+
return tb_wrapper
|
|
@@ -28,6 +28,7 @@ metaflow_extensions/outerbounds/plugins/snowpark/snowpark_decorator.py
|
|
|
28
28
|
metaflow_extensions/outerbounds/plugins/snowpark/snowpark_exceptions.py
|
|
29
29
|
metaflow_extensions/outerbounds/plugins/snowpark/snowpark_job.py
|
|
30
30
|
metaflow_extensions/outerbounds/plugins/snowpark/snowpark_service_spec.py
|
|
31
|
+
metaflow_extensions/outerbounds/plugins/tensorboard/__init__.py
|
|
31
32
|
metaflow_extensions/outerbounds/profilers/__init__.py
|
|
32
33
|
metaflow_extensions/outerbounds/profilers/gpu.py
|
|
33
34
|
metaflow_extensions/outerbounds/toplevel/__init__.py
|
|
@@ -2,7 +2,7 @@ from setuptools import setup, find_namespace_packages
|
|
|
2
2
|
from pathlib import Path
|
|
3
3
|
|
|
4
4
|
|
|
5
|
-
version = "1.1.96"
|
|
5
|
+
version = "1.1.98"
|
|
6
6
|
this_directory = Path(__file__).parent
|
|
7
7
|
long_description = (this_directory / "README.md").read_text()
|
|
8
8
|
|
|
@@ -18,6 +18,6 @@ setup(
|
|
|
18
18
|
install_requires=[
|
|
19
19
|
"boto3",
|
|
20
20
|
"kubernetes",
|
|
21
|
-
"ob-metaflow == 2.12.
|
|
21
|
+
"ob-metaflow == 2.12.25.1",
|
|
22
22
|
],
|
|
23
23
|
)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|