ob-metaflow-extensions 1.1.127__py2.py3-none-any.whl → 1.1.129__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ob-metaflow-extensions might be problematic. Click here for more details.
- metaflow_extensions/outerbounds/plugins/__init__.py +2 -1
- metaflow_extensions/outerbounds/plugins/card_utilities/__init__.py +0 -0
- metaflow_extensions/outerbounds/plugins/card_utilities/async_cards.py +142 -0
- metaflow_extensions/outerbounds/plugins/card_utilities/extra_components.py +545 -0
- metaflow_extensions/outerbounds/plugins/card_utilities/injector.py +70 -0
- metaflow_extensions/outerbounds/plugins/nim/__init__.py +92 -7
- metaflow_extensions/outerbounds/plugins/nim/card.py +154 -0
- metaflow_extensions/outerbounds/plugins/nim/nim_manager.py +188 -102
- metaflow_extensions/outerbounds/plugins/nim/utilities.py +5 -0
- {ob_metaflow_extensions-1.1.127.dist-info → ob_metaflow_extensions-1.1.129.dist-info}/METADATA +2 -2
- {ob_metaflow_extensions-1.1.127.dist-info → ob_metaflow_extensions-1.1.129.dist-info}/RECORD +13 -7
- {ob_metaflow_extensions-1.1.127.dist-info → ob_metaflow_extensions-1.1.129.dist-info}/WHEEL +0 -0
- {ob_metaflow_extensions-1.1.127.dist-info → ob_metaflow_extensions-1.1.129.dist-info}/top_level.txt +0 -0
|
@@ -1,10 +1,17 @@
|
|
|
1
1
|
from functools import partial
|
|
2
|
-
from
|
|
2
|
+
from uuid import uuid4
|
|
3
|
+
import os, time
|
|
4
|
+
from metaflow.decorators import StepDecorator
|
|
3
5
|
from metaflow import current
|
|
6
|
+
|
|
4
7
|
from .nim_manager import NimManager
|
|
8
|
+
from .card import NimMetricsRefresher
|
|
9
|
+
from .utilities import get_storage_path, NIM_MONITOR_LOCAL_STORAGE_ROOT
|
|
10
|
+
from ..card_utilities.async_cards import AsyncPeriodicRefresher
|
|
11
|
+
from ..card_utilities.injector import CardDecoratorInjector
|
|
5
12
|
|
|
6
13
|
|
|
7
|
-
class NimDecorator(FlowDecorator):
|
|
14
|
+
class NimDecorator(StepDecorator, CardDecoratorInjector):
|
|
8
15
|
"""
|
|
9
16
|
This decorator is used to run NIM containers in Metaflow tasks as sidecars.
|
|
10
17
|
|
|
@@ -18,13 +25,12 @@ class NimDecorator(FlowDecorator):
|
|
|
18
25
|
Valid backend options
|
|
19
26
|
---------------------
|
|
20
27
|
- 'managed': Outerbounds selects a compute provider based on the model.
|
|
21
|
-
- 🚧 'dataplane': Run in your account.
|
|
22
28
|
|
|
23
29
|
Valid model options
|
|
24
30
|
----------------
|
|
25
31
|
- 'meta/llama3-8b-instruct': 8B parameter model
|
|
26
32
|
- 'meta/llama3-70b-instruct': 70B parameter model
|
|
27
|
-
-
|
|
33
|
+
- any model here: https://nvcf.ngc.nvidia.com/functions?filter=nvidia-functions
|
|
28
34
|
|
|
29
35
|
Parameters
|
|
30
36
|
----------
|
|
@@ -32,21 +38,100 @@ class NimDecorator(FlowDecorator):
|
|
|
32
38
|
List of NIM containers running models in sidecars.
|
|
33
39
|
backend: str
|
|
34
40
|
Compute provider to run the NIM container.
|
|
41
|
+
queue_timeout : int
|
|
42
|
+
Time to keep the job in NVCF's queue.
|
|
35
43
|
"""
|
|
36
44
|
|
|
37
45
|
name = "nim"
|
|
38
46
|
defaults = {
|
|
39
47
|
"models": [],
|
|
40
48
|
"backend": "managed",
|
|
49
|
+
"monitor": True,
|
|
50
|
+
"persist_db": False,
|
|
51
|
+
"queue_timeout": 5 * 24 * 3600, # Default 5 days in seconds
|
|
41
52
|
}
|
|
42
53
|
|
|
43
|
-
def
|
|
44
|
-
self, flow, graph,
|
|
54
|
+
def step_init(
|
|
55
|
+
self, flow, graph, step_name, decorators, environment, flow_datastore, logger
|
|
45
56
|
):
|
|
57
|
+
|
|
58
|
+
if self.attributes["monitor"]:
|
|
59
|
+
self.attach_card_decorator(
|
|
60
|
+
flow,
|
|
61
|
+
step_name,
|
|
62
|
+
NimMetricsRefresher.CARD_ID,
|
|
63
|
+
"blank",
|
|
64
|
+
refresh_interval=4.0,
|
|
65
|
+
)
|
|
66
|
+
|
|
46
67
|
current._update_env(
|
|
47
68
|
{
|
|
48
69
|
"nim": NimManager(
|
|
49
|
-
models=self.attributes["models"],
|
|
70
|
+
models=self.attributes["models"],
|
|
71
|
+
backend=self.attributes["backend"],
|
|
72
|
+
flow=flow,
|
|
73
|
+
step_name=step_name,
|
|
74
|
+
monitor=self.attributes["monitor"],
|
|
75
|
+
queue_timeout=self.attributes["queue_timeout"],
|
|
50
76
|
)
|
|
51
77
|
}
|
|
52
78
|
)
|
|
79
|
+
|
|
80
|
+
def task_decorate(
|
|
81
|
+
self, step_func, flow, graph, retry_count, max_user_code_retries, ubf_context
|
|
82
|
+
):
|
|
83
|
+
if self.attributes["monitor"]:
|
|
84
|
+
|
|
85
|
+
import sqlite3
|
|
86
|
+
from metaflow import current
|
|
87
|
+
|
|
88
|
+
file_path = get_storage_path(current.task_id)
|
|
89
|
+
if os.path.exists(file_path):
|
|
90
|
+
os.remove(file_path)
|
|
91
|
+
os.makedirs(NIM_MONITOR_LOCAL_STORAGE_ROOT, exist_ok=True)
|
|
92
|
+
conn = sqlite3.connect(file_path)
|
|
93
|
+
cursor = conn.cursor()
|
|
94
|
+
cursor.execute(
|
|
95
|
+
"""
|
|
96
|
+
CREATE TABLE metrics (
|
|
97
|
+
error INTEGER,
|
|
98
|
+
success INTEGER,
|
|
99
|
+
status_code INTEGER,
|
|
100
|
+
prompt_tokens INTEGER,
|
|
101
|
+
completion_tokens INTEGER,
|
|
102
|
+
e2e_time NUMERIC,
|
|
103
|
+
model TEXT
|
|
104
|
+
)
|
|
105
|
+
"""
|
|
106
|
+
)
|
|
107
|
+
|
|
108
|
+
def _wrapped_step_func(*args, **kwargs):
|
|
109
|
+
async_refresher_metrics = AsyncPeriodicRefresher(
|
|
110
|
+
NimMetricsRefresher(),
|
|
111
|
+
updater_interval=4.0,
|
|
112
|
+
collector_interval=2.0,
|
|
113
|
+
file_name=file_path,
|
|
114
|
+
)
|
|
115
|
+
try:
|
|
116
|
+
async_refresher_metrics.start()
|
|
117
|
+
return step_func(*args, **kwargs)
|
|
118
|
+
finally:
|
|
119
|
+
time.sleep(5.0) # buffer for the last update to synchronize
|
|
120
|
+
async_refresher_metrics.stop()
|
|
121
|
+
|
|
122
|
+
return _wrapped_step_func
|
|
123
|
+
else:
|
|
124
|
+
return step_func
|
|
125
|
+
|
|
126
|
+
def task_post_step(
|
|
127
|
+
self, step_name, flow, graph, retry_count, max_user_code_retries
|
|
128
|
+
):
|
|
129
|
+
if not self.attributes["persist_db"]:
|
|
130
|
+
import shutil
|
|
131
|
+
|
|
132
|
+
file_path = get_storage_path(current.task_id)
|
|
133
|
+
if os.path.exists(file_path):
|
|
134
|
+
os.remove(file_path)
|
|
135
|
+
# if this task is the last one, delete the whole enchilada.
|
|
136
|
+
if not os.listdir(NIM_MONITOR_LOCAL_STORAGE_ROOT):
|
|
137
|
+
shutil.rmtree(NIM_MONITOR_LOCAL_STORAGE_ROOT, ignore_errors=True)
|
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
import os, sqlite3
|
|
2
|
+
from metaflow.cards import (
|
|
3
|
+
Markdown,
|
|
4
|
+
Table,
|
|
5
|
+
ProgressBar,
|
|
6
|
+
)
|
|
7
|
+
from metaflow.decorators import StepDecorator
|
|
8
|
+
from metaflow.metaflow_current import current
|
|
9
|
+
|
|
10
|
+
from .utilities import get_storage_path
|
|
11
|
+
from ..card_utilities.async_cards import CardRefresher
|
|
12
|
+
from ..card_utilities.extra_components import BarPlot, ViolinPlot
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def json_to_artifact_table(data):
|
|
16
|
+
return ArtifactTable(data)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class NimMetricsRefresher(CardRefresher):
|
|
20
|
+
CARD_ID = "nim_metrics"
|
|
21
|
+
|
|
22
|
+
def __init__(self) -> None:
|
|
23
|
+
self._metrics_charts = {}
|
|
24
|
+
self._last_updated_on = None
|
|
25
|
+
self._already_rendered = False
|
|
26
|
+
self._file_name = get_storage_path(current.task_id)
|
|
27
|
+
|
|
28
|
+
def sqlite_fetch_func(self, conn):
|
|
29
|
+
cursor = conn.cursor()
|
|
30
|
+
try:
|
|
31
|
+
conn = sqlite3.connect(self._file_name)
|
|
32
|
+
cursor = conn.cursor()
|
|
33
|
+
cursor.execute(
|
|
34
|
+
"SELECT error, success, status_code, prompt_tokens, completion_tokens, e2e_time, model FROM metrics"
|
|
35
|
+
)
|
|
36
|
+
rows = cursor.fetchall()
|
|
37
|
+
data = {
|
|
38
|
+
"error": 0,
|
|
39
|
+
"success": 0,
|
|
40
|
+
"status_code": [],
|
|
41
|
+
"prompt_tokens": [],
|
|
42
|
+
"completion_tokens": [],
|
|
43
|
+
"e2e_time": [],
|
|
44
|
+
"model": [],
|
|
45
|
+
}
|
|
46
|
+
for row in rows:
|
|
47
|
+
data["error"] += row[0]
|
|
48
|
+
data["success"] += row[1]
|
|
49
|
+
data["status_code"].append(row[2])
|
|
50
|
+
data["prompt_tokens"].append(row[3])
|
|
51
|
+
data["completion_tokens"].append(row[4])
|
|
52
|
+
data["e2e_time"].append(row[5])
|
|
53
|
+
data["model"].append(row[6])
|
|
54
|
+
return data
|
|
55
|
+
finally:
|
|
56
|
+
conn.close()
|
|
57
|
+
|
|
58
|
+
def render_card_fresh(self, current_card, data):
|
|
59
|
+
self._already_rendered = True
|
|
60
|
+
current_card.clear()
|
|
61
|
+
current_card.append(Markdown("## Metrics"))
|
|
62
|
+
|
|
63
|
+
self._metrics_charts["request_success"] = BarPlot(
|
|
64
|
+
title="Request success",
|
|
65
|
+
category_name="category",
|
|
66
|
+
value_name="amount",
|
|
67
|
+
orientation="horizontal",
|
|
68
|
+
)
|
|
69
|
+
self._metrics_charts["latency_distribution"] = ViolinPlot(
|
|
70
|
+
title="Latency distribution (s)",
|
|
71
|
+
category_col_name="model",
|
|
72
|
+
value_col_name="e2e_time",
|
|
73
|
+
)
|
|
74
|
+
|
|
75
|
+
current_card.append(
|
|
76
|
+
Table(
|
|
77
|
+
data=[
|
|
78
|
+
[
|
|
79
|
+
self._metrics_charts["request_success"],
|
|
80
|
+
],
|
|
81
|
+
[self._metrics_charts["latency_distribution"]],
|
|
82
|
+
]
|
|
83
|
+
)
|
|
84
|
+
)
|
|
85
|
+
current_card.refresh()
|
|
86
|
+
|
|
87
|
+
def on_startup(self, current_card):
|
|
88
|
+
current_card.append(Markdown("# Task-level NIM API metrics"))
|
|
89
|
+
current_card.append(
|
|
90
|
+
Markdown(
|
|
91
|
+
"_waiting for data to appear_",
|
|
92
|
+
)
|
|
93
|
+
)
|
|
94
|
+
current_card.refresh()
|
|
95
|
+
|
|
96
|
+
def on_error(self, current_card, error_message):
|
|
97
|
+
|
|
98
|
+
if isinstance(error_message, FileNotFoundError):
|
|
99
|
+
return
|
|
100
|
+
|
|
101
|
+
if not self._already_rendered:
|
|
102
|
+
current_card.clear()
|
|
103
|
+
current_card.append(
|
|
104
|
+
Markdown(
|
|
105
|
+
f"## Error: {str(error_message)}",
|
|
106
|
+
)
|
|
107
|
+
)
|
|
108
|
+
current_card.refresh()
|
|
109
|
+
|
|
110
|
+
def update_only_components(self, current_card, data_object):
|
|
111
|
+
|
|
112
|
+
# update request success data
|
|
113
|
+
self._metrics_charts["request_success"].spec["data"][0]["values"] = [
|
|
114
|
+
{
|
|
115
|
+
"category": "Successful requests",
|
|
116
|
+
"amount": data_object["metrics"]["success"],
|
|
117
|
+
},
|
|
118
|
+
{"category": "Errors", "amount": data_object["metrics"]["error"]},
|
|
119
|
+
]
|
|
120
|
+
|
|
121
|
+
latency_data = []
|
|
122
|
+
times = []
|
|
123
|
+
for m, e in zip(
|
|
124
|
+
data_object["metrics"]["model"], data_object["metrics"]["e2e_time"]
|
|
125
|
+
):
|
|
126
|
+
latency_data.append({"model": m, "e2e_time": e})
|
|
127
|
+
times.append(e)
|
|
128
|
+
|
|
129
|
+
# update latency data
|
|
130
|
+
self._metrics_charts["latency_distribution"].spec["data"][0][
|
|
131
|
+
"values"
|
|
132
|
+
] = latency_data
|
|
133
|
+
|
|
134
|
+
# update domain for latency plot
|
|
135
|
+
min_time = min(times)
|
|
136
|
+
max_time = max(times)
|
|
137
|
+
for scale in self._metrics_charts["latency_distribution"].spec["scales"]:
|
|
138
|
+
if scale["name"] == "xscale":
|
|
139
|
+
scale["domain"] = [min_time - max_time * 0.1, max_time + max_time * 0.1]
|
|
140
|
+
|
|
141
|
+
current_card.refresh()
|
|
142
|
+
|
|
143
|
+
def on_update(self, current_card, data_object):
|
|
144
|
+
data_object_keys = set(data_object.keys())
|
|
145
|
+
if len(data_object_keys) == 0:
|
|
146
|
+
return
|
|
147
|
+
if len(self._metrics_charts) == 0:
|
|
148
|
+
self.render_card_fresh(current_card, data_object)
|
|
149
|
+
return
|
|
150
|
+
elif len(data_object["metrics"]["status_code"]) == 0:
|
|
151
|
+
return
|
|
152
|
+
else:
|
|
153
|
+
self.update_only_components(current_card, data_object)
|
|
154
|
+
return
|
|
@@ -1,19 +1,20 @@
|
|
|
1
|
-
import os
|
|
2
|
-
import time
|
|
3
|
-
import json
|
|
4
|
-
import requests
|
|
1
|
+
import os, sys, time, json, random, requests, sqlite3
|
|
5
2
|
from urllib.parse import urlparse
|
|
6
3
|
from metaflow.metaflow_config import SERVICE_URL
|
|
7
4
|
from metaflow.metaflow_config_funcs import init_config
|
|
8
|
-
import
|
|
9
|
-
import
|
|
5
|
+
from .utilities import get_storage_path
|
|
6
|
+
from ..nvcf.nvcf import retry_on_status
|
|
7
|
+
|
|
10
8
|
|
|
11
9
|
NVCF_URL = "https://api.nvcf.nvidia.com"
|
|
12
10
|
NVCF_SUBMIT_ENDPOINT = f"{NVCF_URL}/v2/nvcf/pexec/functions"
|
|
13
11
|
NVCF_RESULT_ENDPOINT = f"{NVCF_URL}/v2/nvcf/pexec/status"
|
|
14
|
-
|
|
15
|
-
COMMON_HEADERS = {
|
|
16
|
-
|
|
12
|
+
NVCF_POLL_INTERVAL_SECONDS = 1
|
|
13
|
+
COMMON_HEADERS = {
|
|
14
|
+
"accept": "application/json",
|
|
15
|
+
"Content-Type": "application/json",
|
|
16
|
+
"nvcf-feature-enable-gateway-timeout": "true",
|
|
17
|
+
}
|
|
17
18
|
|
|
18
19
|
|
|
19
20
|
class NimMetadata(object):
|
|
@@ -56,41 +57,33 @@ class NimMetadata(object):
|
|
|
56
57
|
def get_nvcf_chat_completion_models(self):
|
|
57
58
|
return self._nvcf_chat_completion_models
|
|
58
59
|
|
|
59
|
-
def get_coreweave_chat_completion_models(self):
|
|
60
|
-
return self._coreweave_chat_completion_models
|
|
61
|
-
|
|
62
60
|
def get_headers_for_nvcf_request(self):
|
|
63
61
|
return {**COMMON_HEADERS, "Authorization": f"Bearer {self._ngc_api_key}"}
|
|
64
62
|
|
|
65
|
-
def get_headers_for_coreweave_request(self):
|
|
66
|
-
return COMMON_HEADERS
|
|
67
|
-
|
|
68
63
|
|
|
69
64
|
class NimManager(object):
|
|
70
|
-
def __init__(self, models, backend):
|
|
65
|
+
def __init__(self, models, backend, flow, step_name, monitor, queue_timeout):
|
|
66
|
+
|
|
71
67
|
nim_metadata = NimMetadata()
|
|
72
68
|
if backend == "managed":
|
|
73
69
|
nvcf_models = [
|
|
74
70
|
m["name"] for m in nim_metadata.get_nvcf_chat_completion_models()
|
|
75
71
|
]
|
|
76
|
-
cw_models = [
|
|
77
|
-
m["name"] for m in nim_metadata.get_coreweave_chat_completion_models()
|
|
78
|
-
]
|
|
79
72
|
|
|
80
73
|
self.models = {}
|
|
81
74
|
for m in models:
|
|
82
75
|
if m in nvcf_models:
|
|
83
76
|
self.models[m] = NimChatCompletion(
|
|
84
|
-
model=m,
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
77
|
+
model=m,
|
|
78
|
+
provider="NVCF",
|
|
79
|
+
nim_metadata=nim_metadata,
|
|
80
|
+
monitor=monitor,
|
|
81
|
+
queue_timeout=queue_timeout,
|
|
89
82
|
)
|
|
90
83
|
else:
|
|
91
84
|
raise ValueError(
|
|
92
85
|
f"Model {m} not supported by the Outerbounds @nim offering."
|
|
93
|
-
f"\nYou can choose from these options: {nvcf_models
|
|
86
|
+
f"\nYou can choose from these options: {nvcf_models}\n\n"
|
|
94
87
|
"Reach out to Outerbounds if there are other models you'd like supported."
|
|
95
88
|
)
|
|
96
89
|
else:
|
|
@@ -99,12 +92,21 @@ class NimManager(object):
|
|
|
99
92
|
)
|
|
100
93
|
|
|
101
94
|
|
|
95
|
+
class JobStatus(object):
|
|
96
|
+
SUBMITTED = "SUBMITTED"
|
|
97
|
+
RUNNING = "RUNNING"
|
|
98
|
+
SUCCESSFUL = "SUCCESSFUL"
|
|
99
|
+
FAILED = "FAILED"
|
|
100
|
+
|
|
101
|
+
|
|
102
102
|
class NimChatCompletion(object):
|
|
103
103
|
def __init__(
|
|
104
104
|
self,
|
|
105
105
|
model="meta/llama3-8b-instruct",
|
|
106
|
-
provider="
|
|
106
|
+
provider="NVCF",
|
|
107
107
|
nim_metadata=None,
|
|
108
|
+
monitor=False,
|
|
109
|
+
queue_timeout=None,
|
|
108
110
|
**kwargs,
|
|
109
111
|
):
|
|
110
112
|
if nim_metadata is None:
|
|
@@ -118,19 +120,9 @@ class NimChatCompletion(object):
|
|
|
118
120
|
self.max_request_retries = int(
|
|
119
121
|
os.environ.get("METAFLOW_EXT_HTTP_MAX_RETRIES", "10")
|
|
120
122
|
)
|
|
123
|
+
self.monitor = monitor
|
|
121
124
|
|
|
122
|
-
if self.compute_provider == "
|
|
123
|
-
cw_model_names = [
|
|
124
|
-
m["name"]
|
|
125
|
-
for m in self._nim_metadata.get_coreweave_chat_completion_models()
|
|
126
|
-
]
|
|
127
|
-
self.model = model
|
|
128
|
-
self.ip_address = self._nim_metadata.get_coreweave_chat_completion_models()[
|
|
129
|
-
cw_model_names.index(model)
|
|
130
|
-
]["ip-address"]
|
|
131
|
-
self.endpoint = f"http://{self.ip_address}:8000/v1/chat/completions"
|
|
132
|
-
|
|
133
|
-
elif self.compute_provider == "NVCF":
|
|
125
|
+
if self.compute_provider == "NVCF":
|
|
134
126
|
nvcf_model_names = [
|
|
135
127
|
m["name"] for m in self._nim_metadata.get_nvcf_chat_completion_models()
|
|
136
128
|
]
|
|
@@ -141,45 +133,175 @@ class NimChatCompletion(object):
|
|
|
141
133
|
self.version_id = self._nim_metadata.get_nvcf_chat_completion_models()[
|
|
142
134
|
nvcf_model_names.index(model)
|
|
143
135
|
]["version-id"]
|
|
136
|
+
else:
|
|
137
|
+
raise ValueError(
|
|
138
|
+
f"Backend compute provider {self.compute_provider} not yet supported for @nim."
|
|
139
|
+
)
|
|
144
140
|
|
|
145
|
-
|
|
141
|
+
# to know whether to set file_name
|
|
142
|
+
self.first_request = True
|
|
143
|
+
|
|
144
|
+
# TODO (Eddie) - this may make more sense in a base class.
|
|
145
|
+
# @nim arch needs redesign if customers start using it in more creative ways.
|
|
146
|
+
self._poll_seconds = "3600"
|
|
147
|
+
self._queue_timeout = queue_timeout
|
|
148
|
+
self._status = None
|
|
149
|
+
self._result = {}
|
|
150
|
+
|
|
151
|
+
@property
|
|
152
|
+
def status(self):
|
|
153
|
+
return self._status
|
|
154
|
+
|
|
155
|
+
@property
|
|
156
|
+
def has_failed(self):
|
|
157
|
+
return self._status == JobStatus.FAILED
|
|
158
|
+
|
|
159
|
+
@property
|
|
160
|
+
def is_running(self):
|
|
161
|
+
return self._status == JobStatus.SUBMITTED
|
|
162
|
+
|
|
163
|
+
@property
|
|
164
|
+
def result(self):
|
|
165
|
+
return self._result
|
|
166
|
+
|
|
167
|
+
def _log_stats(self, response, e2e_time):
|
|
168
|
+
stats = {}
|
|
169
|
+
if response.status_code == 200:
|
|
170
|
+
stats["success"] = 1
|
|
171
|
+
stats["error"] = 0
|
|
172
|
+
else:
|
|
173
|
+
stats["success"] = 0
|
|
174
|
+
stats["error"] = 1
|
|
175
|
+
stats["status_code"] = response.status_code
|
|
176
|
+
try:
|
|
177
|
+
stats["prompt_tokens"] = response.json()["usage"]["prompt_tokens"]
|
|
178
|
+
except KeyError:
|
|
179
|
+
stats["prompt_tokens"] = None
|
|
180
|
+
try:
|
|
181
|
+
stats["completion_tokens"] = response.json()["usage"]["completion_tokens"]
|
|
182
|
+
except KeyError:
|
|
183
|
+
stats["completion_tokens"] = None
|
|
184
|
+
stats["e2e_time"] = e2e_time
|
|
185
|
+
stats["provider"] = self.compute_provider
|
|
186
|
+
stats["model"] = self.model
|
|
146
187
|
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
188
|
+
conn = sqlite3.connect(self.file_name)
|
|
189
|
+
cursor = conn.cursor()
|
|
190
|
+
try:
|
|
191
|
+
cursor.execute(
|
|
192
|
+
"""
|
|
193
|
+
INSERT INTO metrics (error, success, status_code, prompt_tokens, completion_tokens, e2e_time, model)
|
|
194
|
+
VALUES (?, ?, ?, ?, ?, ?, ?)
|
|
195
|
+
""",
|
|
196
|
+
(
|
|
197
|
+
stats["error"],
|
|
198
|
+
stats["success"],
|
|
199
|
+
stats["status_code"],
|
|
200
|
+
stats["prompt_tokens"],
|
|
201
|
+
stats["completion_tokens"],
|
|
202
|
+
stats["e2e_time"],
|
|
203
|
+
stats["model"],
|
|
204
|
+
),
|
|
153
205
|
)
|
|
154
|
-
|
|
155
|
-
|
|
206
|
+
conn.commit()
|
|
207
|
+
finally:
|
|
208
|
+
conn.close()
|
|
156
209
|
|
|
157
|
-
|
|
210
|
+
@retry_on_status(status_codes=[504])
|
|
211
|
+
def __call__(self, **kwargs):
|
|
212
|
+
|
|
213
|
+
if self.first_request:
|
|
214
|
+
# Put here to guarantee self.file_name is set after task_id exists.
|
|
215
|
+
from metaflow import current
|
|
158
216
|
|
|
159
|
-
|
|
160
|
-
|
|
217
|
+
self.file_name = get_storage_path(current.task_id)
|
|
218
|
+
|
|
219
|
+
request_data = {"model": self.model, **kwargs}
|
|
220
|
+
request_url = f"{NVCF_SUBMIT_ENDPOINT}/{self.function_id}"
|
|
221
|
+
retry_delay = 1
|
|
222
|
+
attempts = 0
|
|
223
|
+
t0 = time.time()
|
|
224
|
+
while attempts < self.max_request_retries:
|
|
225
|
+
try:
|
|
226
|
+
attempts += 1
|
|
227
|
+
response = requests.post(
|
|
228
|
+
request_url,
|
|
229
|
+
headers=self._nim_metadata.get_headers_for_nvcf_request(),
|
|
230
|
+
json=request_data,
|
|
231
|
+
)
|
|
232
|
+
if response.status_code == 202:
|
|
233
|
+
invocation_id = response.headers.get("NVCF-REQID")
|
|
234
|
+
self.invocations.append(invocation_id)
|
|
235
|
+
self._status = JobStatus.SUBMITTED
|
|
236
|
+
elif response.status_code == 200:
|
|
237
|
+
tf = time.time()
|
|
238
|
+
if self.monitor:
|
|
239
|
+
self._log_stats(response, tf - t0)
|
|
240
|
+
self._status = JobStatus.SUCCESSFUL
|
|
241
|
+
self._result = response.json()
|
|
242
|
+
return self._result
|
|
243
|
+
elif response.status_code == 400:
|
|
244
|
+
self._status = JobStatus.FAILED
|
|
245
|
+
msg = (
|
|
246
|
+
"[@nim ERROR] The OpenAI-compatible returned a 400 status code. "
|
|
247
|
+
+ "Known causes include improper requests or prompts with too many tokens for the selected model. "
|
|
248
|
+
+ "Please contact Outerbounds if you need assistance resolving the issue."
|
|
249
|
+
)
|
|
250
|
+
print(msg, file=sys.stderr)
|
|
251
|
+
self._result = {"ERROR": msg}
|
|
252
|
+
return self._result
|
|
253
|
+
except (
|
|
254
|
+
requests.exceptions.ConnectionError,
|
|
255
|
+
requests.exceptions.ReadTimeout,
|
|
256
|
+
) as e:
|
|
257
|
+
# ConnectionErrors are generally temporary errors like DNS resolution failures,
|
|
258
|
+
# timeouts etc.
|
|
259
|
+
print(
|
|
260
|
+
"received error of type {}. Retrying...".format(type(e)),
|
|
261
|
+
e,
|
|
262
|
+
file=sys.stderr,
|
|
263
|
+
)
|
|
264
|
+
time.sleep(retry_delay)
|
|
265
|
+
retry_delay *= 2 # Double the delay for the next attempt
|
|
266
|
+
retry_delay += random.uniform(0, 1) # Add jitter
|
|
267
|
+
retry_delay = min(retry_delay, 10)
|
|
161
268
|
|
|
269
|
+
@retry_on_status(status_codes=[500], max_retries=3, delay=5)
|
|
270
|
+
@retry_on_status(status_codes=[504])
|
|
271
|
+
def _poll():
|
|
272
|
+
poll_request_url = f"{NVCF_RESULT_ENDPOINT}/{invocation_id}"
|
|
162
273
|
attempts = 0
|
|
274
|
+
retry_delay = 1
|
|
163
275
|
while attempts < self.max_request_retries:
|
|
164
276
|
try:
|
|
165
277
|
attempts += 1
|
|
166
|
-
|
|
167
|
-
|
|
278
|
+
poll_response = requests.get(
|
|
279
|
+
poll_request_url,
|
|
168
280
|
headers=self._nim_metadata.get_headers_for_nvcf_request(),
|
|
169
|
-
json=request_data,
|
|
170
281
|
)
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
self.
|
|
175
|
-
|
|
176
|
-
return
|
|
282
|
+
if poll_response.status_code == 200:
|
|
283
|
+
tf = time.time()
|
|
284
|
+
self._log_stats(response, tf - t0)
|
|
285
|
+
self._status = JobStatus.SUCCESSFUL
|
|
286
|
+
self._result = poll_response.json()
|
|
287
|
+
return self._result
|
|
288
|
+
elif poll_response.status_code == 202:
|
|
289
|
+
self._status = JobStatus.SUBMITTED
|
|
290
|
+
return 202
|
|
291
|
+
elif poll_response.status_code == 400:
|
|
292
|
+
self._status = JobStatus.FAILED
|
|
293
|
+
msg = (
|
|
294
|
+
"[@nim ERROR] The OpenAI-compatible API returned a 400 status code. "
|
|
295
|
+
+ "Known causes include improper requests or prompts with too many tokens for the selected model. "
|
|
296
|
+
+ "Please contact Outerbounds if you need assistance resolving the issue."
|
|
297
|
+
)
|
|
298
|
+
print(msg, file=sys.stderr)
|
|
299
|
+
self._result = {"@nim ERROR": msg}
|
|
300
|
+
return self._result
|
|
177
301
|
except (
|
|
178
302
|
requests.exceptions.ConnectionError,
|
|
179
303
|
requests.exceptions.ReadTimeout,
|
|
180
304
|
) as e:
|
|
181
|
-
# ConnectionErrors are generally temporary errors like DNS resolution failures,
|
|
182
|
-
# timeouts etc.
|
|
183
305
|
print(
|
|
184
306
|
"received error of type {}. Retrying...".format(type(e)),
|
|
185
307
|
e,
|
|
@@ -190,44 +312,8 @@ class NimChatCompletion(object):
|
|
|
190
312
|
retry_delay += random.uniform(0, 1) # Add jitter
|
|
191
313
|
retry_delay = min(retry_delay, 10)
|
|
192
314
|
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
try:
|
|
199
|
-
attempts += 1
|
|
200
|
-
poll_response = requests.get(
|
|
201
|
-
poll_request_url,
|
|
202
|
-
headers=self._nim_metadata.get_headers_for_nvcf_request(),
|
|
203
|
-
)
|
|
204
|
-
poll_response.raise_for_status()
|
|
205
|
-
if poll_response.status_code == 200:
|
|
206
|
-
return poll_response.json()
|
|
207
|
-
elif poll_response.status_code == 202:
|
|
208
|
-
return 202
|
|
209
|
-
else:
|
|
210
|
-
raise Exception(
|
|
211
|
-
f"NVCF returned {poll_response.status_code} status code. Please contact Outerbounds."
|
|
212
|
-
)
|
|
213
|
-
except (
|
|
214
|
-
requests.exceptions.ConnectionError,
|
|
215
|
-
requests.exceptions.ReadTimeout,
|
|
216
|
-
) as e:
|
|
217
|
-
# ConnectionErrors are generally temporary errors like DNS resolution failures,
|
|
218
|
-
# timeouts etc.
|
|
219
|
-
print(
|
|
220
|
-
"received error of type {}. Retrying...".format(type(e)),
|
|
221
|
-
e,
|
|
222
|
-
file=sys.stderr,
|
|
223
|
-
)
|
|
224
|
-
time.sleep(retry_delay)
|
|
225
|
-
retry_delay *= 2 # Double the delay for the next attempt
|
|
226
|
-
retry_delay += random.uniform(0, 1) # Add jitter
|
|
227
|
-
retry_delay = min(retry_delay, 10)
|
|
228
|
-
|
|
229
|
-
while True:
|
|
230
|
-
data = _poll()
|
|
231
|
-
if data and data != 202:
|
|
232
|
-
return data
|
|
233
|
-
time.sleep(POLL_INTERVAL)
|
|
315
|
+
while True:
|
|
316
|
+
data = _poll()
|
|
317
|
+
if data and data != 202:
|
|
318
|
+
return data
|
|
319
|
+
time.sleep(NVCF_POLL_INTERVAL_SECONDS)
|
{ob_metaflow_extensions-1.1.127.dist-info → ob_metaflow_extensions-1.1.129.dist-info}/METADATA
RENAMED
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: ob-metaflow-extensions
|
|
3
|
-
Version: 1.1.127
|
|
3
|
+
Version: 1.1.129
|
|
4
4
|
Summary: Outerbounds Platform Extensions for Metaflow
|
|
5
5
|
Author: Outerbounds, Inc.
|
|
6
6
|
License: Commercial
|
|
7
7
|
Description-Content-Type: text/markdown
|
|
8
8
|
Requires-Dist: boto3
|
|
9
9
|
Requires-Dist: kubernetes
|
|
10
|
-
Requires-Dist: ob-metaflow (==2.
|
|
10
|
+
Requires-Dist: ob-metaflow (==2.14.0.1)
|
|
11
11
|
|
|
12
12
|
# Outerbounds platform package
|
|
13
13
|
|