ob-metaflow-extensions 1.1.155__tar.gz → 1.1.156__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ob-metaflow-extensions might be problematic. Click here for more details.
- {ob-metaflow-extensions-1.1.155 → ob-metaflow-extensions-1.1.156}/PKG-INFO +1 -1
- {ob-metaflow-extensions-1.1.155 → ob-metaflow-extensions-1.1.156}/metaflow_extensions/outerbounds/plugins/__init__.py +1 -1
- {ob-metaflow-extensions-1.1.155 → ob-metaflow-extensions-1.1.156}/metaflow_extensions/outerbounds/plugins/nim/card.py +1 -6
- ob-metaflow-extensions-1.1.155/metaflow_extensions/outerbounds/plugins/nim/__init__.py → ob-metaflow-extensions-1.1.156/metaflow_extensions/outerbounds/plugins/nim/nim_decorator.py +13 -49
- ob-metaflow-extensions-1.1.156/metaflow_extensions/outerbounds/plugins/nim/nim_manager.py +318 -0
- ob-metaflow-extensions-1.1.156/metaflow_extensions/outerbounds/plugins/nim/utils.py +36 -0
- {ob-metaflow-extensions-1.1.155 → ob-metaflow-extensions-1.1.156}/ob_metaflow_extensions.egg-info/PKG-INFO +1 -1
- {ob-metaflow-extensions-1.1.155 → ob-metaflow-extensions-1.1.156}/ob_metaflow_extensions.egg-info/SOURCES.txt +2 -2
- ob-metaflow-extensions-1.1.156/ob_metaflow_extensions.egg-info/requires.txt +3 -0
- {ob-metaflow-extensions-1.1.155 → ob-metaflow-extensions-1.1.156}/setup.py +2 -2
- ob-metaflow-extensions-1.1.155/metaflow_extensions/outerbounds/plugins/nim/nim_manager.py +0 -318
- ob-metaflow-extensions-1.1.155/metaflow_extensions/outerbounds/plugins/nim/utilities.py +0 -5
- ob-metaflow-extensions-1.1.155/ob_metaflow_extensions.egg-info/requires.txt +0 -3
- {ob-metaflow-extensions-1.1.155 → ob-metaflow-extensions-1.1.156}/README.md +0 -0
- {ob-metaflow-extensions-1.1.155 → ob-metaflow-extensions-1.1.156}/metaflow_extensions/outerbounds/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.155 → ob-metaflow-extensions-1.1.156}/metaflow_extensions/outerbounds/config/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.155 → ob-metaflow-extensions-1.1.156}/metaflow_extensions/outerbounds/plugins/apps/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.155 → ob-metaflow-extensions-1.1.156}/metaflow_extensions/outerbounds/plugins/apps/app_utils.py +0 -0
- {ob-metaflow-extensions-1.1.155 → ob-metaflow-extensions-1.1.156}/metaflow_extensions/outerbounds/plugins/apps/consts.py +0 -0
- {ob-metaflow-extensions-1.1.155 → ob-metaflow-extensions-1.1.156}/metaflow_extensions/outerbounds/plugins/apps/deploy_decorator.py +0 -0
- {ob-metaflow-extensions-1.1.155 → ob-metaflow-extensions-1.1.156}/metaflow_extensions/outerbounds/plugins/apps/supervisord_utils.py +0 -0
- {ob-metaflow-extensions-1.1.155 → ob-metaflow-extensions-1.1.156}/metaflow_extensions/outerbounds/plugins/auth_server.py +0 -0
- {ob-metaflow-extensions-1.1.155 → ob-metaflow-extensions-1.1.156}/metaflow_extensions/outerbounds/plugins/card_utilities/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.155 → ob-metaflow-extensions-1.1.156}/metaflow_extensions/outerbounds/plugins/card_utilities/async_cards.py +0 -0
- {ob-metaflow-extensions-1.1.155 → ob-metaflow-extensions-1.1.156}/metaflow_extensions/outerbounds/plugins/card_utilities/extra_components.py +0 -0
- {ob-metaflow-extensions-1.1.155 → ob-metaflow-extensions-1.1.156}/metaflow_extensions/outerbounds/plugins/card_utilities/injector.py +0 -0
- {ob-metaflow-extensions-1.1.155 → ob-metaflow-extensions-1.1.156}/metaflow_extensions/outerbounds/plugins/checkpoint_datastores/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.155 → ob-metaflow-extensions-1.1.156}/metaflow_extensions/outerbounds/plugins/checkpoint_datastores/coreweave.py +0 -0
- {ob-metaflow-extensions-1.1.155 → ob-metaflow-extensions-1.1.156}/metaflow_extensions/outerbounds/plugins/checkpoint_datastores/nebius.py +0 -0
- {ob-metaflow-extensions-1.1.155 → ob-metaflow-extensions-1.1.156}/metaflow_extensions/outerbounds/plugins/fast_bakery/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.155 → ob-metaflow-extensions-1.1.156}/metaflow_extensions/outerbounds/plugins/fast_bakery/docker_environment.py +0 -0
- {ob-metaflow-extensions-1.1.155 → ob-metaflow-extensions-1.1.156}/metaflow_extensions/outerbounds/plugins/fast_bakery/fast_bakery.py +0 -0
- {ob-metaflow-extensions-1.1.155 → ob-metaflow-extensions-1.1.156}/metaflow_extensions/outerbounds/plugins/fast_bakery/fast_bakery_cli.py +0 -0
- {ob-metaflow-extensions-1.1.155 → ob-metaflow-extensions-1.1.156}/metaflow_extensions/outerbounds/plugins/fast_bakery/fast_bakery_decorator.py +0 -0
- {ob-metaflow-extensions-1.1.155 → ob-metaflow-extensions-1.1.156}/metaflow_extensions/outerbounds/plugins/kubernetes/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.155 → ob-metaflow-extensions-1.1.156}/metaflow_extensions/outerbounds/plugins/kubernetes/kubernetes_client.py +0 -0
- {ob-metaflow-extensions-1.1.155 → ob-metaflow-extensions-1.1.156}/metaflow_extensions/outerbounds/plugins/nvcf/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.155 → ob-metaflow-extensions-1.1.156}/metaflow_extensions/outerbounds/plugins/nvcf/constants.py +0 -0
- {ob-metaflow-extensions-1.1.155 → ob-metaflow-extensions-1.1.156}/metaflow_extensions/outerbounds/plugins/nvcf/exceptions.py +0 -0
- {ob-metaflow-extensions-1.1.155 → ob-metaflow-extensions-1.1.156}/metaflow_extensions/outerbounds/plugins/nvcf/heartbeat_store.py +0 -0
- {ob-metaflow-extensions-1.1.155 → ob-metaflow-extensions-1.1.156}/metaflow_extensions/outerbounds/plugins/nvcf/nvcf.py +0 -0
- {ob-metaflow-extensions-1.1.155 → ob-metaflow-extensions-1.1.156}/metaflow_extensions/outerbounds/plugins/nvcf/nvcf_cli.py +0 -0
- {ob-metaflow-extensions-1.1.155 → ob-metaflow-extensions-1.1.156}/metaflow_extensions/outerbounds/plugins/nvcf/nvcf_decorator.py +0 -0
- {ob-metaflow-extensions-1.1.155 → ob-metaflow-extensions-1.1.156}/metaflow_extensions/outerbounds/plugins/nvcf/utils.py +0 -0
- {ob-metaflow-extensions-1.1.155 → ob-metaflow-extensions-1.1.156}/metaflow_extensions/outerbounds/plugins/nvct/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.155 → ob-metaflow-extensions-1.1.156}/metaflow_extensions/outerbounds/plugins/nvct/exceptions.py +0 -0
- {ob-metaflow-extensions-1.1.155 → ob-metaflow-extensions-1.1.156}/metaflow_extensions/outerbounds/plugins/nvct/nvct.py +0 -0
- {ob-metaflow-extensions-1.1.155 → ob-metaflow-extensions-1.1.156}/metaflow_extensions/outerbounds/plugins/nvct/nvct_cli.py +0 -0
- {ob-metaflow-extensions-1.1.155 → ob-metaflow-extensions-1.1.156}/metaflow_extensions/outerbounds/plugins/nvct/nvct_decorator.py +0 -0
- {ob-metaflow-extensions-1.1.155 → ob-metaflow-extensions-1.1.156}/metaflow_extensions/outerbounds/plugins/nvct/nvct_runner.py +0 -0
- {ob-metaflow-extensions-1.1.155 → ob-metaflow-extensions-1.1.156}/metaflow_extensions/outerbounds/plugins/nvct/utils.py +0 -0
- {ob-metaflow-extensions-1.1.155 → ob-metaflow-extensions-1.1.156}/metaflow_extensions/outerbounds/plugins/ollama/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.155 → ob-metaflow-extensions-1.1.156}/metaflow_extensions/outerbounds/plugins/ollama/ollama.py +0 -0
- {ob-metaflow-extensions-1.1.155 → ob-metaflow-extensions-1.1.156}/metaflow_extensions/outerbounds/plugins/perimeters.py +0 -0
- {ob-metaflow-extensions-1.1.155 → ob-metaflow-extensions-1.1.156}/metaflow_extensions/outerbounds/plugins/profilers/deco_injector.py +0 -0
- {ob-metaflow-extensions-1.1.155 → ob-metaflow-extensions-1.1.156}/metaflow_extensions/outerbounds/plugins/profilers/gpu_profile_decorator.py +0 -0
- {ob-metaflow-extensions-1.1.155 → ob-metaflow-extensions-1.1.156}/metaflow_extensions/outerbounds/plugins/secrets/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.155 → ob-metaflow-extensions-1.1.156}/metaflow_extensions/outerbounds/plugins/secrets/secrets.py +0 -0
- {ob-metaflow-extensions-1.1.155 → ob-metaflow-extensions-1.1.156}/metaflow_extensions/outerbounds/plugins/snowflake/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.155 → ob-metaflow-extensions-1.1.156}/metaflow_extensions/outerbounds/plugins/snowflake/snowflake.py +0 -0
- {ob-metaflow-extensions-1.1.155 → ob-metaflow-extensions-1.1.156}/metaflow_extensions/outerbounds/plugins/snowpark/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.155 → ob-metaflow-extensions-1.1.156}/metaflow_extensions/outerbounds/plugins/snowpark/snowpark.py +0 -0
- {ob-metaflow-extensions-1.1.155 → ob-metaflow-extensions-1.1.156}/metaflow_extensions/outerbounds/plugins/snowpark/snowpark_cli.py +0 -0
- {ob-metaflow-extensions-1.1.155 → ob-metaflow-extensions-1.1.156}/metaflow_extensions/outerbounds/plugins/snowpark/snowpark_client.py +0 -0
- {ob-metaflow-extensions-1.1.155 → ob-metaflow-extensions-1.1.156}/metaflow_extensions/outerbounds/plugins/snowpark/snowpark_decorator.py +0 -0
- {ob-metaflow-extensions-1.1.155 → ob-metaflow-extensions-1.1.156}/metaflow_extensions/outerbounds/plugins/snowpark/snowpark_exceptions.py +0 -0
- {ob-metaflow-extensions-1.1.155 → ob-metaflow-extensions-1.1.156}/metaflow_extensions/outerbounds/plugins/snowpark/snowpark_job.py +0 -0
- {ob-metaflow-extensions-1.1.155 → ob-metaflow-extensions-1.1.156}/metaflow_extensions/outerbounds/plugins/snowpark/snowpark_service_spec.py +0 -0
- {ob-metaflow-extensions-1.1.155 → ob-metaflow-extensions-1.1.156}/metaflow_extensions/outerbounds/plugins/tensorboard/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.155 → ob-metaflow-extensions-1.1.156}/metaflow_extensions/outerbounds/profilers/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.155 → ob-metaflow-extensions-1.1.156}/metaflow_extensions/outerbounds/profilers/gpu.py +0 -0
- {ob-metaflow-extensions-1.1.155 → ob-metaflow-extensions-1.1.156}/metaflow_extensions/outerbounds/remote_config.py +0 -0
- {ob-metaflow-extensions-1.1.155 → ob-metaflow-extensions-1.1.156}/metaflow_extensions/outerbounds/toplevel/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.155 → ob-metaflow-extensions-1.1.156}/metaflow_extensions/outerbounds/toplevel/global_aliases_for_metaflow_package.py +0 -0
- {ob-metaflow-extensions-1.1.155 → ob-metaflow-extensions-1.1.156}/metaflow_extensions/outerbounds/toplevel/plugins/azure/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.155 → ob-metaflow-extensions-1.1.156}/metaflow_extensions/outerbounds/toplevel/plugins/gcp/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.155 → ob-metaflow-extensions-1.1.156}/metaflow_extensions/outerbounds/toplevel/plugins/kubernetes/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.155 → ob-metaflow-extensions-1.1.156}/metaflow_extensions/outerbounds/toplevel/plugins/ollama/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.155 → ob-metaflow-extensions-1.1.156}/metaflow_extensions/outerbounds/toplevel/plugins/snowflake/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.155 → ob-metaflow-extensions-1.1.156}/ob_metaflow_extensions.egg-info/dependency_links.txt +0 -0
- {ob-metaflow-extensions-1.1.155 → ob-metaflow-extensions-1.1.156}/ob_metaflow_extensions.egg-info/top_level.txt +0 -0
- {ob-metaflow-extensions-1.1.155 → ob-metaflow-extensions-1.1.156}/setup.cfg +0 -0
|
@@ -326,7 +326,7 @@ STEP_DECORATORS_DESC = [
|
|
|
326
326
|
("snowpark", ".snowpark.snowpark_decorator.SnowparkDecorator"),
|
|
327
327
|
("tensorboard", ".tensorboard.TensorboardDecorator"),
|
|
328
328
|
("gpu_profile", ".profilers.gpu_profile_decorator.GPUProfileDecorator"),
|
|
329
|
-
("nim", ".nim.NimDecorator"),
|
|
329
|
+
("nim", ".nim.nim_decorator.NimDecorator"),
|
|
330
330
|
("ollama", ".ollama.OllamaDecorator"),
|
|
331
331
|
("app_deploy", ".apps.deploy_decorator.WorkstationAppDeployDecorator"),
|
|
332
332
|
]
|
|
@@ -1,8 +1,7 @@
|
|
|
1
|
-
import sqlite3
|
|
2
1
|
from metaflow.cards import Markdown, Table
|
|
3
2
|
from metaflow.metaflow_current import current
|
|
4
3
|
|
|
5
|
-
from .utilities import get_storage_path
|
|
4
|
+
from .utils import get_storage_path
|
|
6
5
|
from ..card_utilities.async_cards import CardRefresher
|
|
7
6
|
from ..card_utilities.extra_components import BarPlot, ViolinPlot
|
|
8
7
|
|
|
@@ -17,9 +16,7 @@ class NimMetricsRefresher(CardRefresher):
|
|
|
17
16
|
self._file_name = get_storage_path(current.task_id)
|
|
18
17
|
|
|
19
18
|
def sqlite_fetch_func(self, conn):
|
|
20
|
-
cursor = conn.cursor()
|
|
21
19
|
try:
|
|
22
|
-
conn = sqlite3.connect(self._file_name)
|
|
23
20
|
cursor = conn.cursor()
|
|
24
21
|
cursor.execute(
|
|
25
22
|
"SELECT error, success, status_code, prompt_tokens, completion_tokens, e2e_time, model FROM metrics"
|
|
@@ -85,7 +82,6 @@ class NimMetricsRefresher(CardRefresher):
|
|
|
85
82
|
current_card.refresh()
|
|
86
83
|
|
|
87
84
|
def on_error(self, current_card, error_message):
|
|
88
|
-
|
|
89
85
|
if isinstance(error_message, FileNotFoundError):
|
|
90
86
|
return
|
|
91
87
|
|
|
@@ -99,7 +95,6 @@ class NimMetricsRefresher(CardRefresher):
|
|
|
99
95
|
current_card.refresh()
|
|
100
96
|
|
|
101
97
|
def update_only_components(self, current_card, data_object):
|
|
102
|
-
|
|
103
98
|
# update request success data
|
|
104
99
|
self._metrics_charts["request_success"].spec["data"][0]["values"] = [
|
|
105
100
|
{
|
|
@@ -1,64 +1,31 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
import os, time
|
|
4
|
-
from metaflow.decorators import StepDecorator
|
|
1
|
+
import os
|
|
2
|
+
import time
|
|
5
3
|
from metaflow import current
|
|
6
|
-
|
|
4
|
+
from .utils import get_storage_path, NIM_MONITOR_LOCAL_STORAGE_ROOT
|
|
7
5
|
from .nim_manager import NimManager
|
|
6
|
+
from metaflow.decorators import StepDecorator
|
|
8
7
|
from .card import NimMetricsRefresher
|
|
9
|
-
from .utilities import get_storage_path, NIM_MONITOR_LOCAL_STORAGE_ROOT
|
|
10
|
-
from ..card_utilities.async_cards import AsyncPeriodicRefresher
|
|
11
8
|
from ..card_utilities.injector import CardDecoratorInjector
|
|
9
|
+
from ..card_utilities.async_cards import AsyncPeriodicRefresher
|
|
12
10
|
|
|
13
11
|
|
|
14
12
|
class NimDecorator(StepDecorator, CardDecoratorInjector):
|
|
15
|
-
"""
|
|
16
|
-
This decorator is used to run NIM containers in Metaflow tasks as sidecars.
|
|
17
|
-
|
|
18
|
-
User code call
|
|
19
|
-
-----------
|
|
20
|
-
@nim(
|
|
21
|
-
models=['meta/llama3-8b-instruct', 'meta/llama3-70b-instruct'],
|
|
22
|
-
backend='managed'
|
|
23
|
-
)
|
|
24
|
-
|
|
25
|
-
Valid backend options
|
|
26
|
-
---------------------
|
|
27
|
-
- 'managed': Outerbounds selects a compute provider based on the model.
|
|
28
|
-
|
|
29
|
-
Valid model options
|
|
30
|
-
----------------
|
|
31
|
-
- 'meta/llama3-8b-instruct': 8B parameter model
|
|
32
|
-
- 'meta/llama3-70b-instruct': 70B parameter model
|
|
33
|
-
- any model here: https://nvcf.ngc.nvidia.com/functions?filter=nvidia-functions
|
|
34
|
-
|
|
35
|
-
Parameters
|
|
36
|
-
----------
|
|
37
|
-
models: list[NIM]
|
|
38
|
-
List of NIM containers running models in sidecars.
|
|
39
|
-
backend: str
|
|
40
|
-
Compute provider to run the NIM container.
|
|
41
|
-
queue_timeout : int
|
|
42
|
-
Time to keep the job in NVCF's queue.
|
|
43
|
-
"""
|
|
44
|
-
|
|
45
13
|
name = "nim"
|
|
14
|
+
|
|
46
15
|
defaults = {
|
|
47
16
|
"models": [],
|
|
48
|
-
"backend": "managed",
|
|
49
17
|
"monitor": True,
|
|
50
18
|
"persist_db": False,
|
|
51
|
-
"queue_timeout": 5 * 24 * 3600, # Default 5 days in seconds
|
|
52
19
|
}
|
|
53
20
|
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
21
|
+
# Refer https://github.com/Netflix/metaflow/blob/master/docs/lifecycle.png
|
|
22
|
+
# to understand where these functions are invoked in the lifecycle of a
|
|
23
|
+
# Metaflow flow.
|
|
24
|
+
def step_init(self, flow, graph, step, decos, environment, flow_datastore, logger):
|
|
58
25
|
if self.attributes["monitor"]:
|
|
59
26
|
self.attach_card_decorator(
|
|
60
27
|
flow,
|
|
61
|
-
|
|
28
|
+
step,
|
|
62
29
|
NimMetricsRefresher.CARD_ID,
|
|
63
30
|
"blank",
|
|
64
31
|
refresh_interval=4.0,
|
|
@@ -68,11 +35,9 @@ class NimDecorator(StepDecorator, CardDecoratorInjector):
|
|
|
68
35
|
{
|
|
69
36
|
"nim": NimManager(
|
|
70
37
|
models=self.attributes["models"],
|
|
71
|
-
backend=self.attributes["backend"],
|
|
72
38
|
flow=flow,
|
|
73
|
-
step_name=
|
|
39
|
+
step_name=step,
|
|
74
40
|
monitor=self.attributes["monitor"],
|
|
75
|
-
queue_timeout=self.attributes["queue_timeout"],
|
|
76
41
|
)
|
|
77
42
|
}
|
|
78
43
|
)
|
|
@@ -81,15 +46,14 @@ class NimDecorator(StepDecorator, CardDecoratorInjector):
|
|
|
81
46
|
self, step_func, flow, graph, retry_count, max_user_code_retries, ubf_context
|
|
82
47
|
):
|
|
83
48
|
if self.attributes["monitor"]:
|
|
84
|
-
|
|
85
49
|
import sqlite3
|
|
86
|
-
from metaflow import current
|
|
87
50
|
|
|
88
51
|
file_path = get_storage_path(current.task_id)
|
|
89
52
|
if os.path.exists(file_path):
|
|
90
53
|
os.remove(file_path)
|
|
91
54
|
os.makedirs(NIM_MONITOR_LOCAL_STORAGE_ROOT, exist_ok=True)
|
|
92
55
|
conn = sqlite3.connect(file_path)
|
|
56
|
+
|
|
93
57
|
cursor = conn.cursor()
|
|
94
58
|
cursor.execute(
|
|
95
59
|
"""
|
|
@@ -0,0 +1,318 @@
|
|
|
1
|
+
import sys
|
|
2
|
+
import time
|
|
3
|
+
import requests
|
|
4
|
+
import sqlite3
|
|
5
|
+
from urllib3.util.retry import Retry
|
|
6
|
+
from requests.adapters import HTTPAdapter
|
|
7
|
+
from typing import Dict, Optional, Any
|
|
8
|
+
from .utils import get_ngc_response, get_storage_path
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def nvcf_submit_helper(
    url: str,
    payload: Dict[str, Any],
    headers: Optional[Dict[str, str]] = None,
    timeout: int = 30,
    max_retries: int = 300,
    backoff_factor: float = 0.3,
    request_delay: float = 1.1,
    log_callback: Optional[callable] = None,
) -> Dict[str, Any]:
    """
    POST ``payload`` to an NVCF endpoint and poll until the job finishes.

    The initial POST and every poll go through a ``requests.Session`` whose
    adapter retries GET/POST on 429, 500, 502, 503, 504 and 404. While the
    service answers 202, the status URL built from the ``NVCF-REQID``
    response header is polled, sleeping ``request_delay`` seconds between
    requests.

    Parameters
    ----------
    url : str
        Endpoint the payload is POSTed to.
    payload : Dict[str, Any]
        JSON body of the request.
    headers : Optional[Dict[str, str]]
        Request headers; plain JSON accept/content-type headers are used
        when omitted or empty.
    timeout : int
        Per-request timeout in seconds passed to ``requests``.
    max_retries : int
        Total retries configured on the retry strategy.
    backoff_factor : float
        Backoff factor configured on the retry strategy.
    request_delay : float
        Seconds slept before the first request, after it, and before
        every poll.
    log_callback : Optional[callable]
        Called as ``log_callback(response_data, duration, status_code,
        poll_count)`` on success, and with ``{}`` as ``response_data`` once
        per failed call. Exceptions raised by the callback are printed and
        swallowed.

    Returns
    -------
    Dict[str, Any]
        Decoded JSON body of the final (status 200) response.

    Raises
    ------
    requests.exceptions.HTTPError
        If the initial request or a poll returns an error status.
    Exception
        If polling ends on a status other than 200, if a 202 response
        carries no request id, or for any other request failure
        (re-raised unchanged).
    """
    # Guards against reporting one failure twice: the inner failure sites
    # log with the most precise status code and then raise into the outer
    # handlers, which would otherwise log the same failure again.
    error_logged = False

    def _log_error(start_time: float, status_code: int, poll_count: int):
        nonlocal error_logged
        if error_logged or not log_callback:
            return
        error_logged = True
        end_time = time.time()
        try:
            log_callback({}, end_time - start_time, status_code, poll_count)
        except Exception as log_error:
            print(f"Warning: Logging callback failed: {log_error}")

    # use default headers
    if not headers:
        headers = {"accept": "application/json", "content-type": "application/json"}
        print(f"Using Default Headers: {headers}")

    # Configure session with retry strategy
    session = requests.Session()
    status_forcelist = [429, 500, 502, 503, 504, 404]
    retry_strategy = Retry(
        total=max_retries,
        backoff_factor=backoff_factor,
        status_forcelist=status_forcelist,
        allowed_methods=["GET", "POST"],
    )
    adapter = HTTPAdapter(max_retries=retry_strategy)
    session.mount("http://", adapter)
    session.mount("https://", adapter)

    # Add artificial delay if specified
    time.sleep(request_delay)

    start_time = time.time()
    poll_count = 0
    status_code = 0
    response_data = {}

    try:
        # Make initial request
        response = session.post(url, json=payload, headers=headers, timeout=timeout)
        time.sleep(request_delay)

        # Handle initial response
        response.raise_for_status()
        request_id = response.headers.get("NVCF-REQID")
        polling_url = f"https://api.nvcf.nvidia.com/v2/nvcf/pexec/status/{request_id}"

        print(f"Polling NVCF Request ID: {request_id}")

        # Initial response status
        status_code = response.status_code
        print(f"Initial response status: {status_code}")

        # Without a request id there is nothing to poll; fail fast instead
        # of hammering ".../status/None" through the retrying adapter.
        if status_code == 202 and not request_id:
            _log_error(start_time, status_code, poll_count)
            raise Exception(
                "NVCF returned 202 without an NVCF-REQID header; cannot poll for the result"
            )

        # Create a variable to store the final response
        final_response = response

        # Continue polling while we get 202 (Accepted/Processing)
        while status_code == 202:
            poll_count += 1
            print(f"Polling attempt #{poll_count} to {polling_url}")

            # Wait before next poll
            time.sleep(request_delay)

            # Make a new poll request
            poll_response = session.get(polling_url, headers=headers, timeout=timeout)
            status_code = poll_response.status_code
            print(f"Poll #{poll_count} status: {status_code}")

            # Check for errors
            try:
                poll_response.raise_for_status()
            except requests.exceptions.HTTPError as e:
                print(f"Poll request failed: {str(e)}")
                poll_response.close()
                # Log the error before re-raising
                _log_error(start_time, poll_response.status_code, poll_count)
                raise

            # If status is 200, the job is complete
            if status_code == 200:
                print("Polling complete - job finished successfully")
                # Update our final response to be this poll response
                final_response = poll_response
                break

            # Close this poll response if we're going to loop again
            if status_code == 202:
                poll_response.close()

        # If we exited the loop without a 200 status, something went wrong
        if status_code != 200:
            print(f"Polling ended with unexpected status: {status_code}")
            # Log the error before raising
            _log_error(start_time, status_code, poll_count)
            raise Exception(f"Unexpected status code after polling: {status_code}")

        # Get the response data for logging
        response_data = final_response.json()

    except requests.exceptions.HTTPError as e:
        # Handle HTTP errors (4xx, 5xx status codes).
        # A Response is falsy for 4xx/5xx (its truth value is ``.ok``), so
        # the presence check must be an explicit ``is not None``.
        status_code = e.response.status_code if e.response is not None else 0
        print(f"HTTP Error: {str(e)}", file=sys.stderr)
        # Log the error (no-op if an inner site already logged it)
        _log_error(start_time, status_code, poll_count)
        raise

    except Exception as e:
        # Handle other errors (connection errors, timeouts, etc.)
        print(f"Request Error: {str(e)}", file=sys.stderr)
        # Log the error with status_code 0 to indicate non-HTTP error
        # (no-op if an inner site already logged it with a real status)
        _log_error(start_time, 0, poll_count)
        raise

    # Calculate final duration and log successful requests
    end_time = time.time()
    duration = end_time - start_time

    # Call the logging callback if provided
    if log_callback:
        try:
            log_callback(response_data, duration, status_code, poll_count)
        except Exception as e:
            print(f"Warning: Logging callback failed: {e}")

    # Log metrics
    print(
        f"Request completed: duration={duration:.2f}s, polls={poll_count}, "
        f"status={status_code}, size={len(final_response.content)} bytes"
    )

    return response_data
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
class NimMetadata(object):
    """
    Snapshot of the NVCF credentials and model catalog.

    Everything is read once, at construction time, from the ``"nvcf"``
    section of the payload returned by ``get_ngc_response()``.
    """

    def __init__(self):
        nvcf_section = get_ngc_response()["nvcf"]

        self.ngc_api_key = nvcf_section["api_key"]

        # One entry per NVCF function, re-keyed to the names used by the
        # rest of this module.
        self._nvcf_chat_completion_models = [
            {
                "name": function["model_key"],
                "function-id": function["id"],
                "version-id": function["version"],
            }
            for function in nvcf_section["functions"]
        ]

    def get_nvcf_chat_completion_models(self):
        """Return the list of model entries (name, function-id, version-id)."""
        return self._nvcf_chat_completion_models

    def get_headers_for_nvcf_request(self):
        """Return JSON request headers carrying the NGC API key as a bearer token."""
        bearer = f"Bearer {self.ngc_api_key}"
        return {
            "accept": "application/json",
            "content-type": "application/json",
            "Authorization": bearer,
            "NVCF-POLL-SECONDS": "5",
        }
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
class NimManager(object):
    """
    Registry of chat-completion clients for the requested models.

    ``self.models`` maps each requested model name to a
    ``NimChatCompletion`` instance. ``flow`` and ``step_name`` are accepted
    but not used by the constructor.
    """

    def __init__(self, models, flow, step_name, monitor):
        metadata = NimMetadata()
        supported_names = [
            entry["name"] for entry in metadata.get_nvcf_chat_completion_models()
        ]
        self.models = {}

        for requested in models:
            # Reject the first model that is not in the catalog.
            if requested not in supported_names:
                raise ValueError(
                    f"Model {requested} not supported by the Outerbounds @nim offering."
                    f"\nYou can choose from these options: {supported_names}\n\n"
                    "Reach out to Outerbounds if there are other models you'd like supported."
                )

            self.models[requested] = NimChatCompletion(
                model=requested,
                nim_metadata=metadata,
                monitor=monitor,
            )
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
class NimChatCompletion(object):
    """
    Callable client for a single NVCF chat-completion model.

    Calling the instance submits the keyword arguments, together with the
    model name, to the model's NVCF function endpoint. When ``monitor`` is
    true, one row per request is appended to the task's SQLite metrics
    file.
    """

    def __init__(
        self,
        model: str = "meta/llama3-8b-instruct",
        nim_metadata: NimMetadata = None,
        monitor: bool = False,
        **kwargs,
    ):
        if nim_metadata is None:
            raise ValueError(
                "NimMetadata object is required to initialize NimChatCompletion object."
            )

        self.model_name = model
        self.nim_metadata = nim_metadata
        self.monitor = monitor

        catalog = self.nim_metadata.get_nvcf_chat_completion_models()
        catalog_names = [entry["name"] for entry in catalog]

        if self.model_name not in catalog_names:
            raise ValueError(
                f"Model {self.model_name} not found in available NVCF models"
            )

        # First catalog entry whose name matches the requested model.
        self.model = catalog[catalog_names.index(self.model_name)]
        self.function_id = self.model["function-id"]
        self.version_id = self.model["version-id"]

        # The metrics file path is resolved lazily on the first call.
        self.first_request = True

    def log_stats(self, response_data, duration, status_code, poll_count):
        """Append one row describing a finished request to the metrics table."""
        if not self.monitor:
            return

        succeeded = status_code == 200

        # Token counts are only looked up for successful, non-empty
        # responses; a missing or malformed "usage" section leaves the
        # affected count as None.
        prompt_tokens = None
        completion_tokens = None
        if succeeded and response_data:
            try:
                prompt_tokens = response_data["usage"]["prompt_tokens"]
            except (KeyError, TypeError):
                prompt_tokens = None

            try:
                completion_tokens = response_data["usage"]["completion_tokens"]
            except (KeyError, TypeError):
                completion_tokens = None

        stats = {
            "status_code": status_code,
            "success": 1 if succeeded else 0,
            "error": 0 if succeeded else 1,
            "e2e_time": duration,
            "model": self.model_name,
            "poll_count": poll_count,
            "prompt_tokens": prompt_tokens,
            "completion_tokens": completion_tokens,
        }
        # Column order of the INSERT below; poll_count is not persisted.
        row = (
            stats["error"],
            stats["success"],
            stats["status_code"],
            stats["prompt_tokens"],
            stats["completion_tokens"],
            stats["e2e_time"],
            stats["model"],
        )

        conn = sqlite3.connect(self.file_name)
        cursor = conn.cursor()

        try:
            cursor.execute(
                """
                INSERT INTO metrics (error, success, status_code, prompt_tokens, completion_tokens, e2e_time, model)
                VALUES (?, ?, ?, ?, ?, ?, ?)
                """,
                row,
            )
            conn.commit()
        finally:
            conn.close()

    def __call__(self, **kwargs):
        """Submit a request for this model and return the decoded response."""
        if self.first_request:
            from metaflow import current

            self.file_name = get_storage_path(current.task_id)
            self.first_request = False

        # Create log callback if monitoring is enabled
        if self.monitor:
            log_callback = self.log_stats
        else:
            log_callback = None

        request_data = {"model": self.model_name, **kwargs}
        request_url = (
            f"https://api.nvcf.nvidia.com/v2/nvcf/pexec/functions/{self.function_id}"
        )

        try:
            return nvcf_submit_helper(
                url=request_url,
                payload=request_data,
                headers=self.nim_metadata.get_headers_for_nvcf_request(),
                log_callback=log_callback,
            )

        except requests.exceptions.HTTPError as e:
            print(f"[@nim ERROR] NVCF API request failed: {str(e)}", file=sys.stderr)
            raise

        except Exception as e:
            print(f"[@nim ERROR] Unexpected error: {str(e)}", file=sys.stderr)
            raise
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import json
|
|
3
|
+
import requests
|
|
4
|
+
from urllib.parse import urlparse
|
|
5
|
+
from metaflow.metaflow_config import SERVICE_URL
|
|
6
|
+
from metaflow.metaflow_config_funcs import init_config
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
# Directory (relative to the working directory) holding per-task metrics files.
NIM_MONITOR_LOCAL_STORAGE_ROOT = ".nim-monitor"


def get_storage_path(task_id):
    """
    Return the path of the SQLite metrics file for ``task_id``.

    The path is ``<NIM_MONITOR_LOCAL_STORAGE_ROOT>/<task_id>.sqlite``.
    ``task_id`` is formatted with ``str()``, so non-string ids (e.g. ints)
    are accepted as well as strings.
    """
    return f"{NIM_MONITOR_LOCAL_STORAGE_ROOT}/{task_id}.sqlite"
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def get_ngc_response(timeout=30):
    """
    Fetch the NIM configuration payload from the auth host.

    The host comes from the ``OBP_AUTH_SERVER`` config entry when present;
    otherwise it is derived from ``SERVICE_URL`` by replacing the first
    hostname label with ``auth``. The request is authenticated with the
    ``METAFLOW_SERVICE_AUTH_KEY`` config entry when present, otherwise with
    the JSON-encoded headers in the ``METAFLOW_SERVICE_HEADERS``
    environment variable.

    Parameters
    ----------
    timeout : float
        Seconds to wait for the auth host before ``requests`` raises a
        timeout error; without it a stalled connection would block forever.

    Returns
    -------
    The decoded JSON body of the response.

    Raises
    ------
    TypeError
        If neither ``METAFLOW_SERVICE_AUTH_KEY`` nor
        ``METAFLOW_SERVICE_HEADERS`` is available.
    requests.exceptions.HTTPError
        If the auth host answers with an error status.
    """
    conf = init_config()
    if "OBP_AUTH_SERVER" in conf:
        auth_host = conf["OBP_AUTH_SERVER"]
    else:
        auth_host = "auth." + urlparse(SERVICE_URL).hostname.split(".", 1)[1]

    # NOTE: reusing the same auth_host as the one used in NimMetadata,
    # however, user should not need to use nim container to use @nvct.
    # May want to refactor this to a common endpoint.
    nim_info_url = "https://" + auth_host + "/generate/nim"

    if "METAFLOW_SERVICE_AUTH_KEY" in conf:
        headers = {"x-api-key": conf["METAFLOW_SERVICE_AUTH_KEY"]}
    else:
        raw_headers = os.environ.get("METAFLOW_SERVICE_HEADERS")
        if raw_headers is None:
            # Same exception type json.loads(None) used to raise, but with
            # a message that says what is actually missing.
            raise TypeError(
                "Cannot authenticate against the auth server: neither "
                "METAFLOW_SERVICE_AUTH_KEY (config) nor "
                "METAFLOW_SERVICE_HEADERS (environment) is set."
            )
        headers = json.loads(raw_headers)

    res = requests.get(nim_info_url, headers=headers, timeout=timeout)
    res.raise_for_status()
    return res.json()
|
|
@@ -25,10 +25,10 @@ metaflow_extensions/outerbounds/plugins/fast_bakery/fast_bakery_cli.py
|
|
|
25
25
|
metaflow_extensions/outerbounds/plugins/fast_bakery/fast_bakery_decorator.py
|
|
26
26
|
metaflow_extensions/outerbounds/plugins/kubernetes/__init__.py
|
|
27
27
|
metaflow_extensions/outerbounds/plugins/kubernetes/kubernetes_client.py
|
|
28
|
-
metaflow_extensions/outerbounds/plugins/nim/__init__.py
|
|
29
28
|
metaflow_extensions/outerbounds/plugins/nim/card.py
|
|
29
|
+
metaflow_extensions/outerbounds/plugins/nim/nim_decorator.py
|
|
30
30
|
metaflow_extensions/outerbounds/plugins/nim/nim_manager.py
|
|
31
|
-
metaflow_extensions/outerbounds/plugins/nim/utilities.py
|
|
31
|
+
metaflow_extensions/outerbounds/plugins/nim/utils.py
|
|
32
32
|
metaflow_extensions/outerbounds/plugins/nvcf/__init__.py
|
|
33
33
|
metaflow_extensions/outerbounds/plugins/nvcf/constants.py
|
|
34
34
|
metaflow_extensions/outerbounds/plugins/nvcf/exceptions.py
|
|
@@ -2,7 +2,7 @@ from setuptools import setup, find_namespace_packages
|
|
|
2
2
|
from pathlib import Path
|
|
3
3
|
|
|
4
4
|
|
|
5
|
-
version = "1.1.155"
|
|
5
|
+
version = "1.1.156"
|
|
6
6
|
this_directory = Path(__file__).parent
|
|
7
7
|
long_description = (this_directory / "README.md").read_text()
|
|
8
8
|
|
|
@@ -18,6 +18,6 @@ setup(
|
|
|
18
18
|
install_requires=[
|
|
19
19
|
"boto3",
|
|
20
20
|
"kubernetes",
|
|
21
|
-
"ob-metaflow == 2.15.
|
|
21
|
+
"ob-metaflow == 2.15.14.1",
|
|
22
22
|
],
|
|
23
23
|
)
|
|
@@ -1,318 +0,0 @@
|
|
|
1
|
-
import os, sys, time, json, random, requests, sqlite3
|
|
2
|
-
from urllib.parse import urlparse
|
|
3
|
-
from metaflow.metaflow_config import SERVICE_URL
|
|
4
|
-
from metaflow.metaflow_config_funcs import init_config
|
|
5
|
-
from .utilities import get_storage_path
|
|
6
|
-
from ..nvcf.nvcf import retry_on_status
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
NVCF_URL = "https://api.nvcf.nvidia.com"
|
|
10
|
-
NVCF_SUBMIT_ENDPOINT = f"{NVCF_URL}/v2/nvcf/pexec/functions"
|
|
11
|
-
NVCF_RESULT_ENDPOINT = f"{NVCF_URL}/v2/nvcf/pexec/status"
|
|
12
|
-
NVCF_POLL_INTERVAL_SECONDS = 1
|
|
13
|
-
COMMON_HEADERS = {
|
|
14
|
-
"accept": "application/json",
|
|
15
|
-
"Content-Type": "application/json",
|
|
16
|
-
"nvcf-feature-enable-gateway-timeout": "true",
|
|
17
|
-
}
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
class NimMetadata(object):
|
|
21
|
-
def __init__(self):
|
|
22
|
-
self._nvcf_chat_completion_models = []
|
|
23
|
-
self._coreweave_chat_completion_models = []
|
|
24
|
-
|
|
25
|
-
conf = init_config()
|
|
26
|
-
|
|
27
|
-
if "OBP_AUTH_SERVER" in conf:
|
|
28
|
-
auth_host = conf["OBP_AUTH_SERVER"]
|
|
29
|
-
else:
|
|
30
|
-
auth_host = "auth." + urlparse(SERVICE_URL).hostname.split(".", 1)[1]
|
|
31
|
-
|
|
32
|
-
nim_info_url = "https://" + auth_host + "/generate/nim"
|
|
33
|
-
|
|
34
|
-
if "METAFLOW_SERVICE_AUTH_KEY" in conf:
|
|
35
|
-
headers = {"x-api-key": conf["METAFLOW_SERVICE_AUTH_KEY"]}
|
|
36
|
-
res = requests.get(nim_info_url, headers=headers)
|
|
37
|
-
else:
|
|
38
|
-
headers = json.loads(os.environ.get("METAFLOW_SERVICE_HEADERS"))
|
|
39
|
-
res = requests.get(nim_info_url, headers=headers)
|
|
40
|
-
|
|
41
|
-
res.raise_for_status()
|
|
42
|
-
self._ngc_api_key = res.json()["nvcf"]["api_key"]
|
|
43
|
-
|
|
44
|
-
for model in res.json()["nvcf"]["functions"]:
|
|
45
|
-
self._nvcf_chat_completion_models.append(
|
|
46
|
-
{
|
|
47
|
-
"name": model["model_key"],
|
|
48
|
-
"function-id": model["id"],
|
|
49
|
-
"version-id": model["version"],
|
|
50
|
-
}
|
|
51
|
-
)
|
|
52
|
-
for model in res.json()["coreweave"]["containers"]:
|
|
53
|
-
self._coreweave_chat_completion_models.append(
|
|
54
|
-
{"name": model["nim_name"], "ip-address": model["ip_addr"]}
|
|
55
|
-
)
|
|
56
|
-
|
|
57
|
-
def get_nvcf_chat_completion_models(self):
|
|
58
|
-
return self._nvcf_chat_completion_models
|
|
59
|
-
|
|
60
|
-
def get_headers_for_nvcf_request(self):
|
|
61
|
-
return {**COMMON_HEADERS, "Authorization": f"Bearer {self._ngc_api_key}"}
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
class NimManager(object):
|
|
65
|
-
def __init__(self, models, backend, flow, step_name, monitor, queue_timeout):
|
|
66
|
-
|
|
67
|
-
nim_metadata = NimMetadata()
|
|
68
|
-
if backend == "managed":
|
|
69
|
-
nvcf_models = [
|
|
70
|
-
m["name"] for m in nim_metadata.get_nvcf_chat_completion_models()
|
|
71
|
-
]
|
|
72
|
-
|
|
73
|
-
self.models = {}
|
|
74
|
-
for m in models:
|
|
75
|
-
if m in nvcf_models:
|
|
76
|
-
self.models[m] = NimChatCompletion(
|
|
77
|
-
model=m,
|
|
78
|
-
provider="NVCF",
|
|
79
|
-
nim_metadata=nim_metadata,
|
|
80
|
-
monitor=monitor,
|
|
81
|
-
queue_timeout=queue_timeout,
|
|
82
|
-
)
|
|
83
|
-
else:
|
|
84
|
-
raise ValueError(
|
|
85
|
-
f"Model {m} not supported by the Outerbounds @nim offering."
|
|
86
|
-
f"\nYou can choose from these options: {nvcf_models}\n\n"
|
|
87
|
-
"Reach out to Outerbounds if there are other models you'd like supported."
|
|
88
|
-
)
|
|
89
|
-
else:
|
|
90
|
-
raise ValueError(
|
|
91
|
-
f"Backend {backend} not supported by the Outerbounds @nim offering. Please reach out to Outerbounds."
|
|
92
|
-
)
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
class JobStatus(object):
|
|
96
|
-
SUBMITTED = "SUBMITTED"
|
|
97
|
-
RUNNING = "RUNNING"
|
|
98
|
-
SUCCESSFUL = "SUCCESSFUL"
|
|
99
|
-
FAILED = "FAILED"
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
class NimChatCompletion(object):
|
|
103
|
-
def __init__(
|
|
104
|
-
self,
|
|
105
|
-
model="meta/llama3-8b-instruct",
|
|
106
|
-
provider="NVCF",
|
|
107
|
-
nim_metadata=None,
|
|
108
|
-
monitor=False,
|
|
109
|
-
queue_timeout=None,
|
|
110
|
-
**kwargs,
|
|
111
|
-
):
|
|
112
|
-
if nim_metadata is None:
|
|
113
|
-
raise ValueError(
|
|
114
|
-
"NimMetadata object is required to initialize NimChatCompletion object."
|
|
115
|
-
)
|
|
116
|
-
|
|
117
|
-
self._nim_metadata = nim_metadata
|
|
118
|
-
self.compute_provider = provider
|
|
119
|
-
self.invocations = []
|
|
120
|
-
self.max_request_retries = int(
|
|
121
|
-
os.environ.get("METAFLOW_EXT_HTTP_MAX_RETRIES", "10")
|
|
122
|
-
)
|
|
123
|
-
self.monitor = monitor
|
|
124
|
-
|
|
125
|
-
if self.compute_provider == "NVCF":
|
|
126
|
-
nvcf_model_names = [
|
|
127
|
-
m["name"] for m in self._nim_metadata.get_nvcf_chat_completion_models()
|
|
128
|
-
]
|
|
129
|
-
self.model = model
|
|
130
|
-
self.function_id = self._nim_metadata.get_nvcf_chat_completion_models()[
|
|
131
|
-
nvcf_model_names.index(model)
|
|
132
|
-
]["function-id"]
|
|
133
|
-
self.version_id = self._nim_metadata.get_nvcf_chat_completion_models()[
|
|
134
|
-
nvcf_model_names.index(model)
|
|
135
|
-
]["version-id"]
|
|
136
|
-
else:
|
|
137
|
-
raise ValueError(
|
|
138
|
-
f"Backend compute provider {self.compute_provider} not yet supported for @nim."
|
|
139
|
-
)
|
|
140
|
-
|
|
141
|
-
# to know whether to set file_name
|
|
142
|
-
self.first_request = True
|
|
143
|
-
|
|
144
|
-
# TODO (Eddie) - this may make more sense in a base class.
|
|
145
|
-
# @nim arch needs redesign if customers start using it in more creative ways.
|
|
146
|
-
self._poll_seconds = "3600"
|
|
147
|
-
self._queue_timeout = queue_timeout
|
|
148
|
-
self._status = None
|
|
149
|
-
self._result = {}
|
|
150
|
-
|
|
151
|
-
@property
|
|
152
|
-
def status(self):
|
|
153
|
-
return self._status
|
|
154
|
-
|
|
155
|
-
@property
|
|
156
|
-
def has_failed(self):
|
|
157
|
-
return self._status == JobStatus.FAILED
|
|
158
|
-
|
|
159
|
-
@property
|
|
160
|
-
def is_running(self):
|
|
161
|
-
return self._status == JobStatus.SUBMITTED
|
|
162
|
-
|
|
163
|
-
@property
|
|
164
|
-
def result(self):
|
|
165
|
-
return self._result
|
|
166
|
-
|
|
167
|
-
def _log_stats(self, response, e2e_time):
|
|
168
|
-
stats = {}
|
|
169
|
-
if response.status_code == 200:
|
|
170
|
-
stats["success"] = 1
|
|
171
|
-
stats["error"] = 0
|
|
172
|
-
else:
|
|
173
|
-
stats["success"] = 0
|
|
174
|
-
stats["error"] = 1
|
|
175
|
-
stats["status_code"] = response.status_code
|
|
176
|
-
try:
|
|
177
|
-
stats["prompt_tokens"] = response.json()["usage"]["prompt_tokens"]
|
|
178
|
-
except KeyError:
|
|
179
|
-
stats["prompt_tokens"] = None
|
|
180
|
-
try:
|
|
181
|
-
stats["completion_tokens"] = response.json()["usage"]["completion_tokens"]
|
|
182
|
-
except KeyError:
|
|
183
|
-
stats["completion_tokens"] = None
|
|
184
|
-
stats["e2e_time"] = e2e_time
|
|
185
|
-
stats["provider"] = self.compute_provider
|
|
186
|
-
stats["model"] = self.model
|
|
187
|
-
|
|
188
|
-
conn = sqlite3.connect(self.file_name)
|
|
189
|
-
cursor = conn.cursor()
|
|
190
|
-
try:
|
|
191
|
-
cursor.execute(
|
|
192
|
-
"""
|
|
193
|
-
INSERT INTO metrics (error, success, status_code, prompt_tokens, completion_tokens, e2e_time, model)
|
|
194
|
-
VALUES (?, ?, ?, ?, ?, ?, ?)
|
|
195
|
-
""",
|
|
196
|
-
(
|
|
197
|
-
stats["error"],
|
|
198
|
-
stats["success"],
|
|
199
|
-
stats["status_code"],
|
|
200
|
-
stats["prompt_tokens"],
|
|
201
|
-
stats["completion_tokens"],
|
|
202
|
-
stats["e2e_time"],
|
|
203
|
-
stats["model"],
|
|
204
|
-
),
|
|
205
|
-
)
|
|
206
|
-
conn.commit()
|
|
207
|
-
finally:
|
|
208
|
-
conn.close()
|
|
209
|
-
|
|
210
|
-
@retry_on_status(status_codes=[500], max_retries=3, delay=5)
|
|
211
|
-
@retry_on_status(status_codes=[504])
|
|
212
|
-
def __call__(self, **kwargs):
|
|
213
|
-
|
|
214
|
-
if self.first_request:
|
|
215
|
-
# Put here to guarantee self.file_name is set after task_id exists.
|
|
216
|
-
from metaflow import current
|
|
217
|
-
|
|
218
|
-
self.file_name = get_storage_path(current.task_id)
|
|
219
|
-
|
|
220
|
-
request_data = {"model": self.model, **kwargs}
|
|
221
|
-
request_url = f"{NVCF_SUBMIT_ENDPOINT}/{self.function_id}"
|
|
222
|
-
retry_delay = 1
|
|
223
|
-
attempts = 0
|
|
224
|
-
t0 = time.time()
|
|
225
|
-
while attempts < self.max_request_retries:
|
|
226
|
-
try:
|
|
227
|
-
attempts += 1
|
|
228
|
-
response = requests.post(
|
|
229
|
-
request_url,
|
|
230
|
-
headers=self._nim_metadata.get_headers_for_nvcf_request(),
|
|
231
|
-
json=request_data,
|
|
232
|
-
)
|
|
233
|
-
if response.status_code == 202:
|
|
234
|
-
invocation_id = response.headers.get("NVCF-REQID")
|
|
235
|
-
self.invocations.append(invocation_id)
|
|
236
|
-
self._status = JobStatus.SUBMITTED
|
|
237
|
-
elif response.status_code == 200:
|
|
238
|
-
tf = time.time()
|
|
239
|
-
if self.monitor:
|
|
240
|
-
self._log_stats(response, tf - t0)
|
|
241
|
-
self._status = JobStatus.SUCCESSFUL
|
|
242
|
-
self._result = response.json()
|
|
243
|
-
return self._result
|
|
244
|
-
elif response.status_code == 400:
|
|
245
|
-
self._status = JobStatus.FAILED
|
|
246
|
-
msg = (
|
|
247
|
-
"[@nim ERROR] The OpenAI-compatible returned a 400 status code. "
|
|
248
|
-
+ "Known causes include improper requests or prompts with too many tokens for the selected model. "
|
|
249
|
-
+ "Please contact Outerbounds if you need assistance resolving the issue."
|
|
250
|
-
)
|
|
251
|
-
print(msg, file=sys.stderr)
|
|
252
|
-
self._result = {"ERROR": msg}
|
|
253
|
-
return self._result
|
|
254
|
-
except (
|
|
255
|
-
requests.exceptions.ConnectionError,
|
|
256
|
-
requests.exceptions.ReadTimeout,
|
|
257
|
-
) as e:
|
|
258
|
-
# ConnectionErrors are generally temporary errors like DNS resolution failures,
|
|
259
|
-
# timeouts etc.
|
|
260
|
-
print(
|
|
261
|
-
"received error of type {}. Retrying...".format(type(e)),
|
|
262
|
-
e,
|
|
263
|
-
file=sys.stderr,
|
|
264
|
-
)
|
|
265
|
-
time.sleep(retry_delay)
|
|
266
|
-
retry_delay *= 2 # Double the delay for the next attempt
|
|
267
|
-
retry_delay += random.uniform(0, 1) # Add jitter
|
|
268
|
-
retry_delay = min(retry_delay, 10)
|
|
269
|
-
|
|
270
|
-
def _poll():
|
|
271
|
-
poll_request_url = f"{NVCF_RESULT_ENDPOINT}/{invocation_id}"
|
|
272
|
-
attempts = 0
|
|
273
|
-
retry_delay = 1
|
|
274
|
-
while attempts < self.max_request_retries:
|
|
275
|
-
try:
|
|
276
|
-
attempts += 1
|
|
277
|
-
poll_response = requests.get(
|
|
278
|
-
poll_request_url,
|
|
279
|
-
headers=self._nim_metadata.get_headers_for_nvcf_request(),
|
|
280
|
-
)
|
|
281
|
-
if poll_response.status_code == 200:
|
|
282
|
-
tf = time.time()
|
|
283
|
-
self._log_stats(response, tf - t0)
|
|
284
|
-
self._status = JobStatus.SUCCESSFUL
|
|
285
|
-
self._result = poll_response.json()
|
|
286
|
-
return self._result
|
|
287
|
-
elif poll_response.status_code == 202:
|
|
288
|
-
self._status = JobStatus.SUBMITTED
|
|
289
|
-
return 202
|
|
290
|
-
elif poll_response.status_code == 400:
|
|
291
|
-
self._status = JobStatus.FAILED
|
|
292
|
-
msg = (
|
|
293
|
-
"[@nim ERROR] The OpenAI-compatible API returned a 400 status code. "
|
|
294
|
-
+ "Known causes include improper requests or prompts with too many tokens for the selected model. "
|
|
295
|
-
+ "Please contact Outerbounds if you need assistance resolving the issue."
|
|
296
|
-
)
|
|
297
|
-
print(msg, file=sys.stderr)
|
|
298
|
-
self._result = {"@nim ERROR": msg}
|
|
299
|
-
return self._result
|
|
300
|
-
except (
|
|
301
|
-
requests.exceptions.ConnectionError,
|
|
302
|
-
requests.exceptions.ReadTimeout,
|
|
303
|
-
) as e:
|
|
304
|
-
print(
|
|
305
|
-
"received error of type {}. Retrying...".format(type(e)),
|
|
306
|
-
e,
|
|
307
|
-
file=sys.stderr,
|
|
308
|
-
)
|
|
309
|
-
time.sleep(retry_delay)
|
|
310
|
-
retry_delay *= 2 # Double the delay for the next attempt
|
|
311
|
-
retry_delay += random.uniform(0, 1) # Add jitter
|
|
312
|
-
retry_delay = min(retry_delay, 10)
|
|
313
|
-
|
|
314
|
-
while True:
|
|
315
|
-
data = _poll()
|
|
316
|
-
if data and data != 202:
|
|
317
|
-
return data
|
|
318
|
-
time.sleep(NVCF_POLL_INTERVAL_SECONDS)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|