ob-metaflow-extensions 1.1.171rc1__py2.py3-none-any.whl → 1.4.39__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ob-metaflow-extensions might be problematic. Click here for more details.
- metaflow_extensions/outerbounds/plugins/__init__.py +6 -3
- metaflow_extensions/outerbounds/plugins/apps/app_cli.py +0 -29
- metaflow_extensions/outerbounds/plugins/apps/app_deploy_decorator.py +146 -0
- metaflow_extensions/outerbounds/plugins/apps/core/__init__.py +10 -0
- metaflow_extensions/outerbounds/plugins/apps/core/_state_machine.py +506 -0
- metaflow_extensions/outerbounds/plugins/apps/core/_vendor/__init__.py +0 -0
- metaflow_extensions/outerbounds/plugins/apps/core/_vendor/spinner/__init__.py +4 -0
- metaflow_extensions/outerbounds/plugins/apps/core/_vendor/spinner/spinners.py +478 -0
- metaflow_extensions/outerbounds/plugins/apps/core/app_cli.py +1200 -0
- metaflow_extensions/outerbounds/plugins/apps/core/app_config.py +146 -0
- metaflow_extensions/outerbounds/plugins/apps/core/artifacts.py +0 -0
- metaflow_extensions/outerbounds/plugins/apps/core/capsule.py +958 -0
- metaflow_extensions/outerbounds/plugins/apps/core/click_importer.py +24 -0
- metaflow_extensions/outerbounds/plugins/apps/core/code_package/__init__.py +3 -0
- metaflow_extensions/outerbounds/plugins/apps/core/code_package/code_packager.py +618 -0
- metaflow_extensions/outerbounds/plugins/apps/core/code_package/examples.py +125 -0
- metaflow_extensions/outerbounds/plugins/apps/core/config/__init__.py +12 -0
- metaflow_extensions/outerbounds/plugins/apps/core/config/cli_generator.py +161 -0
- metaflow_extensions/outerbounds/plugins/apps/core/config/config_utils.py +868 -0
- metaflow_extensions/outerbounds/plugins/apps/core/config/schema_export.py +288 -0
- metaflow_extensions/outerbounds/plugins/apps/core/config/typed_configs.py +139 -0
- metaflow_extensions/outerbounds/plugins/apps/core/config/typed_init_generator.py +398 -0
- metaflow_extensions/outerbounds/plugins/apps/core/config/unified_config.py +1088 -0
- metaflow_extensions/outerbounds/plugins/apps/core/config_schema.yaml +337 -0
- metaflow_extensions/outerbounds/plugins/apps/core/dependencies.py +115 -0
- metaflow_extensions/outerbounds/plugins/apps/core/deployer.py +303 -0
- metaflow_extensions/outerbounds/plugins/apps/core/experimental/__init__.py +89 -0
- metaflow_extensions/outerbounds/plugins/apps/core/perimeters.py +87 -0
- metaflow_extensions/outerbounds/plugins/apps/core/secrets.py +164 -0
- metaflow_extensions/outerbounds/plugins/apps/core/utils.py +233 -0
- metaflow_extensions/outerbounds/plugins/apps/core/validations.py +17 -0
- metaflow_extensions/outerbounds/plugins/aws/assume_role_decorator.py +68 -15
- metaflow_extensions/outerbounds/plugins/checkpoint_datastores/coreweave.py +9 -77
- metaflow_extensions/outerbounds/plugins/checkpoint_datastores/external_chckpt.py +85 -0
- metaflow_extensions/outerbounds/plugins/checkpoint_datastores/nebius.py +7 -78
- metaflow_extensions/outerbounds/plugins/fast_bakery/docker_environment.py +6 -2
- metaflow_extensions/outerbounds/plugins/fast_bakery/fast_bakery.py +1 -0
- metaflow_extensions/outerbounds/plugins/nvct/nvct_decorator.py +8 -8
- metaflow_extensions/outerbounds/plugins/optuna/__init__.py +48 -0
- metaflow_extensions/outerbounds/plugins/profilers/simple_card_decorator.py +96 -0
- metaflow_extensions/outerbounds/plugins/s3_proxy/__init__.py +7 -0
- metaflow_extensions/outerbounds/plugins/s3_proxy/binary_caller.py +132 -0
- metaflow_extensions/outerbounds/plugins/s3_proxy/constants.py +11 -0
- metaflow_extensions/outerbounds/plugins/s3_proxy/exceptions.py +13 -0
- metaflow_extensions/outerbounds/plugins/s3_proxy/proxy_bootstrap.py +59 -0
- metaflow_extensions/outerbounds/plugins/s3_proxy/s3_proxy_api.py +93 -0
- metaflow_extensions/outerbounds/plugins/s3_proxy/s3_proxy_decorator.py +250 -0
- metaflow_extensions/outerbounds/plugins/s3_proxy/s3_proxy_manager.py +225 -0
- metaflow_extensions/outerbounds/plugins/snowflake/snowflake.py +37 -7
- metaflow_extensions/outerbounds/plugins/snowpark/snowpark.py +18 -8
- metaflow_extensions/outerbounds/plugins/snowpark/snowpark_cli.py +6 -0
- metaflow_extensions/outerbounds/plugins/snowpark/snowpark_client.py +45 -18
- metaflow_extensions/outerbounds/plugins/snowpark/snowpark_decorator.py +18 -9
- metaflow_extensions/outerbounds/plugins/snowpark/snowpark_job.py +10 -4
- metaflow_extensions/outerbounds/plugins/torchtune/__init__.py +4 -0
- metaflow_extensions/outerbounds/plugins/vllm/__init__.py +173 -95
- metaflow_extensions/outerbounds/plugins/vllm/status_card.py +9 -9
- metaflow_extensions/outerbounds/plugins/vllm/vllm_manager.py +159 -9
- metaflow_extensions/outerbounds/remote_config.py +8 -3
- metaflow_extensions/outerbounds/toplevel/global_aliases_for_metaflow_package.py +62 -1
- metaflow_extensions/outerbounds/toplevel/ob_internal.py +2 -0
- metaflow_extensions/outerbounds/toplevel/plugins/optuna/__init__.py +1 -0
- metaflow_extensions/outerbounds/toplevel/s3_proxy.py +88 -0
- {ob_metaflow_extensions-1.1.171rc1.dist-info → ob_metaflow_extensions-1.4.39.dist-info}/METADATA +2 -2
- {ob_metaflow_extensions-1.1.171rc1.dist-info → ob_metaflow_extensions-1.4.39.dist-info}/RECORD +67 -25
- {ob_metaflow_extensions-1.1.171rc1.dist-info → ob_metaflow_extensions-1.4.39.dist-info}/WHEEL +0 -0
- {ob_metaflow_extensions-1.1.171rc1.dist-info → ob_metaflow_extensions-1.4.39.dist-info}/top_level.txt +0 -0
|
@@ -10,14 +10,15 @@ from metaflow.exception import MetaflowException
|
|
|
10
10
|
class SnowparkClient(object):
|
|
11
11
|
def __init__(
|
|
12
12
|
self,
|
|
13
|
-
account: str,
|
|
14
|
-
user: str,
|
|
15
|
-
password: str,
|
|
16
|
-
role: str,
|
|
17
|
-
database: str,
|
|
18
|
-
warehouse: str,
|
|
19
|
-
schema: str,
|
|
13
|
+
account: str = None,
|
|
14
|
+
user: str = None,
|
|
15
|
+
password: str = None,
|
|
16
|
+
role: str = None,
|
|
17
|
+
database: str = None,
|
|
18
|
+
warehouse: str = None,
|
|
19
|
+
schema: str = None,
|
|
20
20
|
autocommit: bool = True,
|
|
21
|
+
integration: str = None,
|
|
21
22
|
):
|
|
22
23
|
try:
|
|
23
24
|
from snowflake.core import Root
|
|
@@ -27,22 +28,48 @@ class SnowparkClient(object):
|
|
|
27
28
|
except (NameError, ImportError, ModuleNotFoundError):
|
|
28
29
|
raise SnowflakeException(
|
|
29
30
|
"Could not import module 'snowflake'.\n\nInstall Snowflake "
|
|
30
|
-
"Python
|
|
31
|
-
"
|
|
32
|
-
"
|
|
31
|
+
"Python packages first:\n"
|
|
32
|
+
" snowflake==1.8.0\n"
|
|
33
|
+
" snowflake-connector-python==3.18.0\n"
|
|
34
|
+
" snowflake-snowpark-python==1.40.0\n\n"
|
|
35
|
+
"You can install them by executing:\n"
|
|
36
|
+
"%s -m pip install snowflake==1.8.0 snowflake-connector-python==3.18.0 snowflake-snowpark-python==1.40.0\n"
|
|
33
37
|
"or equivalent through your favorite Python package manager."
|
|
34
38
|
% sys.executable
|
|
35
39
|
)
|
|
36
40
|
|
|
41
|
+
if integration:
|
|
42
|
+
# Use OAuth authentication via Outerbounds integration
|
|
43
|
+
from metaflow_extensions.outerbounds.plugins.snowflake.snowflake import (
|
|
44
|
+
get_oauth_connection_params,
|
|
45
|
+
)
|
|
46
|
+
|
|
47
|
+
self.connection_parameters = get_oauth_connection_params(
|
|
48
|
+
user=user or "",
|
|
49
|
+
role=role or "",
|
|
50
|
+
integration=integration,
|
|
51
|
+
schema=schema or "",
|
|
52
|
+
account=account,
|
|
53
|
+
warehouse=warehouse,
|
|
54
|
+
database=database,
|
|
55
|
+
)
|
|
56
|
+
self.connection_parameters["autocommit"] = autocommit
|
|
57
|
+
else:
|
|
58
|
+
# Password-based authentication
|
|
59
|
+
self.connection_parameters = {
|
|
60
|
+
"account": account,
|
|
61
|
+
"user": user,
|
|
62
|
+
"password": password,
|
|
63
|
+
"role": role,
|
|
64
|
+
"warehouse": warehouse,
|
|
65
|
+
"database": database,
|
|
66
|
+
"schema": schema,
|
|
67
|
+
"autocommit": autocommit,
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
# Remove None values from connection parameters
|
|
37
71
|
self.connection_parameters = {
|
|
38
|
-
|
|
39
|
-
"user": user,
|
|
40
|
-
"password": password,
|
|
41
|
-
"role": role,
|
|
42
|
-
"warehouse": warehouse,
|
|
43
|
-
"database": database,
|
|
44
|
-
"schema": schema,
|
|
45
|
-
"autocommit": autocommit,
|
|
72
|
+
k: v for k, v in self.connection_parameters.items() if v is not None
|
|
46
73
|
}
|
|
47
74
|
|
|
48
75
|
try:
|
|
@@ -42,10 +42,13 @@ class Snowflake(object):
|
|
|
42
42
|
return session
|
|
43
43
|
except (NameError, ImportError, ModuleNotFoundError):
|
|
44
44
|
raise SnowflakeException(
|
|
45
|
-
"Could not import module 'snowflake'.\n\
|
|
46
|
-
"
|
|
47
|
-
"
|
|
48
|
-
"
|
|
45
|
+
"Could not import module 'snowflake'.\n\n"
|
|
46
|
+
"Install required Snowflake packages using the @pypi decorator:\n"
|
|
47
|
+
"@pypi(packages={\n"
|
|
48
|
+
" 'snowflake': '1.8.0',\n"
|
|
49
|
+
" 'snowflake-connector-python': '3.18.0',\n"
|
|
50
|
+
" 'snowflake-snowpark-python': '1.40.0'\n"
|
|
51
|
+
"})\n"
|
|
49
52
|
)
|
|
50
53
|
|
|
51
54
|
|
|
@@ -68,6 +71,7 @@ class SnowparkDecorator(StepDecorator):
|
|
|
68
71
|
"cpu": None,
|
|
69
72
|
"gpu": None,
|
|
70
73
|
"memory": None,
|
|
74
|
+
"integration": None, # Outerbounds OAuth integration name
|
|
71
75
|
}
|
|
72
76
|
|
|
73
77
|
package_url = None
|
|
@@ -77,12 +81,11 @@ class SnowparkDecorator(StepDecorator):
|
|
|
77
81
|
def __init__(self, attributes=None, statically_defined=False):
|
|
78
82
|
super(SnowparkDecorator, self).__init__(attributes, statically_defined)
|
|
79
83
|
|
|
84
|
+
# Set defaults from config (user can override via decorator or integration)
|
|
80
85
|
if not self.attributes["account"]:
|
|
81
86
|
self.attributes["account"] = SNOWPARK_ACCOUNT
|
|
82
87
|
if not self.attributes["user"]:
|
|
83
88
|
self.attributes["user"] = SNOWPARK_USER
|
|
84
|
-
if not self.attributes["password"]:
|
|
85
|
-
self.attributes["password"] = SNOWPARK_PASSWORD
|
|
86
89
|
if not self.attributes["role"]:
|
|
87
90
|
self.attributes["role"] = SNOWPARK_ROLE
|
|
88
91
|
if not self.attributes["database"]:
|
|
@@ -91,6 +94,9 @@ class SnowparkDecorator(StepDecorator):
|
|
|
91
94
|
self.attributes["warehouse"] = SNOWPARK_WAREHOUSE
|
|
92
95
|
if not self.attributes["schema"]:
|
|
93
96
|
self.attributes["schema"] = SNOWPARK_SCHEMA
|
|
97
|
+
# Only use password from config if not using integration (OAuth)
|
|
98
|
+
if not self.attributes["integration"] and not self.attributes["password"]:
|
|
99
|
+
self.attributes["password"] = SNOWPARK_PASSWORD
|
|
94
100
|
|
|
95
101
|
# If no docker image is explicitly specified, impute a default image.
|
|
96
102
|
if not self.attributes["image"]:
|
|
@@ -143,9 +149,12 @@ class SnowparkDecorator(StepDecorator):
|
|
|
143
149
|
except (NameError, ImportError, ModuleNotFoundError):
|
|
144
150
|
raise SnowflakeException(
|
|
145
151
|
"Could not import module 'snowflake'.\n\nInstall Snowflake "
|
|
146
|
-
"Python
|
|
147
|
-
"
|
|
148
|
-
"
|
|
152
|
+
"Python packages first:\n"
|
|
153
|
+
" snowflake==1.8.0\n"
|
|
154
|
+
" snowflake-connector-python==3.18.0\n"
|
|
155
|
+
" snowflake-snowpark-python==1.40.0\n\n"
|
|
156
|
+
"You can install them by executing:\n"
|
|
157
|
+
"%s -m pip install snowflake==1.8.0 snowflake-connector-python==3.18.0 snowflake-snowpark-python==1.40.0\n"
|
|
149
158
|
"or equivalent through your favorite Python package manager."
|
|
150
159
|
% sys.executable
|
|
151
160
|
)
|
|
@@ -12,9 +12,9 @@ from .snowpark_exceptions import SnowparkException
|
|
|
12
12
|
mapping = str.maketrans("0123456789", "abcdefghij")
|
|
13
13
|
|
|
14
14
|
|
|
15
|
-
# keep only alpha numeric characters and
|
|
15
|
+
# keep only alphanumeric characters and dashes.
|
|
16
16
|
def sanitize_name(job_name: str):
|
|
17
|
-
return "".join(char for char in job_name if char.isalnum() or char == "
|
|
17
|
+
return "".join(char for char in job_name if char.isalnum() or char == "-")
|
|
18
18
|
|
|
19
19
|
|
|
20
20
|
# this is not a decorator since the exception imports need to be inside
|
|
@@ -199,11 +199,17 @@ class RunningJob(object):
|
|
|
199
199
|
|
|
200
200
|
@property
|
|
201
201
|
def status(self):
|
|
202
|
-
|
|
202
|
+
status_list = self.status_obj()
|
|
203
|
+
if not status_list:
|
|
204
|
+
return "UNKNOWN"
|
|
205
|
+
return status_list[0].get("status", "UNKNOWN")
|
|
203
206
|
|
|
204
207
|
@property
|
|
205
208
|
def message(self):
|
|
206
|
-
|
|
209
|
+
status_list = self.status_obj()
|
|
210
|
+
if not status_list:
|
|
211
|
+
return None
|
|
212
|
+
return status_list[0].get("message")
|
|
207
213
|
|
|
208
214
|
@property
|
|
209
215
|
def is_waiting(self):
|
|
@@ -13,12 +13,14 @@ class TorchTune:
|
|
|
13
13
|
def __init__(
|
|
14
14
|
self,
|
|
15
15
|
use_multi_node_config: bool = False,
|
|
16
|
+
config_overrides: Optional[Dict] = None,
|
|
16
17
|
):
|
|
17
18
|
"""
|
|
18
19
|
Initialize the Tune launcher.
|
|
19
20
|
|
|
20
21
|
:param use_multi_node_config: If True, attempt to build a distributed configuration
|
|
21
22
|
from current.torch.torchrun_args.
|
|
23
|
+
:param config_overrides: Optional dictionary of config overrides for tune run.
|
|
22
24
|
"""
|
|
23
25
|
self.multi_node_config = {}
|
|
24
26
|
if use_multi_node_config:
|
|
@@ -37,6 +39,8 @@ class TorchTune:
|
|
|
37
39
|
"num_processes": current.torch.torchrun_args["nproc_per_node"]
|
|
38
40
|
* current.torch.torchrun_args["nnodes"],
|
|
39
41
|
}
|
|
42
|
+
if config_overrides:
|
|
43
|
+
self.multi_node_config.update(config_overrides)
|
|
40
44
|
print(
|
|
41
45
|
f"[Metaflow Tune] Discovered multi-node config for torchrun: {self.multi_node_config}"
|
|
42
46
|
)
|
|
@@ -1,17 +1,30 @@
|
|
|
1
1
|
from metaflow.decorators import StepDecorator
|
|
2
2
|
from metaflow import current
|
|
3
3
|
import functools
|
|
4
|
-
import
|
|
4
|
+
from enum import Enum
|
|
5
5
|
import threading
|
|
6
6
|
from metaflow.unbounded_foreach import UBF_CONTROL, UBF_TASK
|
|
7
7
|
from metaflow.metaflow_config import from_conf
|
|
8
8
|
|
|
9
|
-
from .vllm_manager import
|
|
9
|
+
from .vllm_manager import VLLMOpenAIManager, VLLMPyManager
|
|
10
10
|
from .status_card import VLLMStatusCard, CardDecoratorInjector
|
|
11
11
|
|
|
12
12
|
__mf_promote_submodules__ = ["plugins.vllm"]
|
|
13
13
|
|
|
14
14
|
|
|
15
|
+
### The following classes are used to store the vLLM information in the current environment.
|
|
16
|
+
# Then, Metaflow users can access the vLLM information through the current environment.
|
|
17
|
+
class OpenAIAPIInfo:
|
|
18
|
+
def __init__(self, local_endpoint, local_api_key):
|
|
19
|
+
self.local_endpoint = local_endpoint
|
|
20
|
+
self.local_api_key = local_api_key
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class VLLM:
|
|
24
|
+
def __init__(self, llm):
|
|
25
|
+
self.llm = llm
|
|
26
|
+
|
|
27
|
+
|
|
15
28
|
class VLLMDecorator(StepDecorator, CardDecoratorInjector):
|
|
16
29
|
"""
|
|
17
30
|
This decorator is used to run vllm APIs as Metaflow task sidecars.
|
|
@@ -40,11 +53,23 @@ class VLLMDecorator(StepDecorator, CardDecoratorInjector):
|
|
|
40
53
|
HuggingFace model identifier to be served by vLLM.
|
|
41
54
|
backend: str
|
|
42
55
|
Determines where and how to run the vLLM process.
|
|
56
|
+
openai_api_server: bool
|
|
57
|
+
Whether to use OpenAI-compatible API server mode (subprocess) instead of native engine.
|
|
58
|
+
Default is False (uses native engine).
|
|
59
|
+
Set to True for backward compatibility with existing code.
|
|
43
60
|
debug: bool
|
|
44
61
|
Whether to turn on verbose debugging logs.
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
62
|
+
card_refresh_interval: int
|
|
63
|
+
Interval in seconds for refreshing the vLLM status card.
|
|
64
|
+
Only used when openai_api_server=True.
|
|
65
|
+
max_retries: int
|
|
66
|
+
Maximum number of retries checking for vLLM server startup.
|
|
67
|
+
Only used when openai_api_server=True.
|
|
68
|
+
retry_alert_frequency: int
|
|
69
|
+
Frequency of alert logs for vLLM server startup retries.
|
|
70
|
+
Only used when openai_api_server=True.
|
|
71
|
+
engine_args : dict
|
|
72
|
+
Additional keyword arguments to pass to the vLLM engine.
|
|
48
73
|
For example, `tensor_parallel_size=2`.
|
|
49
74
|
"""
|
|
50
75
|
|
|
@@ -52,9 +77,12 @@ class VLLMDecorator(StepDecorator, CardDecoratorInjector):
|
|
|
52
77
|
defaults = {
|
|
53
78
|
"model": None,
|
|
54
79
|
"backend": "local",
|
|
80
|
+
"openai_api_server": False, # Default to native engine
|
|
55
81
|
"debug": False,
|
|
56
82
|
"stream_logs_to_card": False,
|
|
57
83
|
"card_refresh_interval": 10,
|
|
84
|
+
"max_retries": 60,
|
|
85
|
+
"retry_alert_frequency": 5,
|
|
58
86
|
"engine_args": {},
|
|
59
87
|
}
|
|
60
88
|
|
|
@@ -72,106 +100,156 @@ class VLLMDecorator(StepDecorator, CardDecoratorInjector):
|
|
|
72
100
|
f"Example: @vllm(model='meta-llama/Llama-3.2-1B')"
|
|
73
101
|
)
|
|
74
102
|
|
|
75
|
-
# Attach the vllm status card
|
|
76
|
-
self.
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
103
|
+
# Attach the vllm status card only for API server mode
|
|
104
|
+
if self.attributes["openai_api_server"]:
|
|
105
|
+
self.attach_card_decorator(
|
|
106
|
+
flow,
|
|
107
|
+
step_name,
|
|
108
|
+
"vllm_status",
|
|
109
|
+
"blank",
|
|
110
|
+
refresh_interval=self.attributes["card_refresh_interval"],
|
|
111
|
+
)
|
|
83
112
|
|
|
84
113
|
def task_decorate(
|
|
85
114
|
self, step_func, flow, graph, retry_count, max_user_code_retries, ubf_context
|
|
86
115
|
):
|
|
87
116
|
@functools.wraps(step_func)
|
|
88
117
|
def vllm_wrapper():
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
118
|
+
# FIXME: Kind of ugly branch. Causing branching elsewhere.
|
|
119
|
+
# Other possible code paths:
|
|
120
|
+
# - OpenAI batch API
|
|
121
|
+
# - Embedding
|
|
122
|
+
# - Special types of models
|
|
123
|
+
if self.attributes["openai_api_server"]:
|
|
124
|
+
# API Server mode (existing functionality)
|
|
125
|
+
self._run_api_server_mode(step_func)
|
|
126
|
+
else:
|
|
127
|
+
# Native engine mode (new functionality)
|
|
128
|
+
self._run_native_engine_mode(step_func)
|
|
92
129
|
|
|
93
|
-
|
|
94
|
-
self.status_card = VLLMStatusCard(
|
|
95
|
-
refresh_interval=self.attributes["card_refresh_interval"]
|
|
96
|
-
)
|
|
130
|
+
return vllm_wrapper
|
|
97
131
|
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
132
|
+
def _run_api_server_mode(self, step_func):
|
|
133
|
+
"""Run vLLM in API server mode (subprocess, existing functionality)"""
|
|
134
|
+
self.vllm_manager = None
|
|
135
|
+
self.status_card = None
|
|
136
|
+
self.card_monitor_thread = None
|
|
137
|
+
|
|
138
|
+
try:
|
|
139
|
+
self.status_card = VLLMStatusCard(
|
|
140
|
+
refresh_interval=self.attributes["card_refresh_interval"]
|
|
141
|
+
)
|
|
142
|
+
|
|
143
|
+
def monitor_card():
|
|
144
|
+
try:
|
|
145
|
+
self.status_card.on_startup(current.card["vllm_status"])
|
|
146
|
+
|
|
147
|
+
while not getattr(self.card_monitor_thread, "_stop_event", False):
|
|
148
|
+
try:
|
|
149
|
+
self.status_card.on_update(
|
|
150
|
+
current.card["vllm_status"], None
|
|
151
|
+
)
|
|
152
|
+
import time
|
|
153
|
+
|
|
154
|
+
time.sleep(self.attributes["card_refresh_interval"])
|
|
155
|
+
except Exception as e:
|
|
156
|
+
if self.attributes["debug"]:
|
|
157
|
+
print(f"[@vllm] Card monitoring error: {e}")
|
|
158
|
+
break
|
|
159
|
+
except Exception as e:
|
|
160
|
+
if self.attributes["debug"]:
|
|
161
|
+
print(f"[@vllm] Card monitor thread error: {e}")
|
|
162
|
+
self.status_card.on_error(current.card["vllm_status"], str(e))
|
|
163
|
+
|
|
164
|
+
self.card_monitor_thread = threading.Thread(
|
|
165
|
+
target=monitor_card, daemon=True
|
|
166
|
+
)
|
|
167
|
+
self.card_monitor_thread._stop_event = False
|
|
168
|
+
self.card_monitor_thread.start()
|
|
169
|
+
self.vllm_manager = VLLMOpenAIManager(
|
|
170
|
+
model=self.attributes["model"],
|
|
171
|
+
backend=self.attributes["backend"],
|
|
172
|
+
debug=self.attributes["debug"],
|
|
173
|
+
status_card=self.status_card,
|
|
174
|
+
max_retries=self.attributes["max_retries"],
|
|
175
|
+
retry_alert_frequency=self.attributes["retry_alert_frequency"],
|
|
176
|
+
stream_logs_to_card=self.attributes["stream_logs_to_card"],
|
|
177
|
+
**self.attributes["engine_args"],
|
|
178
|
+
)
|
|
179
|
+
current._update_env(
|
|
180
|
+
dict(
|
|
181
|
+
vllm=OpenAIAPIInfo(
|
|
182
|
+
local_endpoint=f"http://127.0.0.1:{self.vllm_manager.port}/v1",
|
|
183
|
+
local_api_key="token123",
|
|
184
|
+
)
|
|
123
185
|
)
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
186
|
+
)
|
|
187
|
+
|
|
188
|
+
if self.attributes["debug"]:
|
|
189
|
+
print("[@vllm] API server mode initialized.")
|
|
190
|
+
|
|
191
|
+
except Exception as e:
|
|
192
|
+
if self.status_card:
|
|
193
|
+
self.status_card.add_event("error", f"Initialization failed: {str(e)}")
|
|
194
|
+
try:
|
|
195
|
+
self.status_card.on_error(current.card["vllm_status"], str(e))
|
|
196
|
+
except:
|
|
197
|
+
pass
|
|
198
|
+
print(f"[@vllm] Error initializing API server mode: {e}")
|
|
199
|
+
raise
|
|
200
|
+
|
|
201
|
+
try:
|
|
202
|
+
if self.status_card:
|
|
203
|
+
self.status_card.add_event("info", "Starting user step function")
|
|
204
|
+
step_func()
|
|
205
|
+
if self.status_card:
|
|
206
|
+
self.status_card.add_event(
|
|
207
|
+
"success", "User step function completed successfully"
|
|
133
208
|
)
|
|
134
|
-
|
|
135
|
-
|
|
209
|
+
finally:
|
|
210
|
+
if self.vllm_manager:
|
|
211
|
+
self.vllm_manager.terminate_models()
|
|
136
212
|
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
)
|
|
142
|
-
|
|
143
|
-
self.status_card.on_error(current.card["vllm_status"], str(e))
|
|
144
|
-
except:
|
|
145
|
-
pass
|
|
146
|
-
print(f"[@vllm] Error initializing VLLMManager: {e}")
|
|
147
|
-
raise
|
|
148
|
-
|
|
149
|
-
try:
|
|
150
|
-
if self.status_card:
|
|
151
|
-
self.status_card.add_event("info", "Starting user step function")
|
|
152
|
-
step_func()
|
|
153
|
-
if self.status_card:
|
|
154
|
-
self.status_card.add_event(
|
|
155
|
-
"success", "User step function completed successfully"
|
|
156
|
-
)
|
|
157
|
-
finally:
|
|
158
|
-
if self.vllm_manager:
|
|
159
|
-
self.vllm_manager.terminate_models()
|
|
160
|
-
|
|
161
|
-
if self.card_monitor_thread and self.status_card:
|
|
162
|
-
import time
|
|
163
|
-
|
|
164
|
-
try:
|
|
165
|
-
self.status_card.on_update(current.card["vllm_status"], None)
|
|
166
|
-
except Exception as e:
|
|
167
|
-
if self.attributes["debug"]:
|
|
168
|
-
print(f"[@vllm] Final card update error: {e}")
|
|
169
|
-
time.sleep(2)
|
|
170
|
-
|
|
171
|
-
if self.card_monitor_thread:
|
|
172
|
-
self.card_monitor_thread._stop_event = True
|
|
173
|
-
self.card_monitor_thread.join(timeout=5)
|
|
213
|
+
if self.card_monitor_thread and self.status_card:
|
|
214
|
+
import time
|
|
215
|
+
|
|
216
|
+
try:
|
|
217
|
+
self.status_card.on_update(current.card["vllm_status"], None)
|
|
218
|
+
except Exception as e:
|
|
174
219
|
if self.attributes["debug"]:
|
|
175
|
-
print("[@vllm]
|
|
220
|
+
print(f"[@vllm] Final card update error: {e}")
|
|
221
|
+
time.sleep(2)
|
|
176
222
|
|
|
177
|
-
|
|
223
|
+
if self.card_monitor_thread:
|
|
224
|
+
self.card_monitor_thread._stop_event = True
|
|
225
|
+
self.card_monitor_thread.join(timeout=5)
|
|
226
|
+
if self.attributes["debug"]:
|
|
227
|
+
print("[@vllm] Card monitoring thread stopped.")
|
|
228
|
+
|
|
229
|
+
def _run_native_engine_mode(self, step_func):
|
|
230
|
+
"""Run vLLM in native engine mode (direct LLM API access)"""
|
|
231
|
+
self.vllm = None
|
|
232
|
+
|
|
233
|
+
try:
|
|
234
|
+
if self.attributes["debug"]:
|
|
235
|
+
print("[@vllm] Initializing native engine mode")
|
|
236
|
+
|
|
237
|
+
self.vllm = VLLMPyManager(
|
|
238
|
+
model=self.attributes["model"],
|
|
239
|
+
debug=self.attributes["debug"],
|
|
240
|
+
**self.attributes["engine_args"],
|
|
241
|
+
)
|
|
242
|
+
current._update_env(dict(vllm=VLLM(llm=self.vllm.engine)))
|
|
243
|
+
|
|
244
|
+
if self.attributes["debug"]:
|
|
245
|
+
print("[@vllm] Native engine mode initialized.")
|
|
246
|
+
|
|
247
|
+
except Exception as e:
|
|
248
|
+
print(f"[@vllm] Error initializing native engine mode: {e}")
|
|
249
|
+
raise
|
|
250
|
+
|
|
251
|
+
try:
|
|
252
|
+
step_func()
|
|
253
|
+
finally:
|
|
254
|
+
if self.vllm:
|
|
255
|
+
self.vllm.terminate_engine()
|
|
@@ -160,10 +160,10 @@ class VLLMStatusCard(CardRefresher):
|
|
|
160
160
|
# Keep only last 10 events
|
|
161
161
|
self.status_data["events"] = self.status_data["events"][:10]
|
|
162
162
|
|
|
163
|
-
def get_circuit_breaker_emoji(self, state):
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
163
|
+
# def get_circuit_breaker_emoji(self, state):
|
|
164
|
+
# """Get status emoji for circuit breaker state"""
|
|
165
|
+
# emoji_map = {"CLOSED": "🟢", "OPEN": "🔴", "HALF_OPEN": "🟡"}
|
|
166
|
+
# return emoji_map.get(state, "⚪")
|
|
167
167
|
|
|
168
168
|
def get_uptime_string(self, start_time):
|
|
169
169
|
"""Calculate uptime string"""
|
|
@@ -252,11 +252,11 @@ class VLLMStatusCard(CardRefresher):
|
|
|
252
252
|
)
|
|
253
253
|
|
|
254
254
|
# Simplified monitoring note
|
|
255
|
-
current_card.append(
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
)
|
|
255
|
+
# current_card.append(
|
|
256
|
+
# Markdown(
|
|
257
|
+
# "## 🔧 Monitoring\n**Advanced Features:** Disabled (Circuit Breaker, Request Interception)"
|
|
258
|
+
# )
|
|
259
|
+
# )
|
|
260
260
|
|
|
261
261
|
# Performance metrics
|
|
262
262
|
perf_data = data["performance"]
|