nemo_evaluator_launcher-0.1.28-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of nemo-evaluator-launcher might be problematic.
- nemo_evaluator_launcher/__init__.py +79 -0
- nemo_evaluator_launcher/api/__init__.py +24 -0
- nemo_evaluator_launcher/api/functional.py +698 -0
- nemo_evaluator_launcher/api/types.py +98 -0
- nemo_evaluator_launcher/api/utils.py +19 -0
- nemo_evaluator_launcher/cli/__init__.py +15 -0
- nemo_evaluator_launcher/cli/export.py +267 -0
- nemo_evaluator_launcher/cli/info.py +512 -0
- nemo_evaluator_launcher/cli/kill.py +41 -0
- nemo_evaluator_launcher/cli/ls_runs.py +134 -0
- nemo_evaluator_launcher/cli/ls_tasks.py +136 -0
- nemo_evaluator_launcher/cli/main.py +226 -0
- nemo_evaluator_launcher/cli/run.py +200 -0
- nemo_evaluator_launcher/cli/status.py +164 -0
- nemo_evaluator_launcher/cli/version.py +55 -0
- nemo_evaluator_launcher/common/__init__.py +16 -0
- nemo_evaluator_launcher/common/execdb.py +283 -0
- nemo_evaluator_launcher/common/helpers.py +366 -0
- nemo_evaluator_launcher/common/logging_utils.py +357 -0
- nemo_evaluator_launcher/common/mapping.py +295 -0
- nemo_evaluator_launcher/common/printing_utils.py +93 -0
- nemo_evaluator_launcher/configs/__init__.py +15 -0
- nemo_evaluator_launcher/configs/default.yaml +28 -0
- nemo_evaluator_launcher/configs/deployment/generic.yaml +33 -0
- nemo_evaluator_launcher/configs/deployment/nim.yaml +32 -0
- nemo_evaluator_launcher/configs/deployment/none.yaml +16 -0
- nemo_evaluator_launcher/configs/deployment/sglang.yaml +38 -0
- nemo_evaluator_launcher/configs/deployment/trtllm.yaml +24 -0
- nemo_evaluator_launcher/configs/deployment/vllm.yaml +42 -0
- nemo_evaluator_launcher/configs/execution/lepton/default.yaml +92 -0
- nemo_evaluator_launcher/configs/execution/local.yaml +19 -0
- nemo_evaluator_launcher/configs/execution/slurm/default.yaml +34 -0
- nemo_evaluator_launcher/executors/__init__.py +22 -0
- nemo_evaluator_launcher/executors/base.py +120 -0
- nemo_evaluator_launcher/executors/lepton/__init__.py +16 -0
- nemo_evaluator_launcher/executors/lepton/deployment_helpers.py +609 -0
- nemo_evaluator_launcher/executors/lepton/executor.py +1004 -0
- nemo_evaluator_launcher/executors/lepton/job_helpers.py +398 -0
- nemo_evaluator_launcher/executors/local/__init__.py +15 -0
- nemo_evaluator_launcher/executors/local/executor.py +605 -0
- nemo_evaluator_launcher/executors/local/run.template.sh +103 -0
- nemo_evaluator_launcher/executors/registry.py +38 -0
- nemo_evaluator_launcher/executors/slurm/__init__.py +15 -0
- nemo_evaluator_launcher/executors/slurm/executor.py +1147 -0
- nemo_evaluator_launcher/exporters/__init__.py +36 -0
- nemo_evaluator_launcher/exporters/base.py +121 -0
- nemo_evaluator_launcher/exporters/gsheets.py +409 -0
- nemo_evaluator_launcher/exporters/local.py +502 -0
- nemo_evaluator_launcher/exporters/mlflow.py +619 -0
- nemo_evaluator_launcher/exporters/registry.py +40 -0
- nemo_evaluator_launcher/exporters/utils.py +624 -0
- nemo_evaluator_launcher/exporters/wandb.py +490 -0
- nemo_evaluator_launcher/package_info.py +38 -0
- nemo_evaluator_launcher/resources/mapping.toml +380 -0
- nemo_evaluator_launcher-0.1.28.dist-info/METADATA +494 -0
- nemo_evaluator_launcher-0.1.28.dist-info/RECORD +60 -0
- nemo_evaluator_launcher-0.1.28.dist-info/WHEEL +5 -0
- nemo_evaluator_launcher-0.1.28.dist-info/entry_points.txt +3 -0
- nemo_evaluator_launcher-0.1.28.dist-info/licenses/LICENSE +451 -0
- nemo_evaluator_launcher-0.1.28.dist-info/top_level.txt +1 -0
@@ -0,0 +1,609 @@

# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""Lepton deployment helper functions for nemo-evaluator-launcher.

Handles Lepton endpoint creation, management, and health checks.
"""

import json
import subprocess
import time
from pathlib import Path
from typing import Any, Dict, Optional

# Import lepton dependencies
from omegaconf import DictConfig

from nemo_evaluator_launcher.common.logging_utils import logger

def deep_merge(base: Dict[Any, Any], override: Dict[Any, Any]) -> Dict[Any, Any]:
    """Deep merge two dictionaries, with override taking precedence."""
    result = base.copy()

    for key, value in override.items():
        if key in result and isinstance(result[key], dict) and isinstance(value, dict):
            result[key] = deep_merge(result[key], value)
        else:
            result[key] = value

    return result


def replace_placeholders(data: Any, replacements: Dict[str, str]) -> Any:
    """Replace placeholders in the data structure."""

    def replace_in_obj(obj: Any) -> Any:
        if isinstance(obj, dict):
            return {k: replace_in_obj(v) for k, v in obj.items()}
        elif isinstance(obj, list):
            return [replace_in_obj(item) for item in obj]
        elif isinstance(obj, str):
            result = obj
            for placeholder, value in replacements.items():
                result = result.replace(f"{{{{{placeholder}}}}}", value)
            return result
        else:
            return obj

    return replace_in_obj(data)

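# Illustrative sketch (not part of the packaged file): how the two helpers above
# behave. The dictionaries and placeholder values below are made up, not launcher
# defaults.
#
#   deep_merge(
#       {"resource_requirement": {"resource_shape": "cpu.small"}},
#       {"resource_requirement": {"min_replicas": 1}},
#   )
#   # -> {"resource_requirement": {"resource_shape": "cpu.small", "min_replicas": 1}}
#
#   replace_placeholders(
#       {"path": "/shared/model-cache/{{MODEL_CACHE_NAME}}"},
#       {"MODEL_CACHE_NAME": "my-model"},
#   )
#   # -> {"path": "/shared/model-cache/my-model"}
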
def generate_lepton_spec(cfg: DictConfig) -> Dict[str, Any]:
    """Generate a Lepton endpoint specification from nemo-evaluator-launcher configuration.

    This function creates a layered configuration by merging:
    1. Platform defaults (from execution.lepton_platform.platform_defaults)
    2. Environment settings (from execution.lepton_platform)
    3. Inference engine config (from deployment.* - vllm/sglang settings)
    4. Lepton platform config (from deployment.lepton_config - Lepton-specific settings)

    Args:
        cfg: The nemo-evaluator-launcher configuration object containing all settings.

    Returns:
        Dict containing the Lepton endpoint specification.
    """

    # Step 1: Start with platform defaults from execution config
    platform_defaults = {}
    if hasattr(cfg, "execution") and hasattr(cfg.execution, "lepton_platform"):
        deployment_config = cfg.execution.lepton_platform.get("deployment", {})
        platform_defaults = deployment_config.get("platform_defaults", {})

    base_config = deep_merge({}, platform_defaults)

    # Step 2: Apply deployment-specific settings from execution config
    if hasattr(cfg, "execution") and hasattr(cfg.execution, "lepton_platform"):
        lepton_platform = cfg.execution.lepton_platform
        deployment_config = lepton_platform.get("deployment", {})

        # Add deployment node group as affinity constraint
        deployment_node_group = deployment_config.get("node_group")
        if deployment_node_group:
            if not base_config.get("resource_requirement"):
                base_config["resource_requirement"] = {}
            base_config["resource_requirement"]["affinity"] = {
                "allowed_dedicated_node_groups": [deployment_node_group]
            }

        # Add queue config from platform defaults
        platform_defaults = deployment_config.get("platform_defaults", {})
        if platform_defaults.get("queue_config"):
            base_config["queue_config"] = platform_defaults.get("queue_config")

    # Step 3: Get Lepton-specific config from deployment.lepton_config
    if not hasattr(cfg.deployment, "lepton_config"):
        raise ValueError(
            "deployment.lepton_config is required when using Lepton executor"
        )

    lepton_config = cfg.deployment.lepton_config

    # Step 4: Convert inference engine config to container spec
    container_spec = _create_inference_container_spec(cfg.deployment)

    # Step 5: Apply Lepton platform deployment configurations
    deployment_config = {
        "resource_requirement": {
            **base_config.get("resource_requirement", {}),
            "resource_shape": lepton_config.resource_shape,
            "min_replicas": lepton_config.min_replicas,
            "max_replicas": lepton_config.max_replicas,
        },
        "auto_scaler": lepton_config.auto_scaler,
        "container": container_spec,
        "envs": [],
    }

    # Add health check configuration if provided
    if hasattr(lepton_config, "health") and lepton_config.health:
        deployment_config["health"] = lepton_config.health
    # Merge deployment config into base config
    final_config = deep_merge(base_config, deployment_config)

    # Step 6: Add environment variables from lepton_config
    if hasattr(lepton_config, "envs") and lepton_config.envs:
        from omegaconf import DictConfig

        for key, value in lepton_config.envs.items():
            env_var: Dict[str, Any] = {"name": key}

            # Support both direct values and secret references
            if isinstance(value, (dict, DictConfig)) and "value_from" in value:
                # Secret reference: {value_from: {secret_name_ref: "secret_name"}}
                env_var["value_from"] = dict(value["value_from"])
            else:
                # Direct value: "direct_value"
                env_var["value"] = str(value)

            final_config["envs"].append(env_var)

    # Step 6b: Auto-populate environment variables from deployment parameters
    _add_deployment_derived_envs(final_config["envs"], cfg.deployment)

    # Step 7: Add mounts with intelligent path construction
    if hasattr(lepton_config, "mounts") and lepton_config.mounts.enabled:
        # Get storage source from task config mounts (since mounts are shared between tasks and deployments)
        storage_source = "node-nfs:lepton-shared-fs"  # default
        if hasattr(cfg, "execution") and hasattr(cfg.execution, "lepton_platform"):
            task_config = cfg.execution.lepton_platform.get("tasks", {})
            task_mounts = task_config.get("mounts", [])
            if task_mounts:
                storage_source = task_mounts[0].get("from", storage_source)

        final_config["mounts"] = [
            {
                "path": lepton_config.mounts.cache_path,
                "from": storage_source,
                "mount_path": lepton_config.mounts.mount_path,
                "mount_options": {},
            }
        ]

    # Step 8: Extract image_pull_secrets to top level (required by Lepton API)
    if "image_pull_secrets" in final_config:
        image_pull_secrets = final_config["image_pull_secrets"]
        # Convert OmegaConf ListConfig to regular Python list
        from omegaconf import ListConfig

        if isinstance(image_pull_secrets, (list, ListConfig)):
            final_config["image_pull_secrets"] = list(image_pull_secrets)
        else:
            # Remove invalid image_pull_secrets
            final_config.pop("image_pull_secrets", None)

    # Step 9: Add API tokens if provided (supports both single and multiple tokens)
    if hasattr(lepton_config, "api_tokens") and lepton_config.api_tokens:
        from omegaconf import DictConfig

        api_tokens_list = []

        for token_config in lepton_config.api_tokens:
            token_var: Dict[str, Any] = {}

            # Support both direct values and secret references
            if isinstance(token_config, (dict, DictConfig)):
                if "value" in token_config:
                    # Direct value: {value: "token_string"}
                    token_var["value"] = str(token_config["value"])
                elif "value_from" in token_config:
                    # Secret reference: {value_from: {secret_name_ref: "secret_name"}}
                    token_var["value_from"] = dict(token_config["value_from"])
            else:
                # Simple string value
                token_var["value"] = str(token_config)

            api_tokens_list.append(token_var)

        final_config["api_tokens"] = api_tokens_list

    # Backward compatibility: support legacy single api_token
    elif hasattr(lepton_config, "api_token") and lepton_config.api_token:
        final_config["api_tokens"] = [{"value": lepton_config.api_token}]

    # Step 10: Replace placeholders
    replacements = {
        "MODEL_CACHE_NAME": _generate_model_cache_name(cfg.deployment.image)
    }
    final_config_with_replacements: Dict[str, Any] = replace_placeholders(
        final_config, replacements
    )

    return final_config_with_replacements

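# Illustrative sketch (not part of the packaged file): rough shape of the spec
# returned by generate_lepton_spec(). Values below are hypothetical, and which
# keys appear depends on the launcher configuration that was passed in.
#
#   {
#       "resource_requirement": {
#           "resource_shape": "gpu.1xa100",        # from deployment.lepton_config
#           "min_replicas": 1,
#           "max_replicas": 1,
#           "affinity": {"allowed_dedicated_node_groups": ["my-node-group"]},
#       },
#       "auto_scaler": {...},                      # passed through from lepton_config
#       "container": {"image": "...", "ports": [{"container_port": 8000}], "command": [...]},
#       "envs": [{"name": "SERVED_MODEL_NAME", "value": "my-model"}, ...],
#       "mounts": [{"path": "...", "from": "node-nfs:lepton-shared-fs", "mount_path": "...", "mount_options": {}}],
#       "api_tokens": [{"value_from": {"secret_name_ref": "MY_TOKEN"}}],
#   }
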
def _create_inference_container_spec(deployment_cfg: DictConfig) -> Dict[str, Any]:
    """Create container specification from inference engine config (vLLM/SGLang/NIM).

    Args:
        deployment_cfg: Deployment configuration containing vLLM/SGLang/NIM settings.

    Returns:
        Container specification for Lepton.
    """
    container_spec = {
        "image": deployment_cfg.image,
        "ports": [{"container_port": deployment_cfg.port}],
    }

    # Generate command based on deployment type
    if deployment_cfg.type == "vllm":
        # Convert vLLM command template to actual command
        command_parts = [
            "vllm",
            "serve",
            deployment_cfg.checkpoint_path,
            f"--tensor-parallel-size={deployment_cfg.tensor_parallel_size}",
            f"--pipeline-parallel-size={deployment_cfg.pipeline_parallel_size}",
            f"--data-parallel-size={deployment_cfg.data_parallel_size}",
            f"--port={deployment_cfg.port}",
            f"--served-model-name={deployment_cfg.served_model_name}",
        ]

        # Add extra args if provided
        if hasattr(deployment_cfg, "extra_args") and deployment_cfg.extra_args:
            command_parts.extend(deployment_cfg.extra_args.split())

        container_spec["command"] = command_parts

    elif deployment_cfg.type == "sglang":
        # Convert SGLang command template to actual command
        command_parts = [
            "python3",
            "-m",
            "sglang.launch_server",
            f"--model-path={deployment_cfg.checkpoint_path}",
            "--host=0.0.0.0",
            f"--port={deployment_cfg.port}",
            f"--served-model-name={deployment_cfg.served_model_name}",
            f"--tp={deployment_cfg.tensor_parallel_size}",
            f"--dp={deployment_cfg.data_parallel_size}",
        ]

        # Add extra args if provided
        if hasattr(deployment_cfg, "extra_args") and deployment_cfg.extra_args:
            command_parts.extend(deployment_cfg.extra_args.split())

        container_spec["command"] = command_parts

    elif deployment_cfg.type == "nim":
        # NIM containers use their default entrypoint - no custom command needed
        # Configuration is handled via environment variables
        pass

    return container_spec

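# Illustrative sketch (not part of the packaged file): for a hypothetical vLLM
# deployment config (image="vllm/vllm-openai:latest", port=8000, parallel sizes
# of 1, checkpoint_path="meta-llama/Llama-3.1-8B-Instruct",
# served_model_name="llama-3.1-8b-instruct"), the helper above would return:
#
#   {
#       "image": "vllm/vllm-openai:latest",
#       "ports": [{"container_port": 8000}],
#       "command": [
#           "vllm", "serve", "meta-llama/Llama-3.1-8B-Instruct",
#           "--tensor-parallel-size=1", "--pipeline-parallel-size=1",
#           "--data-parallel-size=1", "--port=8000",
#           "--served-model-name=llama-3.1-8b-instruct",
#       ],
#   }
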
def _add_deployment_derived_envs(envs_list: list, deployment_cfg: DictConfig) -> None:
    """Add environment variables derived from deployment configuration.

    Args:
        envs_list: List to append environment variables to.
        deployment_cfg: Deployment configuration to derive from.
    """
    deployment_type = deployment_cfg.type

    # Common environment variables for all deployment types
    if (
        hasattr(deployment_cfg, "served_model_name")
        and deployment_cfg.served_model_name
    ):
        envs_list.append(
            {"name": "SERVED_MODEL_NAME", "value": deployment_cfg.served_model_name}
        )

    if hasattr(deployment_cfg, "port") and deployment_cfg.port:
        envs_list.append({"name": "MODEL_PORT", "value": str(deployment_cfg.port)})

    # Deployment-specific environment variables
    if deployment_type == "vllm":
        if (
            hasattr(deployment_cfg, "checkpoint_path")
            and deployment_cfg.checkpoint_path
        ):
            envs_list.append(
                {"name": "MODEL_PATH", "value": deployment_cfg.checkpoint_path}
            )
        if (
            hasattr(deployment_cfg, "tensor_parallel_size")
            and deployment_cfg.tensor_parallel_size
        ):
            envs_list.append(
                {
                    "name": "TENSOR_PARALLEL_SIZE",
                    "value": str(deployment_cfg.tensor_parallel_size),
                }
            )

    elif deployment_type == "sglang":
        if (
            hasattr(deployment_cfg, "checkpoint_path")
            and deployment_cfg.checkpoint_path
        ):
            envs_list.append(
                {"name": "MODEL_PATH", "value": deployment_cfg.checkpoint_path}
            )
        if (
            hasattr(deployment_cfg, "tensor_parallel_size")
            and deployment_cfg.tensor_parallel_size
        ):
            envs_list.append(
                {
                    "name": "TENSOR_PARALLEL_SIZE",
                    "value": str(deployment_cfg.tensor_parallel_size),
                }
            )

    elif deployment_type == "nim":
        # NIM-specific derived environment variables
        if (
            hasattr(deployment_cfg, "served_model_name")
            and deployment_cfg.served_model_name
        ):
            envs_list.append(
                {"name": "NIM_MODEL_NAME", "value": deployment_cfg.served_model_name}
            )

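# Illustrative sketch (not part of the packaged file): for the same hypothetical
# vLLM deployment as above (served_model_name="llama-3.1-8b-instruct", port=8000,
# checkpoint_path="meta-llama/Llama-3.1-8B-Instruct", tensor_parallel_size=1),
# this helper appends:
#
#   [
#       {"name": "SERVED_MODEL_NAME", "value": "llama-3.1-8b-instruct"},
#       {"name": "MODEL_PORT", "value": "8000"},
#       {"name": "MODEL_PATH", "value": "meta-llama/Llama-3.1-8B-Instruct"},
#       {"name": "TENSOR_PARALLEL_SIZE", "value": "1"},
#   ]
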
def _generate_model_cache_name(image: str) -> str:
    """Generate a cache directory name from the container image.

    Args:
        image: Container image string like 'nvcr.io/nim/meta/llama-3.1-8b-instruct:1.8.6'

    Returns:
        Clean cache name like 'llama-3-1-8b-instruct'
    """
    # Extract model name from image path
    if "/" in image:
        model_part = image.split("/")[-1]  # Get 'llama-3.1-8b-instruct:1.8.6'
    else:
        model_part = image

    # Remove version tag
    if ":" in model_part:
        model_part = model_part.split(":")[0]  # Get 'llama-3.1-8b-instruct'

    # Replace dots with dashes for filesystem compatibility
    return model_part.replace(".", "-")

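# Illustrative sketch (not part of the packaged file): the cache-name derivation
# above, applied to the docstring's own example image.
#
#   _generate_model_cache_name("nvcr.io/nim/meta/llama-3.1-8b-instruct:1.8.6")
#   # -> "llama-3-1-8b-instruct"
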
def create_lepton_endpoint(cfg: DictConfig, endpoint_name: str) -> bool:
    """Create a Lepton endpoint using the lep CLI.

    Args:
        cfg: The nemo-evaluator-launcher configuration object.
        endpoint_name: Name for the endpoint.

    Returns:
        True if endpoint creation succeeded, False otherwise.
    """
    spec = generate_lepton_spec(cfg)

    # Convert OmegaConf objects to regular Python objects for JSON serialization
    from omegaconf import DictConfig, ListConfig

    def convert_to_json_serializable(obj: Any) -> Any:
        """Recursively convert OmegaConf objects to regular Python objects."""
        if isinstance(obj, (DictConfig, dict)):
            return {k: convert_to_json_serializable(v) for k, v in obj.items()}
        elif isinstance(obj, (ListConfig, list)):
            return [convert_to_json_serializable(item) for item in obj]
        else:
            return obj

    json_spec = convert_to_json_serializable(spec)

    # Write spec to temporary file
    import tempfile

    with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f:
        json.dump(json_spec, f, indent=2)
        spec_file = f.name

    try:
        # Create endpoint using lep CLI
        result = subprocess.run(
            ["lep", "endpoint", "create", "--file", spec_file, "--name", endpoint_name],
            capture_output=True,
            text=True,
            timeout=300,
        )

        if result.returncode == 0:
            print(f"✅ Successfully created Lepton endpoint: {endpoint_name}")
            return True
        else:
            error_msg = result.stderr.strip() if result.stderr else ""
            output_msg = result.stdout.strip() if result.stdout else ""
            print(
                f"✗ Failed to create Lepton endpoint | Endpoint: {endpoint_name} | Return code: {result.returncode}"
            )
            if error_msg:
                print(f" stderr: {error_msg}")
            if output_msg:
                print(f" stdout: {output_msg}")
            return False

    except subprocess.TimeoutExpired as e:
        print(
            f"✗ Timeout creating Lepton endpoint | Endpoint: {endpoint_name} | Timeout: 300s"
        )
        if hasattr(e, "stderr") and e.stderr:
            print(f" stderr: {e.stderr}")
        if hasattr(e, "stdout") and e.stdout:
            print(f" stdout: {e.stdout}")
        return False
    except subprocess.CalledProcessError as e:
        print(
            f"✗ Error creating Lepton endpoint | Endpoint: {endpoint_name} | Error: {e}"
        )
        if hasattr(e, "stderr") and e.stderr:
            print(f" stderr: {e.stderr}")
        if hasattr(e, "stdout") and e.stdout:
            print(f" stdout: {e.stdout}")
        return False
    finally:
        # Clean up temporary file
        Path(spec_file).unlink(missing_ok=True)

def delete_lepton_endpoint(endpoint_name: str) -> bool:
    """Delete a Lepton endpoint.

    Args:
        endpoint_name: Name of the endpoint to delete.

    Returns:
        True if deletion succeeded, False otherwise.
    """
    try:
        result = subprocess.run(
            ["lep", "endpoint", "remove", "--name", endpoint_name],
            capture_output=True,
            text=True,
            timeout=60,
        )

        return result.returncode == 0
    except (subprocess.TimeoutExpired, subprocess.CalledProcessError):
        return False

def get_lepton_endpoint_status(endpoint_name: str) -> Optional[dict[str, Any]]:
    """Get the status of a Lepton endpoint.

    Args:
        endpoint_name: Name of the endpoint.

    Returns:
        Status dict if endpoint exists, None otherwise. See
        https://github.com/leptonai/leptonai/blob/7de93b95357126da1e86fa99f54f9a769d5d2646/leptonai/api/v1/types/deployment.py#L338
        for the definition.
    """
    try:
        # TODO(agronskiy): why not use Python API?
        cmd = ["lep", "endpoint", "get", "--name", endpoint_name]
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            timeout=30,
        )

        if result.returncode != 0:
            return None

        endpoint_info = json.loads(result.stdout)
        status = endpoint_info.get("status", {})
        if isinstance(status, dict):
            return status
        logger.error(
            "Result of running lep command returned non-dict status",
            cmd=cmd,
            status=status,
        )
        return None

    except (
        subprocess.TimeoutExpired,
        subprocess.CalledProcessError,
        json.JSONDecodeError,
    ):
        return None

def wait_for_lepton_endpoint_ready(endpoint_name: str, timeout: int = 600) -> bool:
    """Wait for a Lepton endpoint to become ready.

    Args:
        endpoint_name: Name of the endpoint.
        timeout: Maximum time to wait in seconds.

    Returns:
        True if endpoint becomes ready, False if timeout.
    """
    start_time = time.time()
    while time.time() - start_time < timeout:
        status = get_lepton_endpoint_status(endpoint_name)

        # `get_lepton_endpoint_status` might return `None` if
        # e.g. there was a network error, see definition.
        if status is not None:
            state = status.get("state", "").lower()
            if state == "ready":
                logger.info(
                    "Lepton endpoint is ready",
                    endpoint_name=endpoint_name,
                )

                return True
            elif state in ["failed", "error"]:
                return False

        logger.debug(
            "Waiting for lepton endpoint",
            endpoint_name=endpoint_name,
            timeout=timeout,
            time_delta=time.time() - start_time,
            curr_status=status,
        )
        time.sleep(10)

    logger.error(
        "Timeout waiting for lepton endpoint",
        endpoint_name=endpoint_name,
        timeout=timeout,
    )
    return False

def get_lepton_endpoint_url(endpoint_name: str) -> Optional[str]:
    """Get the URL of a Lepton endpoint.

    Args:
        endpoint_name: Name of the endpoint.

    Returns:
        Endpoint URL if available, None otherwise.
    """
    try:
        result = subprocess.run(
            ["lep", "endpoint", "get", "--name", endpoint_name],
            capture_output=True,
            text=True,
            timeout=30,
        )

        if result.returncode == 0:
            endpoint_info = json.loads(result.stdout)
            status = endpoint_info.get("status", {})
            endpoint = status.get("endpoint", {})
            external_endpoint = endpoint.get("external_endpoint")
            # Ensure we return a proper string type or None
            if isinstance(external_endpoint, str):
                return external_endpoint
            else:
                return None
        else:
            return None
    except (
        subprocess.TimeoutExpired,
        subprocess.CalledProcessError,
        json.JSONDecodeError,
    ):
        return None