nemo-evaluator-launcher 0.1.19__py3-none-any.whl → 0.1.41__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nemo_evaluator_launcher/api/functional.py +105 -1
- nemo_evaluator_launcher/cli/logs.py +102 -0
- nemo_evaluator_launcher/cli/main.py +12 -0
- nemo_evaluator_launcher/cli/run.py +73 -15
- nemo_evaluator_launcher/cli/version.py +26 -23
- nemo_evaluator_launcher/common/helpers.py +176 -43
- nemo_evaluator_launcher/common/logging_utils.py +16 -5
- nemo_evaluator_launcher/common/printing_utils.py +7 -0
- nemo_evaluator_launcher/configs/deployment/sglang.yaml +4 -2
- nemo_evaluator_launcher/configs/deployment/trtllm.yaml +2 -3
- nemo_evaluator_launcher/configs/deployment/vllm.yaml +0 -1
- nemo_evaluator_launcher/configs/execution/slurm/default.yaml +14 -0
- nemo_evaluator_launcher/executors/base.py +31 -1
- nemo_evaluator_launcher/executors/lepton/deployment_helpers.py +36 -1
- nemo_evaluator_launcher/executors/lepton/executor.py +81 -1
- nemo_evaluator_launcher/executors/local/executor.py +377 -22
- nemo_evaluator_launcher/executors/local/run.template.sh +54 -2
- nemo_evaluator_launcher/executors/slurm/executor.py +422 -59
- nemo_evaluator_launcher/executors/slurm/proxy.cfg.template +26 -0
- nemo_evaluator_launcher/exporters/utils.py +32 -46
- nemo_evaluator_launcher/package_info.py +1 -1
- nemo_evaluator_launcher/resources/mapping.toml +56 -15
- {nemo_evaluator_launcher-0.1.19.dist-info → nemo_evaluator_launcher-0.1.41.dist-info}/METADATA +3 -3
- {nemo_evaluator_launcher-0.1.19.dist-info → nemo_evaluator_launcher-0.1.41.dist-info}/RECORD +28 -26
- {nemo_evaluator_launcher-0.1.19.dist-info → nemo_evaluator_launcher-0.1.41.dist-info}/entry_points.txt +1 -0
- {nemo_evaluator_launcher-0.1.19.dist-info → nemo_evaluator_launcher-0.1.41.dist-info}/WHEEL +0 -0
- {nemo_evaluator_launcher-0.1.19.dist-info → nemo_evaluator_launcher-0.1.41.dist-info}/licenses/LICENSE +0 -0
- {nemo_evaluator_launcher-0.1.19.dist-info → nemo_evaluator_launcher-0.1.41.dist-info}/top_level.txt +0 -0
|
@@ -22,6 +22,7 @@ from typing import Optional
|
|
|
22
22
|
import yaml
|
|
23
23
|
from omegaconf import DictConfig, OmegaConf
|
|
24
24
|
|
|
25
|
+
from nemo_evaluator_launcher.cli.version import get_versions
|
|
25
26
|
from nemo_evaluator_launcher.common.logging_utils import logger
|
|
26
27
|
|
|
27
28
|
|
|
@@ -35,35 +36,62 @@ class CmdAndReadableComment:
|
|
|
35
36
|
# A debuggable, readable comment that can be passed along for accompanying
|
|
36
37
|
# the actual command
|
|
37
38
|
debug: str
|
|
39
|
+
# Whether the content might be potentially unsafe. This is a flag useful for
|
|
40
|
+
# downstream callers who want to raise exceptions e.g. when a script was
|
|
41
|
+
# saved that would execute this command.
|
|
42
|
+
is_potentially_unsafe: bool = False
|
|
38
43
|
|
|
39
44
|
|
|
40
|
-
def
|
|
41
|
-
|
|
42
|
-
) -> CmdAndReadableComment:
|
|
43
|
-
"""Create a safe (see below) echo command saving a yaml to file.
|
|
45
|
+
def _str_to_echo_command(str_to_save: str, filename: str) -> CmdAndReadableComment:
    """Build a shell command that writes ``str_to_save`` into ``filename``.

    The payload is base64-encoded and decoded again on the shell side
    (``echo "<b64>" | base64 -d > <filename>``), so the resulting command can
    be passed through quoting boundaries such as ``bash -c '...'``.

    Encoding with base64 hurts debuggability, so a readable companion string is
    returned as well: the original text with ``# `` prepended to every line,
    under a ``# Contents of <filename>`` header.

    Args:
        str_to_save: Text to be written to the file.
        filename: Target path; interpolated into the command as-is (unquoted).

    Returns:
        A ``CmdAndReadableComment`` whose ``cmd`` is the echo pipeline and
        whose ``debug`` is the commented-out readable payload.
    """
    encoded_payload = base64.b64encode(str_to_save.encode("utf-8")).decode("utf-8")

    # Readable mirror of the payload, rendered as bash comments.
    comment_lines = [f"# Contents of {filename}"]
    for line in str_to_save.splitlines():
        comment_lines.append("# " + line)

    write_cmd = f'echo "{encoded_payload}" | base64 -d > {filename}'
    return CmdAndReadableComment(cmd=write_cmd, debug="\n".join(comment_lines))
|
|
58
61
|
|
|
59
62
|
|
|
63
|
+
def _set_nested_optionally_overriding(
|
|
64
|
+
d: dict, keys: list[str], val: object, *, override_if_exists: bool = False
|
|
65
|
+
):
|
|
66
|
+
"""Sets d[...keys....] = value, creating keys all the way"""
|
|
67
|
+
temp = d
|
|
68
|
+
for key in keys[:-1]:
|
|
69
|
+
temp = temp.setdefault(key, {})
|
|
70
|
+
if override_if_exists or keys[-1] not in temp:
|
|
71
|
+
temp[keys[-1]] = val
|
|
72
|
+
|
|
73
|
+
|
|
60
74
|
def get_eval_factory_config(
|
|
61
|
-
cfg: DictConfig,
|
|
75
|
+
cfg: DictConfig,
|
|
76
|
+
user_task_config: DictConfig,
|
|
62
77
|
) -> dict:
|
|
63
78
|
"""Extract config fields for eval factory.
|
|
64
79
|
|
|
65
80
|
This function extracts the config field similar to how overrides are handled.
|
|
81
|
+
|
|
82
|
+
Overrides will start to be deprecated (or not, but at least a warning will be logged).
|
|
66
83
|
"""
|
|
84
|
+
|
|
85
|
+
if cfg.evaluation.get("overrides") or user_task_config.get("overrides"):
|
|
86
|
+
# TODO(agronskiy): start removing overrides, test `test_start_deprecating_overrides`
|
|
87
|
+
# will start failing soon.
|
|
88
|
+
logger.warning(
|
|
89
|
+
"We are deprecating using old-style dot-delimited overrides "
|
|
90
|
+
"in favour of `nemo_evaluator_config` field. Please check "
|
|
91
|
+
"the documentation."
|
|
92
|
+
)
|
|
93
|
+
|
|
94
|
+
logger.debug("Getting nemo evaluator merged config")
|
|
67
95
|
# Extract config fields similar to overrides - convert to basic Python types first
|
|
68
96
|
# Support both new and old format for backward compatibility
|
|
69
97
|
cfg_config = cfg.evaluation.get("nemo_evaluator_config") or cfg.evaluation.get(
|
|
@@ -80,17 +108,115 @@ def get_eval_factory_config(
|
|
|
80
108
|
user_config = OmegaConf.to_container(user_config, resolve=True)
|
|
81
109
|
|
|
82
110
|
# Merge the configs
|
|
83
|
-
|
|
84
|
-
|
|
111
|
+
merged_nemo_evaluator_config: dict = OmegaConf.to_container(
|
|
112
|
+
OmegaConf.merge(cfg_config, user_config)
|
|
113
|
+
)
|
|
114
|
+
|
|
115
|
+
logger.debug(
|
|
116
|
+
"Merged nemo evaluator config, not final",
|
|
117
|
+
source_global_cfg=cfg_config,
|
|
118
|
+
source_task_config=user_config,
|
|
119
|
+
result=merged_nemo_evaluator_config,
|
|
120
|
+
)
|
|
85
121
|
|
|
86
|
-
return
|
|
122
|
+
return merged_nemo_evaluator_config
|
|
87
123
|
|
|
88
124
|
|
|
89
125
|
def get_eval_factory_command(
|
|
90
|
-
cfg: DictConfig,
|
|
126
|
+
cfg: DictConfig,
|
|
127
|
+
user_task_config: DictConfig,
|
|
128
|
+
task_definition: dict,
|
|
91
129
|
) -> CmdAndReadableComment:
|
|
92
|
-
|
|
130
|
+
# This gets the eval_factory_config merged from both top-level and task-level.
|
|
131
|
+
merged_nemo_evaluator_config = get_eval_factory_config(
|
|
132
|
+
cfg,
|
|
133
|
+
user_task_config,
|
|
134
|
+
)
|
|
135
|
+
|
|
136
|
+
# We now prepare the config to be passed to `nemo-evaluator` command.
|
|
137
|
+
_set_nested_optionally_overriding(
|
|
138
|
+
merged_nemo_evaluator_config,
|
|
139
|
+
["target", "api_endpoint", "url"],
|
|
140
|
+
get_endpoint_url(
|
|
141
|
+
cfg,
|
|
142
|
+
merged_nemo_evaluator_config=merged_nemo_evaluator_config,
|
|
143
|
+
endpoint_type=task_definition["endpoint_type"],
|
|
144
|
+
),
|
|
145
|
+
)
|
|
146
|
+
_set_nested_optionally_overriding(
|
|
147
|
+
merged_nemo_evaluator_config,
|
|
148
|
+
["target", "api_endpoint", "model_id"],
|
|
149
|
+
get_served_model_name(cfg),
|
|
150
|
+
)
|
|
151
|
+
_set_nested_optionally_overriding(
|
|
152
|
+
merged_nemo_evaluator_config,
|
|
153
|
+
["target", "api_endpoint", "type"],
|
|
154
|
+
task_definition["endpoint_type"],
|
|
155
|
+
)
|
|
156
|
+
_set_nested_optionally_overriding(
|
|
157
|
+
merged_nemo_evaluator_config,
|
|
158
|
+
["config", "type"],
|
|
159
|
+
task_definition["task"],
|
|
160
|
+
)
|
|
161
|
+
_set_nested_optionally_overriding(
|
|
162
|
+
merged_nemo_evaluator_config,
|
|
163
|
+
["config", "output_dir"],
|
|
164
|
+
"/results",
|
|
165
|
+
)
|
|
166
|
+
_set_nested_optionally_overriding(
|
|
167
|
+
merged_nemo_evaluator_config,
|
|
168
|
+
["target", "api_endpoint", "api_key"],
|
|
169
|
+
"API_KEY",
|
|
170
|
+
)
|
|
171
|
+
_set_nested_optionally_overriding(
|
|
172
|
+
merged_nemo_evaluator_config,
|
|
173
|
+
[
|
|
174
|
+
"metadata",
|
|
175
|
+
"launcher_resolved_config",
|
|
176
|
+
],
|
|
177
|
+
OmegaConf.to_container(cfg, resolve=True),
|
|
178
|
+
)
|
|
179
|
+
_set_nested_optionally_overriding(
|
|
180
|
+
merged_nemo_evaluator_config,
|
|
181
|
+
["metadata", "versioning"],
|
|
182
|
+
get_versions(),
|
|
183
|
+
)
|
|
93
184
|
|
|
185
|
+
# Now get the pre_cmd either from `evaluation.pre_cmd` or task-level pre_cmd. Note the
|
|
186
|
+
# order -- task level wins.
|
|
187
|
+
pre_cmd: str = (
|
|
188
|
+
user_task_config.get("pre_cmd") or cfg.evaluation.get("pre_cmd") or ""
|
|
189
|
+
)
|
|
190
|
+
|
|
191
|
+
is_potentially_unsafe = False
|
|
192
|
+
if pre_cmd:
|
|
193
|
+
logger.warning(
|
|
194
|
+
"Found non-empty pre_cmd that might be a security risk if executed. "
|
|
195
|
+
"Setting `is_potentially_unsafe` to `True`",
|
|
196
|
+
pre_cmd=pre_cmd,
|
|
197
|
+
)
|
|
198
|
+
is_potentially_unsafe = True
|
|
199
|
+
_set_nested_optionally_overriding(
|
|
200
|
+
merged_nemo_evaluator_config,
|
|
201
|
+
["metadata", "pre_cmd"],
|
|
202
|
+
pre_cmd,
|
|
203
|
+
)
|
|
204
|
+
|
|
205
|
+
create_pre_script_cmd = _str_to_echo_command(pre_cmd, filename="pre_cmd.sh")
|
|
206
|
+
|
|
207
|
+
create_yaml_cmd = _str_to_echo_command(
|
|
208
|
+
yaml.safe_dump(merged_nemo_evaluator_config), "config_ef.yaml"
|
|
209
|
+
)
|
|
210
|
+
|
|
211
|
+
# NOTE: we use `source` to allow tricks like exports etc (if needed) -- it runs in the same
|
|
212
|
+
# shell as the command.
|
|
213
|
+
eval_command = (
|
|
214
|
+
"cmd=$(command -v nemo-evaluator >/dev/null 2>&1 && echo nemo-evaluator || echo eval-factory) "
|
|
215
|
+
+ "&& source pre_cmd.sh "
|
|
216
|
+
+ "&& $cmd run_eval --run_config config_ef.yaml"
|
|
217
|
+
)
|
|
218
|
+
|
|
219
|
+
# NOTE: see note and test about deprecating that.
|
|
94
220
|
overrides = copy.deepcopy(dict(cfg.evaluation.get("overrides", {})))
|
|
95
221
|
overrides.update(dict(user_task_config.get("overrides", {})))
|
|
96
222
|
# NOTE(dfridman): Temporary fix to make sure that the overrides arg is not split into multiple lines.
|
|
@@ -99,46 +225,46 @@ def get_eval_factory_command(
|
|
|
99
225
|
k: (v.strip("\n") if isinstance(v, str) else v) for k, v in overrides.items()
|
|
100
226
|
}
|
|
101
227
|
overrides_str = ",".join([f"{k}={v}" for k, v in overrides.items()])
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
model_id = get_served_model_name(cfg)
|
|
105
|
-
model_type = task_definition["endpoint_type"]
|
|
106
|
-
eval_type = task_definition["task"]
|
|
107
|
-
|
|
108
|
-
create_file_cmd = _yaml_to_echo_command(
|
|
109
|
-
yaml.safe_dump(config_fields), "config_ef.yaml"
|
|
110
|
-
)
|
|
111
|
-
eval_command = f"""cmd=$([[ $(command -v nemo-evaluator) ]] && echo 'nemo-evaluator' || echo 'eval-factory') && $cmd run_eval --model_id {model_id} --model_type {model_type} --eval_type {eval_type} --model_url {model_url} --api_key_name API_KEY --output_dir /results --run_config config_ef.yaml"""
|
|
112
|
-
|
|
113
|
-
if overrides:
|
|
228
|
+
if overrides_str:
|
|
114
229
|
eval_command = f"{eval_command} --overrides {overrides_str}"
|
|
115
230
|
|
|
116
231
|
# We return both the command and the debugging base64-decoded strings, useful
|
|
117
232
|
# for exposing when building scripts.
|
|
118
233
|
return CmdAndReadableComment(
|
|
119
|
-
cmd=
|
|
234
|
+
cmd=create_pre_script_cmd.cmd
|
|
235
|
+
+ " && "
|
|
236
|
+
+ create_yaml_cmd.cmd
|
|
237
|
+
+ " && "
|
|
238
|
+
+ eval_command,
|
|
239
|
+
debug=create_pre_script_cmd.debug + "\n\n" + create_yaml_cmd.debug,
|
|
240
|
+
is_potentially_unsafe=is_potentially_unsafe,
|
|
120
241
|
)
|
|
121
242
|
|
|
122
243
|
|
|
123
244
|
def get_endpoint_url(
|
|
124
|
-
cfg: DictConfig,
|
|
245
|
+
cfg: DictConfig,
|
|
246
|
+
merged_nemo_evaluator_config: dict,
|
|
247
|
+
endpoint_type: str,
|
|
125
248
|
) -> str:
|
|
126
249
|
def apply_url_override(url: str) -> str:
|
|
127
250
|
"""Apply user URL override if provided."""
|
|
128
|
-
nemo_evaluator_config_url =
|
|
129
|
-
"
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
override_url = user_task_config.get("overrides", {}).get(
|
|
133
|
-
"config.target.api_endpoint.url", None
|
|
251
|
+
nemo_evaluator_config_url = (
|
|
252
|
+
merged_nemo_evaluator_config.get("target", {})
|
|
253
|
+
.get("api_endpoint", {})
|
|
254
|
+
.get("url", None)
|
|
134
255
|
)
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
256
|
+
|
|
257
|
+
if nemo_evaluator_config_url:
|
|
258
|
+
return nemo_evaluator_config_url
|
|
259
|
+
|
|
260
|
+
# Being deprecated, see `get_eval_factory_config` message.
|
|
261
|
+
overrides_old_style_url = merged_nemo_evaluator_config.get("overrides", {}).get(
|
|
262
|
+
"target.api_endpoint.url", None
|
|
141
263
|
)
|
|
264
|
+
if overrides_old_style_url:
|
|
265
|
+
return overrides_old_style_url
|
|
266
|
+
|
|
267
|
+
return url
|
|
142
268
|
|
|
143
269
|
if cfg.deployment.type == "none":
|
|
144
270
|
# For deployment: none, use target URL regardless of executor type
|
|
@@ -160,9 +286,16 @@ def get_endpoint_url(
|
|
|
160
286
|
|
|
161
287
|
else:
|
|
162
288
|
# Local executor - use localhost
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
289
|
+
endpoint_uri = cfg.deployment.endpoints[endpoint_type]
|
|
290
|
+
|
|
291
|
+
# Use HAProxy port if multiple_instances is enabled
|
|
292
|
+
if cfg.deployment.get("multiple_instances", False):
|
|
293
|
+
proxy_config = cfg.execution.get("proxy", {}).get("config", {})
|
|
294
|
+
port = proxy_config.get("haproxy_port", 5009)
|
|
295
|
+
else:
|
|
296
|
+
port = cfg.deployment.port
|
|
297
|
+
|
|
298
|
+
endpoint_url = f"http://127.0.0.1:{port}{endpoint_uri}"
|
|
166
299
|
return endpoint_url
|
|
167
300
|
|
|
168
301
|
|
|
@@ -61,8 +61,9 @@ import structlog
|
|
|
61
61
|
# both are unset, default would be used.
|
|
62
62
|
_LOG_LEVEL_ENV_VAR = "NEMO_EVALUATOR_LOG_LEVEL"
|
|
63
63
|
_DEFAULT_LOG_LEVEL = "WARNING"
|
|
64
|
-
|
|
65
|
-
# Keep minimal, broad substrings
|
|
64
|
+
_SENSITIVE_KEY_SUBSTRINGS_NORMALIZED = {
|
|
65
|
+
# Keep minimal, broad substrings
|
|
66
|
+
# NOTE: normalized: lowercased, no spaces/_/-
|
|
66
67
|
"authorization", # covers proxy-authorization, etc.
|
|
67
68
|
"apikey", # covers api_key, api-key, x-api-key, nvidia_api_key, ...
|
|
68
69
|
"accesskey", # covers access_key / access-key
|
|
@@ -73,6 +74,10 @@ _SENSITIVE_KEY_SUBSTRINGS = {
|
|
|
73
74
|
"pwd", # common shorthand
|
|
74
75
|
"passwd", # common variant
|
|
75
76
|
}
|
|
77
|
+
_ALLOWLISTED_KEYS_SUBSTRINGS = {
|
|
78
|
+
# NOTE: non-normalized (for allowlisting we want more control)
|
|
79
|
+
"_tokens", # This likely would allow us to not redact useful stuff like `limit_tokens`, `max_new_tokens`
|
|
80
|
+
}
|
|
76
81
|
|
|
77
82
|
|
|
78
83
|
def _mask(val: object) -> str:
|
|
@@ -91,8 +96,11 @@ def _normalize(name: object) -> str:
|
|
|
91
96
|
|
|
92
97
|
|
|
93
98
|
def _is_sensitive_key(key: object) -> bool:
    """Decide whether a key name looks like it holds a secret.

    A key is sensitive when its normalized form contains any entry of
    ``_SENSITIVE_KEY_SUBSTRINGS_NORMALIZED`` and its raw string form contains
    none of the entries of ``_ALLOWLISTED_KEYS_SUBSTRINGS``.

    Args:
        key: The key to inspect; it is converted with ``_normalize`` and ``str``.

    Returns:
        ``True`` if the key matches a sensitive substring and is not allowlisted.
    """
    normalized_key = _normalize(key)
    raw_key = str(key)

    looks_sensitive = False
    for marker in _SENSITIVE_KEY_SUBSTRINGS_NORMALIZED:
        if marker in normalized_key:
            looks_sensitive = True
            break
    if not looks_sensitive:
        return False

    # The allowlist is matched against the raw (non-normalized) key.
    for allowed in _ALLOWLISTED_KEYS_SUBSTRINGS:
        if allowed in raw_key:
            return False
    return True
|
|
96
104
|
|
|
97
105
|
|
|
98
106
|
def _redact_mapping(m: dict) -> dict:
|
|
@@ -263,6 +271,9 @@ def _configure_structlog() -> None:
|
|
|
263
271
|
structlog.processors.UnicodeDecoder(),
|
|
264
272
|
]
|
|
265
273
|
|
|
274
|
+
# Check if stderr is a TTY to determine if colors should be enabled
|
|
275
|
+
colors_enabled = sys.stderr.isatty()
|
|
276
|
+
|
|
266
277
|
logging.config.dictConfig(
|
|
267
278
|
{
|
|
268
279
|
"version": 1,
|
|
@@ -273,7 +284,7 @@ def _configure_structlog() -> None:
|
|
|
273
284
|
"()": "structlog.stdlib.ProcessorFormatter",
|
|
274
285
|
"processors": [
|
|
275
286
|
*shared_processors,
|
|
276
|
-
MainConsoleRenderer(colors=
|
|
287
|
+
MainConsoleRenderer(colors=colors_enabled),
|
|
277
288
|
],
|
|
278
289
|
},
|
|
279
290
|
# Formatter for plain file output
|
|
@@ -28,6 +28,7 @@ USAGE:
|
|
|
28
28
|
"""
|
|
29
29
|
|
|
30
30
|
import os
|
|
31
|
+
import sys
|
|
31
32
|
|
|
32
33
|
# If this env var is set, it will override a more standard "LOG_LEVEL". If
|
|
33
34
|
# both are unset, default would be used.
|
|
@@ -35,11 +36,17 @@ _DISABLE_COLOR_ENV_VAR = "NEMO_EVALUATOR_DISABLE_COLOR"
|
|
|
35
36
|
|
|
36
37
|
|
|
37
38
|
def _is_color_disabled():
    """Return ``True`` when colored output should be suppressed.

    Colors are disabled when the environment variable named by
    ``_DISABLE_COLOR_ENV_VAR`` is set to ``1``, ``y``, ``yes`` or ``true``
    (case-insensitive, surrounding whitespace ignored), or when ``sys.stdout``
    is not attached to a TTY.

    Returns:
        bool: ``True`` if colors must not be used, ``False`` otherwise.
    """
    # The explicit opt-out is checked first. Exact matching is used so that
    # values such as "10" or "only-ci" are not mistaken for a truthy flag
    # merely because they contain "1" or "y".
    raw_value = os.environ.get(_DISABLE_COLOR_ENV_VAR, "0")
    if raw_value.strip().lower() in {"1", "y", "yes", "true"}:
        return True

    # If not explicitly disabled, colors are only used when stdout is a TTY.
    return not sys.stdout.isatty()
|
|
44
51
|
|
|
45
52
|
|
|
@@ -19,6 +19,7 @@ checkpoint_path: ???
|
|
|
19
19
|
served_model_name: ???
|
|
20
20
|
port: 8000
|
|
21
21
|
tensor_parallel_size: 8
|
|
22
|
+
pipeline_parallel_size: 1
|
|
22
23
|
data_parallel_size: 1
|
|
23
24
|
extra_args: ""
|
|
24
25
|
env_vars: {} # {name: value} dict
|
|
@@ -33,6 +34,7 @@ command: python3 -m sglang.launch_server
|
|
|
33
34
|
--host 0.0.0.0
|
|
34
35
|
--port ${deployment.port}
|
|
35
36
|
--served-model-name ${deployment.served_model_name}
|
|
36
|
-
--tp ${deployment.tensor_parallel_size}
|
|
37
|
-
--dp ${deployment.data_parallel_size}
|
|
37
|
+
--tp-size ${deployment.tensor_parallel_size}
|
|
38
|
+
--dp-size ${deployment.data_parallel_size}
|
|
39
|
+
--pp-size ${deployment.pipeline_parallel_size}
|
|
38
40
|
${deployment.extra_args}
|
|
@@ -3,7 +3,7 @@ image: nvcr.io/nvidia/tensorrt-llm/release:1.0.0
|
|
|
3
3
|
checkpoint_path: ???
|
|
4
4
|
served_model_name: ???
|
|
5
5
|
port: 8000
|
|
6
|
-
tensor_parallel_size:
|
|
6
|
+
tensor_parallel_size: 8
|
|
7
7
|
pipeline_parallel_size: 1
|
|
8
8
|
extra_args: ""
|
|
9
9
|
|
|
@@ -12,8 +12,7 @@ endpoints:
|
|
|
12
12
|
completions: /v1/completions
|
|
13
13
|
health: /health
|
|
14
14
|
|
|
15
|
-
command:
|
|
16
|
-
mpirun --allow-run-as-root --oversubscribe
|
|
15
|
+
command: mpirun --allow-run-as-root --oversubscribe
|
|
17
16
|
trtllm-serve serve /checkpoint
|
|
18
17
|
--tp_size=${deployment.tensor_parallel_size}
|
|
19
18
|
--pp_size=${deployment.pipeline_parallel_size}
|
|
@@ -37,6 +37,5 @@ command: vllm serve ${oc.select:deployment.hf_model_handle,/checkpoint}
|
|
|
37
37
|
--port ${deployment.port}
|
|
38
38
|
--trust-remote-code
|
|
39
39
|
--served-model-name ${deployment.served_model_name}
|
|
40
|
-
--enforce-eager
|
|
41
40
|
--gpu-memory-utilization ${deployment.gpu_memory_utilization}
|
|
42
41
|
${deployment.extra_args}
|
|
@@ -25,6 +25,12 @@ ntasks_per_node: 1
|
|
|
25
25
|
gres: gpu:8
|
|
26
26
|
walltime: 01:00:00
|
|
27
27
|
subproject: nemo-evaluator-launcher
|
|
28
|
+
sbatch_comment: null # Optional comment for SLURM job (translates to #SBATCH --comment='...')
|
|
29
|
+
|
|
30
|
+
# Deployment-specific SLURM configuration
|
|
31
|
+
deployment:
|
|
32
|
+
n_tasks: 1 # Number of tasks for deployment srun (default: 1, for multi-instance set to num_nodes)
|
|
33
|
+
|
|
28
34
|
env_vars:
|
|
29
35
|
deployment: {}
|
|
30
36
|
evaluation: {}
|
|
@@ -32,3 +38,11 @@ mounts:
|
|
|
32
38
|
deployment: {}
|
|
33
39
|
evaluation: {}
|
|
34
40
|
mount_home: true
|
|
41
|
+
|
|
42
|
+
proxy:
|
|
43
|
+
type: haproxy
|
|
44
|
+
image: haproxy:latest
|
|
45
|
+
config:
|
|
46
|
+
haproxy_port: 5009
|
|
47
|
+
health_check_path: /health
|
|
48
|
+
health_check_status: 200
|
|
@@ -21,10 +21,12 @@ Defines the abstract interface for all executor implementations and common statu
|
|
|
21
21
|
from abc import ABC, abstractmethod
|
|
22
22
|
from dataclasses import dataclass
|
|
23
23
|
from enum import Enum
|
|
24
|
-
from typing import Any, Optional
|
|
24
|
+
from typing import Any, Iterator, Optional, Tuple
|
|
25
25
|
|
|
26
26
|
from omegaconf import DictConfig
|
|
27
27
|
|
|
28
|
+
from nemo_evaluator_launcher.common.logging_utils import logger
|
|
29
|
+
|
|
28
30
|
|
|
29
31
|
class ExecutionState(Enum):
|
|
30
32
|
"""Enumeration of possible execution states."""
|
|
@@ -118,3 +120,31 @@ class BaseExecutor(ABC):
|
|
|
118
120
|
return f"Could not find or kill job {job_id} ({container_or_id}) - job was already killed"
|
|
119
121
|
# Generic error message
|
|
120
122
|
return f"Could not find or kill job {job_id} ({container_or_id})"
|
|
123
|
+
|
|
124
|
+
@staticmethod
def stream_logs(
    id: str, executor_name: Optional[str] = None
) -> Iterator[Tuple[str, str, str]]:
    """Stream logs from a job or invocation group (default: unsupported).

    Executors that support log streaming provide their own implementation.
    This base version logs a warning and raises ``NotImplementedError``.

    Args:
        id: Unique job identifier or invocation identifier.
        executor_name: Optional executor name used in the warning message;
            when not provided, the message refers to "this executor".

    Yields:
        Tuple[str, str, str]: Tuples of (job_id, task_name, log_line) for each
            log line, for executors that implement streaming.

    Raises:
        NotImplementedError: If the executor does not support log streaming.
    """
    display_name = executor_name if executor_name else "this executor"
    logger.warning(
        f"Log streaming is not yet implemented for executor '{display_name}'. "
        "Only 'local' executor currently supports log streaming."
    )
    raise NotImplementedError("This executor does not support log streaming")
|
|
@@ -19,6 +19,7 @@ Handles Lepton endpoint creation, management, and health checks.
|
|
|
19
19
|
"""
|
|
20
20
|
|
|
21
21
|
import json
|
|
22
|
+
import shlex
|
|
22
23
|
import subprocess
|
|
23
24
|
import time
|
|
24
25
|
from pathlib import Path
|
|
@@ -27,6 +28,7 @@ from typing import Any, Dict, Optional
|
|
|
27
28
|
# Import lepton dependencies
|
|
28
29
|
from omegaconf import DictConfig
|
|
29
30
|
|
|
31
|
+
from nemo_evaluator_launcher.common.helpers import _str_to_echo_command
|
|
30
32
|
from nemo_evaluator_launcher.common.logging_utils import logger
|
|
31
33
|
|
|
32
34
|
|
|
@@ -235,6 +237,8 @@ def _create_inference_container_spec(deployment_cfg: DictConfig) -> Dict[str, An
|
|
|
235
237
|
Returns:
|
|
236
238
|
Container specification for Lepton.
|
|
237
239
|
"""
|
|
240
|
+
# Extract pre_cmd from deployment_cfg
|
|
241
|
+
pre_cmd: str = deployment_cfg.get("pre_cmd") or ""
|
|
238
242
|
container_spec = {
|
|
239
243
|
"image": deployment_cfg.image,
|
|
240
244
|
"ports": [{"container_port": deployment_cfg.port}],
|
|
@@ -258,6 +262,18 @@ def _create_inference_container_spec(deployment_cfg: DictConfig) -> Dict[str, An
|
|
|
258
262
|
if hasattr(deployment_cfg, "extra_args") and deployment_cfg.extra_args:
|
|
259
263
|
command_parts.extend(deployment_cfg.extra_args.split())
|
|
260
264
|
|
|
265
|
+
# Wrap with pre_cmd if provided
|
|
266
|
+
if pre_cmd:
|
|
267
|
+
create_pre_script_cmd = _str_to_echo_command(
|
|
268
|
+
pre_cmd, filename="deployment_pre_cmd.sh"
|
|
269
|
+
)
|
|
270
|
+
original_cmd = " ".join(shlex.quote(str(c)) for c in command_parts)
|
|
271
|
+
command_parts = [
|
|
272
|
+
"/bin/bash",
|
|
273
|
+
"-c",
|
|
274
|
+
f"{create_pre_script_cmd.cmd} && source deployment_pre_cmd.sh && exec {original_cmd}",
|
|
275
|
+
]
|
|
276
|
+
|
|
261
277
|
container_spec["command"] = command_parts
|
|
262
278
|
|
|
263
279
|
elif deployment_cfg.type == "sglang":
|
|
@@ -278,12 +294,31 @@ def _create_inference_container_spec(deployment_cfg: DictConfig) -> Dict[str, An
|
|
|
278
294
|
if hasattr(deployment_cfg, "extra_args") and deployment_cfg.extra_args:
|
|
279
295
|
command_parts.extend(deployment_cfg.extra_args.split())
|
|
280
296
|
|
|
297
|
+
# Wrap with pre_cmd if provided
|
|
298
|
+
if pre_cmd:
|
|
299
|
+
create_pre_script_cmd = _str_to_echo_command(
|
|
300
|
+
pre_cmd, filename="deployment_pre_cmd.sh"
|
|
301
|
+
)
|
|
302
|
+
original_cmd = " ".join(shlex.quote(str(c)) for c in command_parts)
|
|
303
|
+
command_parts = [
|
|
304
|
+
"/bin/bash",
|
|
305
|
+
"-c",
|
|
306
|
+
f"{create_pre_script_cmd.cmd} && source deployment_pre_cmd.sh && exec {original_cmd}",
|
|
307
|
+
]
|
|
308
|
+
|
|
281
309
|
container_spec["command"] = command_parts
|
|
282
310
|
|
|
283
311
|
elif deployment_cfg.type == "nim":
|
|
284
312
|
# NIM containers use their default entrypoint - no custom command needed
|
|
285
313
|
# Configuration is handled via environment variables
|
|
286
|
-
|
|
314
|
+
# pre_cmd is not supported for NIM deployments
|
|
315
|
+
if pre_cmd:
|
|
316
|
+
logger.error(
|
|
317
|
+
"pre_cmd is not supported for NIM deployments",
|
|
318
|
+
deployment_type="nim",
|
|
319
|
+
pre_cmd=pre_cmd,
|
|
320
|
+
)
|
|
321
|
+
raise ValueError("pre_cmd is not supported for NIM deployments")
|
|
287
322
|
|
|
288
323
|
return container_spec
|
|
289
324
|
|