nemo-evaluator-launcher 0.1.19__py3-none-any.whl → 0.1.41__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28)
  1. nemo_evaluator_launcher/api/functional.py +105 -1
  2. nemo_evaluator_launcher/cli/logs.py +102 -0
  3. nemo_evaluator_launcher/cli/main.py +12 -0
  4. nemo_evaluator_launcher/cli/run.py +73 -15
  5. nemo_evaluator_launcher/cli/version.py +26 -23
  6. nemo_evaluator_launcher/common/helpers.py +176 -43
  7. nemo_evaluator_launcher/common/logging_utils.py +16 -5
  8. nemo_evaluator_launcher/common/printing_utils.py +7 -0
  9. nemo_evaluator_launcher/configs/deployment/sglang.yaml +4 -2
  10. nemo_evaluator_launcher/configs/deployment/trtllm.yaml +2 -3
  11. nemo_evaluator_launcher/configs/deployment/vllm.yaml +0 -1
  12. nemo_evaluator_launcher/configs/execution/slurm/default.yaml +14 -0
  13. nemo_evaluator_launcher/executors/base.py +31 -1
  14. nemo_evaluator_launcher/executors/lepton/deployment_helpers.py +36 -1
  15. nemo_evaluator_launcher/executors/lepton/executor.py +81 -1
  16. nemo_evaluator_launcher/executors/local/executor.py +377 -22
  17. nemo_evaluator_launcher/executors/local/run.template.sh +54 -2
  18. nemo_evaluator_launcher/executors/slurm/executor.py +422 -59
  19. nemo_evaluator_launcher/executors/slurm/proxy.cfg.template +26 -0
  20. nemo_evaluator_launcher/exporters/utils.py +32 -46
  21. nemo_evaluator_launcher/package_info.py +1 -1
  22. nemo_evaluator_launcher/resources/mapping.toml +56 -15
  23. {nemo_evaluator_launcher-0.1.19.dist-info → nemo_evaluator_launcher-0.1.41.dist-info}/METADATA +3 -3
  24. {nemo_evaluator_launcher-0.1.19.dist-info → nemo_evaluator_launcher-0.1.41.dist-info}/RECORD +28 -26
  25. {nemo_evaluator_launcher-0.1.19.dist-info → nemo_evaluator_launcher-0.1.41.dist-info}/entry_points.txt +1 -0
  26. {nemo_evaluator_launcher-0.1.19.dist-info → nemo_evaluator_launcher-0.1.41.dist-info}/WHEEL +0 -0
  27. {nemo_evaluator_launcher-0.1.19.dist-info → nemo_evaluator_launcher-0.1.41.dist-info}/licenses/LICENSE +0 -0
  28. {nemo_evaluator_launcher-0.1.19.dist-info → nemo_evaluator_launcher-0.1.41.dist-info}/top_level.txt +0 -0
nemo_evaluator_launcher/common/helpers.py

@@ -22,6 +22,7 @@ from typing import Optional
 import yaml
 from omegaconf import DictConfig, OmegaConf
 
+from nemo_evaluator_launcher.cli.version import get_versions
 from nemo_evaluator_launcher.common.logging_utils import logger
 
 
@@ -35,35 +36,62 @@ class CmdAndReadableComment:
     # A debuggale readable comment that can be passed along for accompanying
     # the actual command
     debug: str
+    # Whether the content might be potentially unsafe. This is a flag useful for
+    # downstream callers who want to raise exceptions e.g. when a script was
+    # saved that would execute this command.
+    is_potentially_unsafe: bool = False
 
 
-def _yaml_to_echo_command(
-    yaml_str: str, filename: str = "config_ef.yaml"
-) -> CmdAndReadableComment:
-    """Create a safe (see below) echo command saving a yaml to file.
+def _str_to_echo_command(str_to_save: str, filename: str) -> CmdAndReadableComment:
+    """Create a safe (see below) echo command saving a string to file.
 
     Safety in this context means the ability to pass such echo command through the
     `bash -c '...'` boundaries for example.
 
     Naturally, enconding with base64 creates debuggability issues. For that, the second
-    output of the function is the yaml string with bash comment signs prepended.
+    output of the function is the string with bash comment signs prepended.
     """
-    yaml_str_b64 = base64.b64encode(yaml_str.encode("utf-8")).decode("utf-8")
+    str_to_save_b64 = base64.b64encode(str_to_save.encode("utf-8")).decode("utf-8")
     debug_str = "\n".join(
-        [f"# Contents of {filename}"] + ["# " + s for s in yaml_str.splitlines()]
+        [f"# Contents of {filename}"] + ["# " + s for s in str_to_save.splitlines()]
     )
     return CmdAndReadableComment(
-        cmd=f'echo "{yaml_str_b64}" | base64 -d > {filename}', debug=debug_str
+        cmd=f'echo "{str_to_save_b64}" | base64 -d > {filename}', debug=debug_str
     )
 
 
+def _set_nested_optionally_overriding(
+    d: dict, keys: list[str], val: object, *, override_if_exists: bool = False
+):
+    """Sets d[...keys....] = value, creating keys all the way"""
+    temp = d
+    for key in keys[:-1]:
+        temp = temp.setdefault(key, {})
+    if override_if_exists or keys[-1] not in temp:
+        temp[keys[-1]] = val
+
+
 def get_eval_factory_config(
-    cfg: DictConfig, user_task_config: DictConfig, task_definition: dict
+    cfg: DictConfig,
+    user_task_config: DictConfig,
 ) -> dict:
     """Extract config fields for eval factory.
 
     This function extracts the config field similar to how overrides are handled.
+
+    Overrides will start to be deprecated (or not, but at least a warning will be logged).
     """
+
+    if cfg.evaluation.get("overrides") or user_task_config.get("overrides"):
+        # TODO(agronskiy): start removing overrides, test `test_start_deprecating_overrides`
+        # will start failing soon.
+        logger.warning(
+            "We are deprecating using old-style dot-delimited overrides "
+            "in favour of `nemo_evaluator_config` field. Please check "
+            "the documentation."
+        )
+
+    logger.debug("Getting nemo evaluator merged config")
     # Extract config fields similar to overrides - convert to basic Python types first
     # Support both new and old format for backward compatibility
     cfg_config = cfg.evaluation.get("nemo_evaluator_config") or cfg.evaluation.get(
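For context, the base64 round-trip that `_str_to_echo_command` relies on can be reproduced standalone. The sketch below is illustrative only (payload and output path are invented) and is not code from the package:

    import base64
    import subprocess

    payload = "key: value\nnested:\n  flag: true"  # arbitrary example content
    b64 = base64.b64encode(payload.encode("utf-8")).decode("utf-8")

    # The encoded form contains no quotes or newlines, so it survives `bash -c '...'` quoting.
    cmd = f'echo "{b64}" | base64 -d > /tmp/config_ef.yaml'
    subprocess.run(["bash", "-c", cmd], check=True)

    with open("/tmp/config_ef.yaml") as f:
        assert f.read() == payload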
@@ -80,17 +108,115 @@ def get_eval_factory_config(
     user_config = OmegaConf.to_container(user_config, resolve=True)
 
     # Merge the configs
-    config_fields = copy.deepcopy(cfg_config or {})
-    config_fields.update(user_config or {})
+    merged_nemo_evaluator_config: dict = OmegaConf.to_container(
+        OmegaConf.merge(cfg_config, user_config)
+    )
+
+    logger.debug(
+        "Merged nemo evaluator config, not final",
+        source_global_cfg=cfg_config,
+        source_task_config=user_config,
+        result=merged_nemo_evaluator_config,
+    )
 
-    return config_fields
+    return merged_nemo_evaluator_config
 
 
 def get_eval_factory_command(
-    cfg: DictConfig, user_task_config: DictConfig, task_definition: dict
+    cfg: DictConfig,
+    user_task_config: DictConfig,
+    task_definition: dict,
 ) -> CmdAndReadableComment:
-    config_fields = get_eval_factory_config(cfg, user_task_config, task_definition)
+    # This gets the eval_factory_config merged from both top-level and task-level.
+    merged_nemo_evaluator_config = get_eval_factory_config(
+        cfg,
+        user_task_config,
+    )
+
+    # We now prepare the config to be passed to `nemo-evaluator` command.
+    _set_nested_optionally_overriding(
+        merged_nemo_evaluator_config,
+        ["target", "api_endpoint", "url"],
+        get_endpoint_url(
+            cfg,
+            merged_nemo_evaluator_config=merged_nemo_evaluator_config,
+            endpoint_type=task_definition["endpoint_type"],
+        ),
+    )
+    _set_nested_optionally_overriding(
+        merged_nemo_evaluator_config,
+        ["target", "api_endpoint", "model_id"],
+        get_served_model_name(cfg),
+    )
+    _set_nested_optionally_overriding(
+        merged_nemo_evaluator_config,
+        ["target", "api_endpoint", "type"],
+        task_definition["endpoint_type"],
+    )
+    _set_nested_optionally_overriding(
+        merged_nemo_evaluator_config,
+        ["config", "type"],
+        task_definition["task"],
+    )
+    _set_nested_optionally_overriding(
+        merged_nemo_evaluator_config,
+        ["config", "output_dir"],
+        "/results",
+    )
+    _set_nested_optionally_overriding(
+        merged_nemo_evaluator_config,
+        ["target", "api_endpoint", "api_key"],
+        "API_KEY",
+    )
+    _set_nested_optionally_overriding(
+        merged_nemo_evaluator_config,
+        [
+            "metadata",
+            "launcher_resolved_config",
+        ],
+        OmegaConf.to_container(cfg, resolve=True),
+    )
+    _set_nested_optionally_overriding(
+        merged_nemo_evaluator_config,
+        ["metadata", "versioning"],
+        get_versions(),
+    )
 
+    # Now get the pre_cmd either from `evaluation.pre_cmd` or task-level pre_cmd. Note the
+    # order -- task level wins.
+    pre_cmd: str = (
+        user_task_config.get("pre_cmd") or cfg.evaluation.get("pre_cmd") or ""
+    )
+
+    is_potentially_unsafe = False
+    if pre_cmd:
+        logger.warning(
+            "Found non-empty pre_cmd that might be a security risk if executed. "
+            "Setting `is_potentially_unsafe` to `True`",
+            pre_cmd=pre_cmd,
+        )
+        is_potentially_unsafe = True
+    _set_nested_optionally_overriding(
+        merged_nemo_evaluator_config,
+        ["metadata", "pre_cmd"],
+        pre_cmd,
+    )
+
+    create_pre_script_cmd = _str_to_echo_command(pre_cmd, filename="pre_cmd.sh")
+
+    create_yaml_cmd = _str_to_echo_command(
+        yaml.safe_dump(merged_nemo_evaluator_config), "config_ef.yaml"
+    )
+
+    # NOTE: we use `source` to allow tricks like exports etc (if needed) -- it runs in the same
+    # shell as the command.
+    eval_command = (
+        "cmd=$(command -v nemo-evaluator >/dev/null 2>&1 && echo nemo-evaluator || echo eval-factory) "
+        + "&& source pre_cmd.sh "
+        + "&& $cmd run_eval --run_config config_ef.yaml"
+    )
+
+    # NOTE: see note and test about deprecating that.
     overrides = copy.deepcopy(dict(cfg.evaluation.get("overrides", {})))
     overrides.update(dict(user_task_config.get("overrides", {})))
     # NOTE(dfridman): Temporary fix to make sure that the overrides arg is not split into multiple lines.
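To make the assembled command concrete, here is a small sketch of how the three `&&`-joined pieces line up; the base64 payloads are placeholders and the variable names only mirror the diff:

    # Placeholders standing in for the real base64-encoded pre_cmd.sh / config_ef.yaml payloads.
    create_pre_script_cmd = 'echo "PRE_CMD_B64" | base64 -d > pre_cmd.sh'
    create_yaml_cmd = 'echo "CONFIG_B64" | base64 -d > config_ef.yaml'
    eval_command = (
        "cmd=$(command -v nemo-evaluator >/dev/null 2>&1 && echo nemo-evaluator || echo eval-factory) "
        "&& source pre_cmd.sh "
        "&& $cmd run_eval --run_config config_ef.yaml"
    )

    # Mirrors CmdAndReadableComment.cmd: write the helper files, then run the evaluator,
    # falling back to the legacy `eval-factory` entry point when `nemo-evaluator` is absent.
    full_cmd = " && ".join([create_pre_script_cmd, create_yaml_cmd, eval_command])
    print(full_cmd)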
@@ -99,46 +225,46 @@ def get_eval_factory_command(
         k: (v.strip("\n") if isinstance(v, str) else v) for k, v in overrides.items()
     }
     overrides_str = ",".join([f"{k}={v}" for k, v in overrides.items()])
-    model_url = get_endpoint_url(cfg, user_task_config, task_definition)
-
-    model_id = get_served_model_name(cfg)
-    model_type = task_definition["endpoint_type"]
-    eval_type = task_definition["task"]
-
-    create_file_cmd = _yaml_to_echo_command(
-        yaml.safe_dump(config_fields), "config_ef.yaml"
-    )
-    eval_command = f"""cmd=$([[ $(command -v nemo-evaluator) ]] && echo 'nemo-evaluator' || echo 'eval-factory') && $cmd run_eval --model_id {model_id} --model_type {model_type} --eval_type {eval_type} --model_url {model_url} --api_key_name API_KEY --output_dir /results --run_config config_ef.yaml"""
-
-    if overrides:
+    if overrides_str:
         eval_command = f"{eval_command} --overrides {overrides_str}"
 
     # We return both the command and the debugging base64-decoded strings, useful
     # for exposing when building scripts.
     return CmdAndReadableComment(
-        cmd=create_file_cmd.cmd + " && " + eval_command, debug=create_file_cmd.debug
+        cmd=create_pre_script_cmd.cmd
+        + " && "
+        + create_yaml_cmd.cmd
+        + " && "
+        + eval_command,
+        debug=create_pre_script_cmd.debug + "\n\n" + create_yaml_cmd.debug,
+        is_potentially_unsafe=is_potentially_unsafe,
     )
 
 
 def get_endpoint_url(
-    cfg: DictConfig, user_task_config: DictConfig, task_definition: dict
+    cfg: DictConfig,
+    merged_nemo_evaluator_config: dict,
+    endpoint_type: str,
 ) -> str:
     def apply_url_override(url: str) -> str:
         """Apply user URL override if provided."""
-        nemo_evaluator_config_url = user_task_config.get(
-            "nemo_evaluator_config", {}
-        ).get("target.api_endpoint.url", None)
-
-        override_url = user_task_config.get("overrides", {}).get(
-            "config.target.api_endpoint.url", None
+        nemo_evaluator_config_url = (
+            merged_nemo_evaluator_config.get("target", {})
+            .get("api_endpoint", {})
+            .get("url", None)
         )
-        return (
-            override_url
-            if override_url is not None
-            else nemo_evaluator_config_url
-            if nemo_evaluator_config_url is not None
-            else url
+
+        if nemo_evaluator_config_url:
+            return nemo_evaluator_config_url
+
+        # Being deprecated, see `get_eval_factory_config` message.
+        overrides_old_style_url = merged_nemo_evaluator_config.get("overrides", {}).get(
+            "target.api_endpoint.url", None
         )
+        if overrides_old_style_url:
+            return overrides_old_style_url
+
+        return url
 
     if cfg.deployment.type == "none":
         # For deployment: none, use target URL regardless of executor type
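The precedence implemented by `apply_url_override` above can be summarized in a short, renamed sketch (illustrative dictionaries, not the launcher's API): an explicit `target.api_endpoint.url` wins, then the deprecated dot-style override, then the executor-derived default:

    def resolve_url(merged_cfg: dict, default_url: str) -> str:
        # 1. New-style nested field from the merged nemo_evaluator_config.
        explicit = merged_cfg.get("target", {}).get("api_endpoint", {}).get("url")
        if explicit:
            return explicit
        # 2. Deprecated dot-delimited override, kept for backward compatibility.
        legacy = merged_cfg.get("overrides", {}).get("target.api_endpoint.url")
        if legacy:
            return legacy
        # 3. Fall back to the URL derived from the deployment/executor settings.
        return default_url

    assert resolve_url({}, "http://127.0.0.1:8000/v1") == "http://127.0.0.1:8000/v1"
    assert resolve_url(
        {"target": {"api_endpoint": {"url": "https://my.endpoint/v1"}}}, "http://127.0.0.1:8000/v1"
    ) == "https://my.endpoint/v1"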
@@ -160,9 +286,16 @@ def get_endpoint_url(
 
     else:
         # Local executor - use localhost
-        task_endpoint_type = task_definition["endpoint_type"]
-        endpoint_uri = cfg.deployment.endpoints[task_endpoint_type]
-        endpoint_url = f"http://127.0.0.1:{cfg.deployment.port}{endpoint_uri}"
+        endpoint_uri = cfg.deployment.endpoints[endpoint_type]
+
+        # Use HAProxy port if multiple_instances is enabled
+        if cfg.deployment.get("multiple_instances", False):
+            proxy_config = cfg.execution.get("proxy", {}).get("config", {})
+            port = proxy_config.get("haproxy_port", 5009)
+        else:
+            port = cfg.deployment.port
+
+        endpoint_url = f"http://127.0.0.1:{port}{endpoint_uri}"
     return endpoint_url
 
 
nemo_evaluator_launcher/common/logging_utils.py

@@ -61,8 +61,9 @@ import structlog
 # both are unset, default would be used.
 _LOG_LEVEL_ENV_VAR = "NEMO_EVALUATOR_LOG_LEVEL"
 _DEFAULT_LOG_LEVEL = "WARNING"
-_SENSITIVE_KEY_SUBSTRINGS = {
-    # Keep minimal, broad substrings (normalized: lowercased, no spaces/_/-)
+_SENSITIVE_KEY_SUBSTRINGS_NORMALIZED = {
+    # Keep minimal, broad substrings
+    # NOTE: normalized: lowercased, no spaces/_/-
     "authorization",  # covers proxy-authorization, etc.
     "apikey",  # covers api_key, api-key, x-api-key, nvidia_api_key, ...
     "accesskey",  # covers access_key / access-key
@@ -73,6 +74,10 @@ _SENSITIVE_KEY_SUBSTRINGS = {
     "pwd",  # common shorthand
     "passwd",  # common variant
 }
+_ALLOWLISTED_KEYS_SUBSTRINGS = {
+    # NOTE: non-normalized (for allowlisting we want more control)
+    "_tokens",  # This likely would allow us to not redact useful stuff like `limit_tokens`, `max_new_tokens`
+}
 
 
 def _mask(val: object) -> str:
@@ -91,8 +96,11 @@ def _normalize(name: object) -> str:
 
 
 def _is_sensitive_key(key: object) -> bool:
-    k = _normalize(key)
-    return any(substr in k for substr in _SENSITIVE_KEY_SUBSTRINGS)
+    k_norm = _normalize(key)
+    k_non_norm = str(key)
+    return any(
+        substr in k_norm for substr in _SENSITIVE_KEY_SUBSTRINGS_NORMALIZED
+    ) and not any(substr in k_non_norm for substr in _ALLOWLISTED_KEYS_SUBSTRINGS)
 
 
 def _redact_mapping(m: dict) -> dict:
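A standalone sketch of how the normalized denylist and the raw-key allowlist interact; `"token"` is assumed here to stand in for denylist entries elided from the hunk above, the visible entries are copied from it:

    # Denylist substrings are matched against a normalized key (lowercased, no spaces/_/-).
    SENSITIVE_NORMALIZED = {"authorization", "apikey", "accesskey", "pwd", "passwd", "token"}
    # Allowlist substrings are checked against the raw key, giving finer control.
    ALLOWLISTED_SUBSTRINGS = {"_tokens"}

    def normalize(name: str) -> str:
        return name.lower().replace("_", "").replace("-", "").replace(" ", "")

    def is_sensitive_key(key: str) -> bool:
        hit = any(s in normalize(key) for s in SENSITIVE_NORMALIZED)
        return hit and not any(s in key for s in ALLOWLISTED_SUBSTRINGS)

    assert is_sensitive_key("NVIDIA_API_KEY")      # "nvidiaapikey" contains "apikey" -> redacted
    assert not is_sensitive_key("max_new_tokens")  # matches "token" but is spared by "_tokens"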
@@ -263,6 +271,9 @@ def _configure_structlog() -> None:
         structlog.processors.UnicodeDecoder(),
     ]
 
+    # Check if stderr is a TTY to determine if colors should be enabled
+    colors_enabled = sys.stderr.isatty()
+
     logging.config.dictConfig(
         {
             "version": 1,
@@ -273,7 +284,7 @@
                     "()": "structlog.stdlib.ProcessorFormatter",
                     "processors": [
                         *shared_processors,
-                        MainConsoleRenderer(colors=True),
+                        MainConsoleRenderer(colors=colors_enabled),
                     ],
                 },
                 # Formatter for plain file output
nemo_evaluator_launcher/common/printing_utils.py

@@ -28,6 +28,7 @@ USAGE:
 """
 
 import os
+import sys
 
 # If this env var is set, it will override a more standard "LOG_LEVEL". If
 # both are unset, default would be used.
@@ -35,11 +36,17 @@ _DISABLE_COLOR_ENV_VAR = "NEMO_EVALUATOR_DISABLE_COLOR"
 
 
 def _is_color_disabled():
+    # Check environment variable first
    env_var = os.environ.get(_DISABLE_COLOR_ENV_VAR, "0").lower()
 
     if "1" in env_var or "yes" in env_var or "y" in env_var or "true" in env_var:
         return True
 
+    # If not explicitly disabled, check if stdout is a TTY
+    # Colors are disabled if output is not a TTY
+    if not sys.stdout.isatty():
+        return True
+
     return False
 
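A minimal sketch of the combined check (illustrative only, not the module itself): the `NEMO_EVALUATOR_DISABLE_COLOR` environment variable is consulted first, then the TTY test:

    import os
    import sys

    def color_disabled() -> bool:
        # Explicit opt-out via the environment variable wins.
        flag = os.environ.get("NEMO_EVALUATOR_DISABLE_COLOR", "0").lower()
        if any(token in flag for token in ("1", "yes", "y", "true")):
            return True
        # Otherwise disable colors whenever stdout is not an interactive terminal,
        # e.g. when output is piped to a file or another process.
        return not sys.stdout.isatty()

    print("colors enabled:", not color_disabled())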
 
nemo_evaluator_launcher/configs/deployment/sglang.yaml

@@ -19,6 +19,7 @@ checkpoint_path: ???
 served_model_name: ???
 port: 8000
 tensor_parallel_size: 8
+pipeline_parallel_size: 1
 data_parallel_size: 1
 extra_args: ""
 env_vars: {}  # {name: value} dict
@@ -33,6 +34,7 @@ command: python3 -m sglang.launch_server
   --host 0.0.0.0
   --port ${deployment.port}
   --served-model-name ${deployment.served_model_name}
-  --tp ${deployment.tensor_parallel_size}
-  --dp ${deployment.data_parallel_size}
+  --tp-size ${deployment.tensor_parallel_size}
+  --dp-size ${deployment.data_parallel_size}
+  --pp-size ${deployment.pipeline_parallel_size}
   ${deployment.extra_args}
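For reference, a small OmegaConf sketch (not part of the launcher) of how `${deployment.*}` interpolations in a command template like the one above resolve; the values are examples:

    from omegaconf import OmegaConf

    cfg = OmegaConf.create(
        {
            "deployment": {
                "port": 8000,
                "served_model_name": "example-model",  # example value
                "tensor_parallel_size": 8,
                "data_parallel_size": 1,
                "pipeline_parallel_size": 1,
                "command": (
                    "python3 -m sglang.launch_server --port ${deployment.port} "
                    "--served-model-name ${deployment.served_model_name} "
                    "--tp-size ${deployment.tensor_parallel_size} "
                    "--dp-size ${deployment.data_parallel_size} "
                    "--pp-size ${deployment.pipeline_parallel_size}"
                ),
            }
        }
    )

    # Interpolations resolve against the config root, yielding the final launch command.
    print(OmegaConf.to_container(cfg, resolve=True)["deployment"]["command"])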
nemo_evaluator_launcher/configs/deployment/trtllm.yaml

@@ -3,7 +3,7 @@ image: nvcr.io/nvidia/tensorrt-llm/release:1.0.0
 checkpoint_path: ???
 served_model_name: ???
 port: 8000
-tensor_parallel_size: 4
+tensor_parallel_size: 8
 pipeline_parallel_size: 1
 extra_args: ""
 
@@ -12,8 +12,7 @@ endpoints:
   completions: /v1/completions
   health: /health
 
-command:
-  mpirun --allow-run-as-root --oversubscribe
+command: mpirun --allow-run-as-root --oversubscribe
   trtllm-serve serve /checkpoint
   --tp_size=${deployment.tensor_parallel_size}
   --pp_size=${deployment.pipeline_parallel_size}
nemo_evaluator_launcher/configs/deployment/vllm.yaml

@@ -37,6 +37,5 @@ command: vllm serve ${oc.select:deployment.hf_model_handle,/checkpoint}
   --port ${deployment.port}
   --trust-remote-code
   --served-model-name ${deployment.served_model_name}
-  --enforce-eager
   --gpu-memory-utilization ${deployment.gpu_memory_utilization}
   ${deployment.extra_args}
nemo_evaluator_launcher/configs/execution/slurm/default.yaml

@@ -25,6 +25,12 @@ ntasks_per_node: 1
 gres: gpu:8
 walltime: 01:00:00
 subproject: nemo-evaluator-launcher
+sbatch_comment: null  # Optional comment for SLURM job (translates to #SBATCH --comment='...')
+
+# Deployment-specific SLURM configuration
+deployment:
+  n_tasks: 1  # Number of tasks for deployment srun (default: 1, for multi-instance set to num_nodes)
+
 env_vars:
   deployment: {}
   evaluation: {}
@@ -32,3 +38,11 @@ mounts:
   deployment: {}
   evaluation: {}
   mount_home: true
+
+proxy:
+  type: haproxy
+  image: haproxy:latest
+  config:
+    haproxy_port: 5009
+    health_check_path: /health
+    health_check_status: 200
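A short sketch mirroring the port-selection logic added to get_endpoint_url in helpers.py above; the endpoint key and config values are examples beyond the defaults shown in this diff:

    from omegaconf import OmegaConf

    cfg = OmegaConf.create(
        {
            "deployment": {"port": 8000, "multiple_instances": True,
                           "endpoints": {"chat": "/v1/chat/completions"}},
            "execution": {"proxy": {"type": "haproxy",
                                    "config": {"haproxy_port": 5009,
                                               "health_check_path": "/health",
                                               "health_check_status": 200}}},
        }
    )

    if cfg.deployment.get("multiple_instances", False):
        # Evaluations talk to the HAProxy front-end, which fronts the deployment instances.
        port = cfg.execution.get("proxy", {}).get("config", {}).get("haproxy_port", 5009)
    else:
        port = cfg.deployment.port

    print(f"http://127.0.0.1:{port}{cfg.deployment.endpoints['chat']}")  # -> port 5009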
nemo_evaluator_launcher/executors/base.py

@@ -21,10 +21,12 @@ Defines the abstract interface for all executor implementations and common statu
 from abc import ABC, abstractmethod
 from dataclasses import dataclass
 from enum import Enum
-from typing import Any, Optional
+from typing import Any, Iterator, Optional, Tuple
 
 from omegaconf import DictConfig
 
+from nemo_evaluator_launcher.common.logging_utils import logger
+
 
 class ExecutionState(Enum):
     """Enumeration of possible execution states."""
@@ -118,3 +120,31 @@ class BaseExecutor(ABC):
             return f"Could not find or kill job {job_id} ({container_or_id}) - job was already killed"
         # Generic error message
         return f"Could not find or kill job {job_id} ({container_or_id})"
+
+    @staticmethod
+    def stream_logs(
+        id: str, executor_name: Optional[str] = None
+    ) -> Iterator[Tuple[str, str, str]]:
+        """Stream logs from a job or invocation group.
+
+        This is an optional method that executors can implement to provide log streaming.
+        If not implemented, it will log a warning and raise NotImplementedError.
+
+        Args:
+            id: Unique job identifier or invocation identifier.
+            executor_name: Optional executor name for warning messages. If not provided,
+                will attempt to infer from the calling context.
+
+        Yields:
+            Tuple[str, str, str]: Tuples of (job_id, task_name, log_line) for each log line.
+                Empty lines are yielded as empty strings.
+
+        Raises:
+            NotImplementedError: If the executor does not support log streaming.
+        """
+        executor_display_name = executor_name or "this executor"
+        logger.warning(
+            f"Log streaming is not yet implemented for executor '{executor_display_name}'. "
+            "Only 'local' executor currently supports log streaming."
+        )
+        raise NotImplementedError("This executor does not support log streaming")
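A hypothetical implementation sketch (not from the package) showing the shape of a stream_logs that satisfies the documented contract of yielding (job_id, task_name, log_line) tuples; the log-directory layout is assumed:

    from pathlib import Path
    from typing import Iterator, Optional, Tuple

    LOG_DIR = Path("/tmp/example_logs")  # assumed layout: <invocation_id>/<task_name>/stdout.log

    def stream_logs_from_files(
        id: str, executor_name: Optional[str] = None
    ) -> Iterator[Tuple[str, str, str]]:
        """Yield (job_id, task_name, log_line) tuples, matching the base-class contract."""
        for log_file in sorted(LOG_DIR.glob(f"{id}/*/stdout.log")):
            task_name = log_file.parent.name
            with log_file.open() as f:
                for line in f:
                    # Empty lines come through as empty strings, as documented above.
                    yield (id, task_name, line.rstrip("\n"))

    # Usage: for job_id, task, line in stream_logs_from_files("abc12345"): print(f"[{task}] {line}")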
nemo_evaluator_launcher/executors/lepton/deployment_helpers.py

@@ -19,6 +19,7 @@ Handles Lepton endpoint creation, management, and health checks.
 """
 
 import json
+import shlex
 import subprocess
 import time
 from pathlib import Path
@@ -27,6 +28,7 @@ from typing import Any, Dict, Optional
 # Import lepton dependencies
 from omegaconf import DictConfig
 
+from nemo_evaluator_launcher.common.helpers import _str_to_echo_command
 from nemo_evaluator_launcher.common.logging_utils import logger
 
 
@@ -235,6 +237,8 @@ def _create_inference_container_spec(deployment_cfg: DictConfig) -> Dict[str, An
     Returns:
         Container specification for Lepton.
     """
+    # Extract pre_cmd from deployment_cfg
+    pre_cmd: str = deployment_cfg.get("pre_cmd") or ""
     container_spec = {
         "image": deployment_cfg.image,
         "ports": [{"container_port": deployment_cfg.port}],
@@ -258,6 +262,18 @@ def _create_inference_container_spec(deployment_cfg: DictConfig) -> Dict[str, An
         if hasattr(deployment_cfg, "extra_args") and deployment_cfg.extra_args:
             command_parts.extend(deployment_cfg.extra_args.split())
 
+        # Wrap with pre_cmd if provided
+        if pre_cmd:
+            create_pre_script_cmd = _str_to_echo_command(
+                pre_cmd, filename="deployment_pre_cmd.sh"
+            )
+            original_cmd = " ".join(shlex.quote(str(c)) for c in command_parts)
+            command_parts = [
+                "/bin/bash",
+                "-c",
+                f"{create_pre_script_cmd.cmd} && source deployment_pre_cmd.sh && exec {original_cmd}",
+            ]
+
         container_spec["command"] = command_parts
 
     elif deployment_cfg.type == "sglang":
@@ -278,12 +294,31 @@ def _create_inference_container_spec(deployment_cfg: DictConfig) -> Dict[str, An
         if hasattr(deployment_cfg, "extra_args") and deployment_cfg.extra_args:
             command_parts.extend(deployment_cfg.extra_args.split())
 
+        # Wrap with pre_cmd if provided
+        if pre_cmd:
+            create_pre_script_cmd = _str_to_echo_command(
+                pre_cmd, filename="deployment_pre_cmd.sh"
+            )
+            original_cmd = " ".join(shlex.quote(str(c)) for c in command_parts)
+            command_parts = [
+                "/bin/bash",
+                "-c",
+                f"{create_pre_script_cmd.cmd} && source deployment_pre_cmd.sh && exec {original_cmd}",
+            ]
+
         container_spec["command"] = command_parts
 
     elif deployment_cfg.type == "nim":
         # NIM containers use their default entrypoint - no custom command needed
         # Configuration is handled via environment variables
-        pass
+        # pre_cmd is not supported for NIM deployments
+        if pre_cmd:
+            logger.error(
+                "pre_cmd is not supported for NIM deployments",
+                deployment_type="nim",
+                pre_cmd=pre_cmd,
+            )
+            raise ValueError("pre_cmd is not supported for NIM deployments")
 
     return container_spec
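To illustrate the wrapping step above, a sketch of what command_parts becomes once a pre_cmd is injected; the pre_cmd and server command are example values, not launcher defaults:

    import base64
    import shlex

    pre_cmd = "export HF_HOME=/opt/hf-cache"  # example pre_cmd
    command_parts = ["vllm", "serve", "/checkpoint", "--port", "8000"]  # example original command

    b64 = base64.b64encode(pre_cmd.encode("utf-8")).decode("utf-8")
    write_pre_cmd = f'echo "{b64}" | base64 -d > deployment_pre_cmd.sh'

    original_cmd = " ".join(shlex.quote(str(c)) for c in command_parts)
    command_parts = [
        "/bin/bash",
        "-c",
        # Write the pre_cmd script, source it in the same shell, then exec the original
        # server command so it replaces the wrapper shell as the container's main process.
        f"{write_pre_cmd} && source deployment_pre_cmd.sh && exec {original_cmd}",
    ]
    print(command_parts)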