nemo-evaluator-launcher 0.1.19__py3-none-any.whl → 0.1.56__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. nemo_evaluator_launcher/api/functional.py +159 -5
  2. nemo_evaluator_launcher/cli/logs.py +102 -0
  3. nemo_evaluator_launcher/cli/ls_task.py +280 -0
  4. nemo_evaluator_launcher/cli/ls_tasks.py +208 -55
  5. nemo_evaluator_launcher/cli/main.py +29 -2
  6. nemo_evaluator_launcher/cli/run.py +114 -16
  7. nemo_evaluator_launcher/cli/version.py +26 -23
  8. nemo_evaluator_launcher/common/container_metadata/__init__.py +61 -0
  9. nemo_evaluator_launcher/common/container_metadata/intermediate_repr.py +530 -0
  10. nemo_evaluator_launcher/common/container_metadata/loading.py +1126 -0
  11. nemo_evaluator_launcher/common/container_metadata/registries.py +824 -0
  12. nemo_evaluator_launcher/common/container_metadata/utils.py +63 -0
  13. nemo_evaluator_launcher/common/helpers.py +200 -51
  14. nemo_evaluator_launcher/common/logging_utils.py +16 -5
  15. nemo_evaluator_launcher/common/mapping.py +341 -155
  16. nemo_evaluator_launcher/common/printing_utils.py +25 -12
  17. nemo_evaluator_launcher/configs/deployment/sglang.yaml +4 -2
  18. nemo_evaluator_launcher/configs/deployment/trtllm.yaml +2 -3
  19. nemo_evaluator_launcher/configs/deployment/vllm.yaml +0 -1
  20. nemo_evaluator_launcher/configs/execution/slurm/default.yaml +14 -0
  21. nemo_evaluator_launcher/executors/base.py +31 -1
  22. nemo_evaluator_launcher/executors/lepton/deployment_helpers.py +36 -1
  23. nemo_evaluator_launcher/executors/lepton/executor.py +107 -9
  24. nemo_evaluator_launcher/executors/local/executor.py +383 -24
  25. nemo_evaluator_launcher/executors/local/run.template.sh +54 -2
  26. nemo_evaluator_launcher/executors/slurm/executor.py +559 -64
  27. nemo_evaluator_launcher/executors/slurm/proxy.cfg.template +26 -0
  28. nemo_evaluator_launcher/exporters/utils.py +32 -46
  29. nemo_evaluator_launcher/package_info.py +1 -1
  30. nemo_evaluator_launcher/resources/all_tasks_irs.yaml +17016 -0
  31. nemo_evaluator_launcher/resources/mapping.toml +64 -315
  32. {nemo_evaluator_launcher-0.1.19.dist-info → nemo_evaluator_launcher-0.1.56.dist-info}/METADATA +4 -3
  33. nemo_evaluator_launcher-0.1.56.dist-info/RECORD +69 -0
  34. {nemo_evaluator_launcher-0.1.19.dist-info → nemo_evaluator_launcher-0.1.56.dist-info}/entry_points.txt +1 -0
  35. nemo_evaluator_launcher-0.1.19.dist-info/RECORD +0 -60
  36. {nemo_evaluator_launcher-0.1.19.dist-info → nemo_evaluator_launcher-0.1.56.dist-info}/WHEEL +0 -0
  37. {nemo_evaluator_launcher-0.1.19.dist-info → nemo_evaluator_launcher-0.1.56.dist-info}/licenses/LICENSE +0 -0
  38. {nemo_evaluator_launcher-0.1.19.dist-info → nemo_evaluator_launcher-0.1.56.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,63 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ #
16
+ """Utility functions for container metadata processing."""
17
+
18
+
19
def parse_container_image(container_image: str) -> tuple[str, str, str, str]:
    """Parse a container image string into registry type, registry URL, repository, and tag.

    Args:
        container_image: Container image string, e.g.
            "nvcr.io/nvidia/eval-factory/simple-evals:25.10" or
            "localhost:5000/team/image" (registry with a port, no tag).

    Returns:
        Tuple of (registry_type, registry_url, repository, tag). The tag
        defaults to "latest" when the image string carries none. When the
        first path component is not a registry host, the Docker Hub registry
        URL "registry-1.docker.io" is returned.

    Raises:
        ValueError: If the image (without its tag) has fewer than two
            "/"-separated components.
    """
    # A ":" separates the tag only when it sits in the LAST path component.
    # A ":" in an earlier component is a registry port ("host:5000/repo") and
    # must not be mistaken for the tag separator.
    last_component = container_image.rsplit("/", 1)[-1]
    if ":" in last_component:
        image_part, tag = container_image.rsplit(":", 1)
    else:
        image_part, tag = container_image, "latest"

    # Parse registry and repository
    parts = image_part.split("/")
    if len(parts) < 2:
        raise ValueError(f"Invalid container image format: {container_image}")

    # The first component is a registry host when it contains a '.', carries a
    # port (':'), or is 'localhost'.
    first = parts[0]
    if "." in first or ":" in first or first == "localhost":
        # Everything that is not GitLab (nvcr.io included) is handled as "nvcr".
        registry_type = "gitlab" if "gitlab" in first.lower() else "nvcr"
        # The host (with its port, if any) is used verbatim as the registry URL.
        return registry_type, first, "/".join(parts[1:]), tag

    # Default registry (Docker Hub)
    return "nvcr", "registry-1.docker.io", image_part, tag
@@ -14,7 +14,6 @@
14
14
  # limitations under the License.
15
15
  #
16
16
  import base64
17
- import copy
18
17
  import datetime
19
18
  from dataclasses import dataclass
20
19
  from typing import Optional
@@ -22,6 +21,7 @@ from typing import Optional
22
21
  import yaml
23
22
  from omegaconf import DictConfig, OmegaConf
24
23
 
24
+ from nemo_evaluator_launcher.cli.version import get_versions
25
25
  from nemo_evaluator_launcher.common.logging_utils import logger
26
26
 
27
27
 
@@ -35,35 +35,97 @@ class CmdAndReadableComment:
35
35
  # A debuggable readable comment that can be passed along for accompanying
36
36
  # the actual command
37
37
  debug: str
38
+ # Whether the content might be potentially unsafe. This is a flag useful for
39
+ # downstream callers who want to raise exceptions e.g. when a script was
40
+ # saved that would execute this command.
41
+ is_potentially_unsafe: bool = False
38
42
 
39
43
 
40
- def _yaml_to_echo_command(
41
- yaml_str: str, filename: str = "config_ef.yaml"
42
- ) -> CmdAndReadableComment:
43
- """Create a safe (see below) echo command saving a yaml to file.
44
+ def _str_to_echo_command(str_to_save: str, filename: str) -> CmdAndReadableComment:
45
+ """Create a safe (see below) echo command saving a string to file.
44
46
 
45
47
  Safety in this context means the ability to pass such echo command through the
46
48
  `bash -c '...'` boundaries for example.
47
49
 
48
50
  Naturally, encoding with base64 creates debuggability issues. For that, the second
49
- output of the function is the yaml string with bash comment signs prepended.
51
+ output of the function is the string with bash comment signs prepended.
50
52
  """
51
- yaml_str_b64 = base64.b64encode(yaml_str.encode("utf-8")).decode("utf-8")
53
+ str_to_save_b64 = base64.b64encode(str_to_save.encode("utf-8")).decode("utf-8")
52
54
  debug_str = "\n".join(
53
- [f"# Contents of {filename}"] + ["# " + s for s in yaml_str.splitlines()]
55
+ [f"# Contents of {filename}"] + ["# " + s for s in str_to_save.splitlines()]
54
56
  )
55
57
  return CmdAndReadableComment(
56
- cmd=f'echo "{yaml_str_b64}" | base64 -d > {filename}', debug=debug_str
58
+ cmd=f'echo "{str_to_save_b64}" | base64 -d > {filename}', debug=debug_str
57
59
  )
58
60
 
59
61
 
62
+ def _set_nested_optionally_overriding(
63
+ d: dict, keys: list[str], val: object, *, override_if_exists: bool = False
64
+ ):
65
+ """Sets d[...keys....] = value, creating keys all the way"""
66
+ temp = d
67
+ for key in keys[:-1]:
68
+ temp = temp.setdefault(key, {})
69
+ if override_if_exists or keys[-1] not in temp:
70
+ temp[keys[-1]] = val
71
+
72
+
73
+ _MIGRATION_MESSAGE = """
74
+ `overrides` field is no longer supported. Use `nemo_evaluator_config` field instead, e.g.:
75
+
76
+ 1. If you are using overrides in your yaml config, replace:
77
+
78
+ ```yaml
79
+ evaluation:
80
+ overrides:
81
+ config.params.temperature: 0.6
82
+ config.params.top_p: 0.95
83
+ ```
84
+
85
+ with:
86
+
87
+ ```yaml
88
+ evaluation:
89
+ nemo_evaluator_config:
90
+ config:
91
+ params:
92
+ temperature: 0.6
93
+ top_p: 0.95
94
+ ```
95
+
96
+ 2. If you are using overrides in your cli command, replace:
97
+
98
+ ```bash
99
+ nemo-evaluator-launcher run --config my_config.yaml \\
100
+ -o evaluation.overrides.config.params.temperature=0.6 \\
101
+ -o evaluation.overrides.config.params.top_p=0.95
102
+ ```
103
+
104
+ with:
105
+
106
+ ```bash
107
+ nemo-evaluator-launcher run --config my_config.yaml \\
108
+ -o evaluation.nemo_evaluator_config.config.params.temperature=0.6 \\
109
+ -o evaluation.nemo_evaluator_config.config.params.top_p=0.95
110
+ ```
111
+ """
112
+
113
+
60
114
  def get_eval_factory_config(
61
- cfg: DictConfig, user_task_config: DictConfig, task_definition: dict
115
+ cfg: DictConfig,
116
+ user_task_config: DictConfig,
62
117
  ) -> dict:
63
118
  """Extract config fields for eval factory.
64
119
 
65
120
  This function extracts the config field similar to how overrides are handled.
121
+
122
+ It applies task-level overrides to the global overrides.
66
123
  """
124
+
125
+ if cfg.evaluation.get("overrides") or user_task_config.get("overrides"):
126
+ raise ValueError(_MIGRATION_MESSAGE)
127
+
128
+ logger.debug("Getting nemo evaluator merged config")
67
129
  # Extract config fields similar to overrides - convert to basic Python types first
68
130
  # Support both new and old format for backward compatibility
69
131
  cfg_config = cfg.evaluation.get("nemo_evaluator_config") or cfg.evaluation.get(
@@ -80,66 +142,146 @@ def get_eval_factory_config(
80
142
  user_config = OmegaConf.to_container(user_config, resolve=True)
81
143
 
82
144
  # Merge the configs
83
- config_fields = copy.deepcopy(cfg_config or {})
84
- config_fields.update(user_config or {})
145
+ merged_nemo_evaluator_config: dict = OmegaConf.to_container(
146
+ OmegaConf.merge(cfg_config, user_config)
147
+ )
148
+
149
+ logger.debug(
150
+ "Merged nemo evaluator config, not final",
151
+ source_global_cfg=cfg_config,
152
+ source_task_config=user_config,
153
+ result=merged_nemo_evaluator_config,
154
+ )
85
155
 
86
- return config_fields
156
+ return merged_nemo_evaluator_config
87
157
 
88
158
 
89
159
  def get_eval_factory_command(
90
- cfg: DictConfig, user_task_config: DictConfig, task_definition: dict
160
+ cfg: DictConfig,
161
+ user_task_config: DictConfig,
162
+ task_definition: dict,
91
163
  ) -> CmdAndReadableComment:
92
- config_fields = get_eval_factory_config(cfg, user_task_config, task_definition)
93
-
94
- overrides = copy.deepcopy(dict(cfg.evaluation.get("overrides", {})))
95
- overrides.update(dict(user_task_config.get("overrides", {})))
96
- # NOTE(dfridman): Temporary fix to make sure that the overrides arg is not split into multiple lines.
97
- # Consider passing a JSON object on Eval Factory side
98
- overrides = {
99
- k: (v.strip("\n") if isinstance(v, str) else v) for k, v in overrides.items()
100
- }
101
- overrides_str = ",".join([f"{k}={v}" for k, v in overrides.items()])
102
- model_url = get_endpoint_url(cfg, user_task_config, task_definition)
164
+ # This gets the eval_factory_config merged from both top-level and task-level.
165
+ merged_nemo_evaluator_config = get_eval_factory_config(
166
+ cfg,
167
+ user_task_config,
168
+ )
103
169
 
104
- model_id = get_served_model_name(cfg)
105
- model_type = task_definition["endpoint_type"]
106
- eval_type = task_definition["task"]
170
+ # We now prepare the config to be passed to `nemo-evaluator` command.
171
+ _set_nested_optionally_overriding(
172
+ merged_nemo_evaluator_config,
173
+ ["target", "api_endpoint", "url"],
174
+ get_endpoint_url(
175
+ cfg,
176
+ merged_nemo_evaluator_config=merged_nemo_evaluator_config,
177
+ endpoint_type=task_definition["endpoint_type"],
178
+ ),
179
+ )
180
+ _set_nested_optionally_overriding(
181
+ merged_nemo_evaluator_config,
182
+ ["target", "api_endpoint", "model_id"],
183
+ get_served_model_name(cfg),
184
+ )
185
+ _set_nested_optionally_overriding(
186
+ merged_nemo_evaluator_config,
187
+ ["target", "api_endpoint", "type"],
188
+ task_definition["endpoint_type"],
189
+ )
190
+ _set_nested_optionally_overriding(
191
+ merged_nemo_evaluator_config,
192
+ ["config", "type"],
193
+ task_definition["task"],
194
+ )
195
+ _set_nested_optionally_overriding(
196
+ merged_nemo_evaluator_config,
197
+ ["config", "output_dir"],
198
+ "/results",
199
+ )
200
+ # FIXME(martas): update to api_key_name after 25.12 is released
201
+ _set_nested_optionally_overriding(
202
+ merged_nemo_evaluator_config,
203
+ ["target", "api_endpoint", "api_key"],
204
+ "API_KEY",
205
+ )
206
+ _set_nested_optionally_overriding(
207
+ merged_nemo_evaluator_config,
208
+ [
209
+ "metadata",
210
+ "launcher_resolved_config",
211
+ ],
212
+ OmegaConf.to_container(cfg, resolve=True),
213
+ )
214
+ _set_nested_optionally_overriding(
215
+ merged_nemo_evaluator_config,
216
+ ["metadata", "versioning"],
217
+ get_versions(),
218
+ )
107
219
 
108
- create_file_cmd = _yaml_to_echo_command(
109
- yaml.safe_dump(config_fields), "config_ef.yaml"
220
+ # Now get the pre_cmd either from `evaluation.pre_cmd` or task-level pre_cmd. Note the
221
+ # order -- task level wins.
222
+ pre_cmd: str = (
223
+ user_task_config.get("pre_cmd") or cfg.evaluation.get("pre_cmd") or ""
110
224
  )
111
- eval_command = f"""cmd=$([[ $(command -v nemo-evaluator) ]] && echo 'nemo-evaluator' || echo 'eval-factory') && $cmd run_eval --model_id {model_id} --model_type {model_type} --eval_type {eval_type} --model_url {model_url} --api_key_name API_KEY --output_dir /results --run_config config_ef.yaml"""
112
225
 
113
- if overrides:
114
- eval_command = f"{eval_command} --overrides {overrides_str}"
226
+ is_potentially_unsafe = False
227
+ if pre_cmd:
228
+ logger.warning(
229
+ "Found non-empty pre_cmd that might be a security risk if executed. "
230
+ "Setting `is_potentially_unsafe` to `True`",
231
+ pre_cmd=pre_cmd,
232
+ )
233
+ is_potentially_unsafe = True
234
+ _set_nested_optionally_overriding(
235
+ merged_nemo_evaluator_config,
236
+ ["metadata", "pre_cmd"],
237
+ pre_cmd,
238
+ )
239
+
240
+ create_pre_script_cmd = _str_to_echo_command(pre_cmd, filename="pre_cmd.sh")
241
+
242
+ create_yaml_cmd = _str_to_echo_command(
243
+ yaml.safe_dump(merged_nemo_evaluator_config), "config_ef.yaml"
244
+ )
245
+
246
+ # NOTE: we use `source` to allow tricks like exports etc (if needed) -- it runs in the same
247
+ # shell as the command.
248
+ eval_command = (
249
+ "cmd=$(command -v nemo-evaluator >/dev/null 2>&1 && echo nemo-evaluator || echo eval-factory) "
250
+ + "&& source pre_cmd.sh "
251
+ + "&& $cmd run_eval --run_config config_ef.yaml"
252
+ )
115
253
 
116
254
  # We return both the command and the debugging base64-decoded strings, useful
117
255
  # for exposing when building scripts.
118
256
  return CmdAndReadableComment(
119
- cmd=create_file_cmd.cmd + " && " + eval_command, debug=create_file_cmd.debug
257
+ cmd=create_pre_script_cmd.cmd
258
+ + " && "
259
+ + create_yaml_cmd.cmd
260
+ + " && "
261
+ + eval_command,
262
+ debug=create_pre_script_cmd.debug + "\n\n" + create_yaml_cmd.debug,
263
+ is_potentially_unsafe=is_potentially_unsafe,
120
264
  )
121
265
 
122
266
 
123
267
  def get_endpoint_url(
124
- cfg: DictConfig, user_task_config: DictConfig, task_definition: dict
268
+ cfg: DictConfig,
269
+ merged_nemo_evaluator_config: dict,
270
+ endpoint_type: str,
125
271
  ) -> str:
126
272
  def apply_url_override(url: str) -> str:
127
273
  """Apply user URL override if provided."""
128
- nemo_evaluator_config_url = user_task_config.get(
129
- "nemo_evaluator_config", {}
130
- ).get("target.api_endpoint.url", None)
131
-
132
- override_url = user_task_config.get("overrides", {}).get(
133
- "config.target.api_endpoint.url", None
134
- )
135
- return (
136
- override_url
137
- if override_url is not None
138
- else nemo_evaluator_config_url
139
- if nemo_evaluator_config_url is not None
140
- else url
274
+ nemo_evaluator_config_url = (
275
+ merged_nemo_evaluator_config.get("target", {})
276
+ .get("api_endpoint", {})
277
+ .get("url", None)
141
278
  )
142
279
 
280
+ if nemo_evaluator_config_url:
281
+ return nemo_evaluator_config_url
282
+
283
+ return url
284
+
143
285
  if cfg.deployment.type == "none":
144
286
  # For deployment: none, use target URL regardless of executor type
145
287
  if OmegaConf.is_missing(cfg.target.api_endpoint, "url"):
@@ -160,9 +302,16 @@ def get_endpoint_url(
160
302
 
161
303
  else:
162
304
  # Local executor - use localhost
163
- task_endpoint_type = task_definition["endpoint_type"]
164
- endpoint_uri = cfg.deployment.endpoints[task_endpoint_type]
165
- endpoint_url = f"http://127.0.0.1:{cfg.deployment.port}{endpoint_uri}"
305
+ endpoint_uri = cfg.deployment.endpoints[endpoint_type]
306
+
307
+ # Use HAProxy port if multiple_instances is enabled
308
+ if cfg.deployment.get("multiple_instances", False):
309
+ proxy_config = cfg.execution.get("proxy", {}).get("config", {})
310
+ port = proxy_config.get("haproxy_port", 5009)
311
+ else:
312
+ port = cfg.deployment.port
313
+
314
+ endpoint_url = f"http://127.0.0.1:{port}{endpoint_uri}"
166
315
  return endpoint_url
167
316
 
168
317
 
@@ -61,8 +61,9 @@ import structlog
61
61
  # both are unset, default would be used.
62
62
  _LOG_LEVEL_ENV_VAR = "NEMO_EVALUATOR_LOG_LEVEL"
63
63
  _DEFAULT_LOG_LEVEL = "WARNING"
64
- _SENSITIVE_KEY_SUBSTRINGS = {
65
- # Keep minimal, broad substrings (normalized: lowercased, no spaces/_/-)
64
+ _SENSITIVE_KEY_SUBSTRINGS_NORMALIZED = {
65
+ # Keep minimal, broad substrings
66
+ # NOTE: normalized: lowercased, no spaces/_/-
66
67
  "authorization", # covers proxy-authorization, etc.
67
68
  "apikey", # covers api_key, api-key, x-api-key, nvidia_api_key, ...
68
69
  "accesskey", # covers access_key / access-key
@@ -73,6 +74,10 @@ _SENSITIVE_KEY_SUBSTRINGS = {
73
74
  "pwd", # common shorthand
74
75
  "passwd", # common variant
75
76
  }
77
+ _ALLOWLISTED_KEYS_SUBSTRINGS = {
78
+ # NOTE: non-normalized (for allowlisting we want more control)
79
+ "_tokens", # This likely would allow us to not redact useful stuff like `limit_tokens`, `max_new_tokens`
80
+ }
76
81
 
77
82
 
78
83
  def _mask(val: object) -> str:
@@ -91,8 +96,11 @@ def _normalize(name: object) -> str:
91
96
 
92
97
 
93
98
  def _is_sensitive_key(key: object) -> bool:
94
- k = _normalize(key)
95
- return any(substr in k for substr in _SENSITIVE_KEY_SUBSTRINGS)
99
+ k_norm = _normalize(key)
100
+ k_non_norm = str(key)
101
+ return any(
102
+ substr in k_norm for substr in _SENSITIVE_KEY_SUBSTRINGS_NORMALIZED
103
+ ) and not any(substr in k_non_norm for substr in _ALLOWLISTED_KEYS_SUBSTRINGS)
96
104
 
97
105
 
98
106
  def _redact_mapping(m: dict) -> dict:
@@ -263,6 +271,9 @@ def _configure_structlog() -> None:
263
271
  structlog.processors.UnicodeDecoder(),
264
272
  ]
265
273
 
274
+ # Check if stderr is a TTY to determine if colors should be enabled
275
+ colors_enabled = sys.stderr.isatty()
276
+
266
277
  logging.config.dictConfig(
267
278
  {
268
279
  "version": 1,
@@ -273,7 +284,7 @@ def _configure_structlog() -> None:
273
284
  "()": "structlog.stdlib.ProcessorFormatter",
274
285
  "processors": [
275
286
  *shared_processors,
276
- MainConsoleRenderer(colors=True),
287
+ MainConsoleRenderer(colors=colors_enabled),
277
288
  ],
278
289
  },
279
290
  # Formatter for plain file output