nemo-evaluator-launcher 0.1.19__tar.gz → 0.1.26__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {nemo_evaluator_launcher-0.1.19 → nemo_evaluator_launcher-0.1.26}/PKG-INFO +2 -2
- {nemo_evaluator_launcher-0.1.19 → nemo_evaluator_launcher-0.1.26}/pyproject.toml +1 -1
- {nemo_evaluator_launcher-0.1.19 → nemo_evaluator_launcher-0.1.26}/src/nemo_evaluator_launcher/common/helpers.py +106 -32
- {nemo_evaluator_launcher-0.1.19 → nemo_evaluator_launcher-0.1.26}/src/nemo_evaluator_launcher/common/logging_utils.py +12 -4
- {nemo_evaluator_launcher-0.1.19 → nemo_evaluator_launcher-0.1.26}/src/nemo_evaluator_launcher/executors/lepton/executor.py +1 -1
- {nemo_evaluator_launcher-0.1.19 → nemo_evaluator_launcher-0.1.26}/src/nemo_evaluator_launcher/executors/slurm/executor.py +10 -1
- {nemo_evaluator_launcher-0.1.19 → nemo_evaluator_launcher-0.1.26}/src/nemo_evaluator_launcher/package_info.py +1 -1
- {nemo_evaluator_launcher-0.1.19 → nemo_evaluator_launcher-0.1.26}/src/nemo_evaluator_launcher/resources/mapping.toml +51 -15
- {nemo_evaluator_launcher-0.1.19 → nemo_evaluator_launcher-0.1.26}/src/nemo_evaluator_launcher.egg-info/PKG-INFO +2 -2
- {nemo_evaluator_launcher-0.1.19 → nemo_evaluator_launcher-0.1.26}/src/nemo_evaluator_launcher.egg-info/requires.txt +1 -1
- {nemo_evaluator_launcher-0.1.19 → nemo_evaluator_launcher-0.1.26}/LICENSE +0 -0
- {nemo_evaluator_launcher-0.1.19 → nemo_evaluator_launcher-0.1.26}/README.md +0 -0
- {nemo_evaluator_launcher-0.1.19 → nemo_evaluator_launcher-0.1.26}/setup.cfg +0 -0
- {nemo_evaluator_launcher-0.1.19 → nemo_evaluator_launcher-0.1.26}/src/nemo_evaluator_launcher/__init__.py +0 -0
- {nemo_evaluator_launcher-0.1.19 → nemo_evaluator_launcher-0.1.26}/src/nemo_evaluator_launcher/api/__init__.py +0 -0
- {nemo_evaluator_launcher-0.1.19 → nemo_evaluator_launcher-0.1.26}/src/nemo_evaluator_launcher/api/functional.py +0 -0
- {nemo_evaluator_launcher-0.1.19 → nemo_evaluator_launcher-0.1.26}/src/nemo_evaluator_launcher/api/types.py +0 -0
- {nemo_evaluator_launcher-0.1.19 → nemo_evaluator_launcher-0.1.26}/src/nemo_evaluator_launcher/api/utils.py +0 -0
- {nemo_evaluator_launcher-0.1.19 → nemo_evaluator_launcher-0.1.26}/src/nemo_evaluator_launcher/cli/__init__.py +0 -0
- {nemo_evaluator_launcher-0.1.19 → nemo_evaluator_launcher-0.1.26}/src/nemo_evaluator_launcher/cli/export.py +0 -0
- {nemo_evaluator_launcher-0.1.19 → nemo_evaluator_launcher-0.1.26}/src/nemo_evaluator_launcher/cli/info.py +0 -0
- {nemo_evaluator_launcher-0.1.19 → nemo_evaluator_launcher-0.1.26}/src/nemo_evaluator_launcher/cli/kill.py +0 -0
- {nemo_evaluator_launcher-0.1.19 → nemo_evaluator_launcher-0.1.26}/src/nemo_evaluator_launcher/cli/ls_runs.py +0 -0
- {nemo_evaluator_launcher-0.1.19 → nemo_evaluator_launcher-0.1.26}/src/nemo_evaluator_launcher/cli/ls_tasks.py +0 -0
- {nemo_evaluator_launcher-0.1.19 → nemo_evaluator_launcher-0.1.26}/src/nemo_evaluator_launcher/cli/main.py +0 -0
- {nemo_evaluator_launcher-0.1.19 → nemo_evaluator_launcher-0.1.26}/src/nemo_evaluator_launcher/cli/run.py +0 -0
- {nemo_evaluator_launcher-0.1.19 → nemo_evaluator_launcher-0.1.26}/src/nemo_evaluator_launcher/cli/status.py +0 -0
- {nemo_evaluator_launcher-0.1.19 → nemo_evaluator_launcher-0.1.26}/src/nemo_evaluator_launcher/cli/version.py +0 -0
- {nemo_evaluator_launcher-0.1.19 → nemo_evaluator_launcher-0.1.26}/src/nemo_evaluator_launcher/common/__init__.py +0 -0
- {nemo_evaluator_launcher-0.1.19 → nemo_evaluator_launcher-0.1.26}/src/nemo_evaluator_launcher/common/execdb.py +0 -0
- {nemo_evaluator_launcher-0.1.19 → nemo_evaluator_launcher-0.1.26}/src/nemo_evaluator_launcher/common/mapping.py +0 -0
- {nemo_evaluator_launcher-0.1.19 → nemo_evaluator_launcher-0.1.26}/src/nemo_evaluator_launcher/common/printing_utils.py +0 -0
- {nemo_evaluator_launcher-0.1.19 → nemo_evaluator_launcher-0.1.26}/src/nemo_evaluator_launcher/configs/__init__.py +0 -0
- {nemo_evaluator_launcher-0.1.19 → nemo_evaluator_launcher-0.1.26}/src/nemo_evaluator_launcher/configs/default.yaml +0 -0
- {nemo_evaluator_launcher-0.1.19 → nemo_evaluator_launcher-0.1.26}/src/nemo_evaluator_launcher/configs/deployment/generic.yaml +0 -0
- {nemo_evaluator_launcher-0.1.19 → nemo_evaluator_launcher-0.1.26}/src/nemo_evaluator_launcher/configs/deployment/nim.yaml +0 -0
- {nemo_evaluator_launcher-0.1.19 → nemo_evaluator_launcher-0.1.26}/src/nemo_evaluator_launcher/configs/deployment/none.yaml +0 -0
- {nemo_evaluator_launcher-0.1.19 → nemo_evaluator_launcher-0.1.26}/src/nemo_evaluator_launcher/configs/deployment/sglang.yaml +0 -0
- {nemo_evaluator_launcher-0.1.19 → nemo_evaluator_launcher-0.1.26}/src/nemo_evaluator_launcher/configs/deployment/trtllm.yaml +0 -0
- {nemo_evaluator_launcher-0.1.19 → nemo_evaluator_launcher-0.1.26}/src/nemo_evaluator_launcher/configs/deployment/vllm.yaml +0 -0
- {nemo_evaluator_launcher-0.1.19 → nemo_evaluator_launcher-0.1.26}/src/nemo_evaluator_launcher/configs/execution/lepton/default.yaml +0 -0
- {nemo_evaluator_launcher-0.1.19 → nemo_evaluator_launcher-0.1.26}/src/nemo_evaluator_launcher/configs/execution/local.yaml +0 -0
- {nemo_evaluator_launcher-0.1.19 → nemo_evaluator_launcher-0.1.26}/src/nemo_evaluator_launcher/configs/execution/slurm/default.yaml +0 -0
- {nemo_evaluator_launcher-0.1.19 → nemo_evaluator_launcher-0.1.26}/src/nemo_evaluator_launcher/executors/__init__.py +0 -0
- {nemo_evaluator_launcher-0.1.19 → nemo_evaluator_launcher-0.1.26}/src/nemo_evaluator_launcher/executors/base.py +0 -0
- {nemo_evaluator_launcher-0.1.19 → nemo_evaluator_launcher-0.1.26}/src/nemo_evaluator_launcher/executors/lepton/__init__.py +0 -0
- {nemo_evaluator_launcher-0.1.19 → nemo_evaluator_launcher-0.1.26}/src/nemo_evaluator_launcher/executors/lepton/deployment_helpers.py +0 -0
- {nemo_evaluator_launcher-0.1.19 → nemo_evaluator_launcher-0.1.26}/src/nemo_evaluator_launcher/executors/lepton/job_helpers.py +0 -0
- {nemo_evaluator_launcher-0.1.19 → nemo_evaluator_launcher-0.1.26}/src/nemo_evaluator_launcher/executors/local/__init__.py +0 -0
- {nemo_evaluator_launcher-0.1.19 → nemo_evaluator_launcher-0.1.26}/src/nemo_evaluator_launcher/executors/local/executor.py +0 -0
- {nemo_evaluator_launcher-0.1.19 → nemo_evaluator_launcher-0.1.26}/src/nemo_evaluator_launcher/executors/local/run.template.sh +0 -0
- {nemo_evaluator_launcher-0.1.19 → nemo_evaluator_launcher-0.1.26}/src/nemo_evaluator_launcher/executors/registry.py +0 -0
- {nemo_evaluator_launcher-0.1.19 → nemo_evaluator_launcher-0.1.26}/src/nemo_evaluator_launcher/executors/slurm/__init__.py +0 -0
- {nemo_evaluator_launcher-0.1.19 → nemo_evaluator_launcher-0.1.26}/src/nemo_evaluator_launcher/exporters/__init__.py +0 -0
- {nemo_evaluator_launcher-0.1.19 → nemo_evaluator_launcher-0.1.26}/src/nemo_evaluator_launcher/exporters/base.py +0 -0
- {nemo_evaluator_launcher-0.1.19 → nemo_evaluator_launcher-0.1.26}/src/nemo_evaluator_launcher/exporters/gsheets.py +0 -0
- {nemo_evaluator_launcher-0.1.19 → nemo_evaluator_launcher-0.1.26}/src/nemo_evaluator_launcher/exporters/local.py +0 -0
- {nemo_evaluator_launcher-0.1.19 → nemo_evaluator_launcher-0.1.26}/src/nemo_evaluator_launcher/exporters/mlflow.py +0 -0
- {nemo_evaluator_launcher-0.1.19 → nemo_evaluator_launcher-0.1.26}/src/nemo_evaluator_launcher/exporters/registry.py +0 -0
- {nemo_evaluator_launcher-0.1.19 → nemo_evaluator_launcher-0.1.26}/src/nemo_evaluator_launcher/exporters/utils.py +0 -0
- {nemo_evaluator_launcher-0.1.19 → nemo_evaluator_launcher-0.1.26}/src/nemo_evaluator_launcher/exporters/wandb.py +0 -0
- {nemo_evaluator_launcher-0.1.19 → nemo_evaluator_launcher-0.1.26}/src/nemo_evaluator_launcher.egg-info/SOURCES.txt +0 -0
- {nemo_evaluator_launcher-0.1.19 → nemo_evaluator_launcher-0.1.26}/src/nemo_evaluator_launcher.egg-info/dependency_links.txt +0 -0
- {nemo_evaluator_launcher-0.1.19 → nemo_evaluator_launcher-0.1.26}/src/nemo_evaluator_launcher.egg-info/entry_points.txt +0 -0
- {nemo_evaluator_launcher-0.1.19 → nemo_evaluator_launcher-0.1.26}/src/nemo_evaluator_launcher.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: nemo-evaluator-launcher
|
|
3
|
-
Version: 0.1.19
|
|
3
|
+
Version: 0.1.26
|
|
4
4
|
Summary: Launcher for the evaluations provided by NeMo Evaluator containers with different runtime backends
|
|
5
5
|
Author: NVIDIA
|
|
6
6
|
Author-email: nemo-toolkit@nvidia.com
|
|
@@ -478,7 +478,7 @@ Requires-Dist: mlflow>=2.8.0; extra == "mlflow"
|
|
|
478
478
|
Provides-Extra: wandb
|
|
479
479
|
Requires-Dist: wandb>=0.15.0; extra == "wandb"
|
|
480
480
|
Provides-Extra: gsheets
|
|
481
|
-
Requires-Dist:
|
|
481
|
+
Requires-Dist: gspread>=5.0.0; extra == "gsheets"
|
|
482
482
|
Provides-Extra: exporters
|
|
483
483
|
Requires-Dist: mlflow; extra == "exporters"
|
|
484
484
|
Requires-Dist: wandb; extra == "exporters"
|
|
@@ -40,7 +40,7 @@ repository = "https://github.com/NVIDIA-NeMo/Evaluator/packages/nemo-evaluator-l
|
|
|
40
40
|
[project.optional-dependencies]
|
|
41
41
|
mlflow = ["mlflow>=2.8.0"]
|
|
42
42
|
wandb = ["wandb>=0.15.0"]
|
|
43
|
-
gsheets = ["
|
|
43
|
+
gsheets = ["gspread>=5.0.0"]
|
|
44
44
|
exporters = ["mlflow", "wandb", "gsheets"]
|
|
45
45
|
all = ["mlflow", "wandb", "gsheets"]
|
|
46
46
|
|
|
@@ -57,13 +57,38 @@ def _yaml_to_echo_command(
|
|
|
57
57
|
)
|
|
58
58
|
|
|
59
59
|
|
|
60
|
+
def _set_nested_optionally_overriding(
|
|
61
|
+
d: dict, keys: list[str], val: object, *, override_if_exists: bool = False
|
|
62
|
+
):
|
|
63
|
+
"""Sets d[...keys....] = value, creating keys all the way"""
|
|
64
|
+
temp = d
|
|
65
|
+
for key in keys[:-1]:
|
|
66
|
+
temp = temp.setdefault(key, {})
|
|
67
|
+
if override_if_exists or keys[-1] not in temp:
|
|
68
|
+
temp[keys[-1]] = val
|
|
69
|
+
|
|
70
|
+
|
|
60
71
|
def get_eval_factory_config(
|
|
61
|
-
cfg: DictConfig,
|
|
72
|
+
cfg: DictConfig,
|
|
73
|
+
user_task_config: DictConfig,
|
|
62
74
|
) -> dict:
|
|
63
75
|
"""Extract config fields for eval factory.
|
|
64
76
|
|
|
65
77
|
This function extracts the config field similar to how overrides are handled.
|
|
78
|
+
|
|
79
|
+
Overrides will be start to be deprecated (or not, but at least a warning will be logged).
|
|
66
80
|
"""
|
|
81
|
+
|
|
82
|
+
if cfg.evaluation.get("overrides") or user_task_config.get("overrides"):
|
|
83
|
+
# TODO(agronskiy): start removing overrides, test `test_start_deprecating_overrides`
|
|
84
|
+
# will start failing soon.
|
|
85
|
+
logger.warning(
|
|
86
|
+
"We are deprecating using old-style dot-delimited overrides "
|
|
87
|
+
"in favour of `nemo_evaluator_config` field. Please check "
|
|
88
|
+
"the documentation."
|
|
89
|
+
)
|
|
90
|
+
|
|
91
|
+
logger.debug("Getting nemo evaluator merged config")
|
|
67
92
|
# Extract config fields similar to overrides - convert to basic Python types first
|
|
68
93
|
# Support both new and old format for backward compatibility
|
|
69
94
|
cfg_config = cfg.evaluation.get("nemo_evaluator_config") or cfg.evaluation.get(
|
|
@@ -80,17 +105,73 @@ def get_eval_factory_config(
|
|
|
80
105
|
user_config = OmegaConf.to_container(user_config, resolve=True)
|
|
81
106
|
|
|
82
107
|
# Merge the configs
|
|
83
|
-
|
|
84
|
-
|
|
108
|
+
merged_nemo_evaluator_config: dict = OmegaConf.to_container(
|
|
109
|
+
OmegaConf.merge(cfg_config, user_config)
|
|
110
|
+
)
|
|
85
111
|
|
|
86
|
-
|
|
112
|
+
logger.debug(
|
|
113
|
+
"Merged nemo evaluator config, not final",
|
|
114
|
+
source_global_cfg=cfg_config,
|
|
115
|
+
source_task_config=user_config,
|
|
116
|
+
result=merged_nemo_evaluator_config,
|
|
117
|
+
)
|
|
118
|
+
|
|
119
|
+
return merged_nemo_evaluator_config
|
|
87
120
|
|
|
88
121
|
|
|
89
122
|
def get_eval_factory_command(
|
|
90
123
|
cfg: DictConfig, user_task_config: DictConfig, task_definition: dict
|
|
91
124
|
) -> CmdAndReadableComment:
|
|
92
|
-
|
|
125
|
+
merged_nemo_evaluator_config = get_eval_factory_config(
|
|
126
|
+
cfg,
|
|
127
|
+
user_task_config,
|
|
128
|
+
)
|
|
129
|
+
|
|
130
|
+
# We now prepare the config to be passed to `nemo-evaluator` command.
|
|
131
|
+
_set_nested_optionally_overriding(
|
|
132
|
+
merged_nemo_evaluator_config,
|
|
133
|
+
["target", "api_endpoint", "url"],
|
|
134
|
+
get_endpoint_url(
|
|
135
|
+
cfg,
|
|
136
|
+
merged_nemo_evaluator_config=merged_nemo_evaluator_config,
|
|
137
|
+
endpoint_type=task_definition["endpoint_type"],
|
|
138
|
+
),
|
|
139
|
+
)
|
|
140
|
+
_set_nested_optionally_overriding(
|
|
141
|
+
merged_nemo_evaluator_config,
|
|
142
|
+
["target", "api_endpoint", "model_id"],
|
|
143
|
+
get_served_model_name(cfg),
|
|
144
|
+
)
|
|
145
|
+
_set_nested_optionally_overriding(
|
|
146
|
+
merged_nemo_evaluator_config,
|
|
147
|
+
["target", "api_endpoint", "type"],
|
|
148
|
+
task_definition["endpoint_type"],
|
|
149
|
+
)
|
|
150
|
+
_set_nested_optionally_overriding(
|
|
151
|
+
merged_nemo_evaluator_config,
|
|
152
|
+
["config", "type"],
|
|
153
|
+
task_definition["task"],
|
|
154
|
+
)
|
|
155
|
+
_set_nested_optionally_overriding(
|
|
156
|
+
merged_nemo_evaluator_config,
|
|
157
|
+
["config", "output_dir"],
|
|
158
|
+
"/results",
|
|
159
|
+
)
|
|
160
|
+
_set_nested_optionally_overriding(
|
|
161
|
+
merged_nemo_evaluator_config,
|
|
162
|
+
["target", "api_endpoint", "api_key"],
|
|
163
|
+
"API_KEY",
|
|
164
|
+
)
|
|
165
|
+
|
|
166
|
+
create_file_cmd = _yaml_to_echo_command(
|
|
167
|
+
yaml.safe_dump(merged_nemo_evaluator_config), "config_ef.yaml"
|
|
168
|
+
)
|
|
169
|
+
eval_command = (
|
|
170
|
+
"cmd=$(command -v nemo-evaluator >/dev/null 2>&1 && echo nemo-evaluator || echo eval-factory) "
|
|
171
|
+
+ "&& $cmd run_eval --run_config config_ef.yaml"
|
|
172
|
+
)
|
|
93
173
|
|
|
174
|
+
# NOTE: see note and test about deprecating that.
|
|
94
175
|
overrides = copy.deepcopy(dict(cfg.evaluation.get("overrides", {})))
|
|
95
176
|
overrides.update(dict(user_task_config.get("overrides", {})))
|
|
96
177
|
# NOTE(dfridman): Temporary fix to make sure that the overrides arg is not split into multiple lines.
|
|
@@ -99,18 +180,7 @@ def get_eval_factory_command(
|
|
|
99
180
|
k: (v.strip("\n") if isinstance(v, str) else v) for k, v in overrides.items()
|
|
100
181
|
}
|
|
101
182
|
overrides_str = ",".join([f"{k}={v}" for k, v in overrides.items()])
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
model_id = get_served_model_name(cfg)
|
|
105
|
-
model_type = task_definition["endpoint_type"]
|
|
106
|
-
eval_type = task_definition["task"]
|
|
107
|
-
|
|
108
|
-
create_file_cmd = _yaml_to_echo_command(
|
|
109
|
-
yaml.safe_dump(config_fields), "config_ef.yaml"
|
|
110
|
-
)
|
|
111
|
-
eval_command = f"""cmd=$([[ $(command -v nemo-evaluator) ]] && echo 'nemo-evaluator' || echo 'eval-factory') && $cmd run_eval --model_id {model_id} --model_type {model_type} --eval_type {eval_type} --model_url {model_url} --api_key_name API_KEY --output_dir /results --run_config config_ef.yaml"""
|
|
112
|
-
|
|
113
|
-
if overrides:
|
|
183
|
+
if overrides_str:
|
|
114
184
|
eval_command = f"{eval_command} --overrides {overrides_str}"
|
|
115
185
|
|
|
116
186
|
# We return both the command and the debugging base64-decoded strings, useful
|
|
@@ -121,24 +191,29 @@ def get_eval_factory_command(
|
|
|
121
191
|
|
|
122
192
|
|
|
123
193
|
def get_endpoint_url(
|
|
124
|
-
cfg: DictConfig,
|
|
194
|
+
cfg: DictConfig,
|
|
195
|
+
merged_nemo_evaluator_config: dict,
|
|
196
|
+
endpoint_type: str,
|
|
125
197
|
) -> str:
|
|
126
198
|
def apply_url_override(url: str) -> str:
|
|
127
199
|
"""Apply user URL override if provided."""
|
|
128
|
-
nemo_evaluator_config_url =
|
|
129
|
-
"
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
override_url = user_task_config.get("overrides", {}).get(
|
|
133
|
-
"config.target.api_endpoint.url", None
|
|
200
|
+
nemo_evaluator_config_url = (
|
|
201
|
+
merged_nemo_evaluator_config.get("target", {})
|
|
202
|
+
.get("api_endpoint", {})
|
|
203
|
+
.get("url", None)
|
|
134
204
|
)
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
205
|
+
|
|
206
|
+
if nemo_evaluator_config_url:
|
|
207
|
+
return nemo_evaluator_config_url
|
|
208
|
+
|
|
209
|
+
# Being deprecated, see `get_eval_factory_config` message.
|
|
210
|
+
overrides_old_style_url = merged_nemo_evaluator_config.get("overrides", {}).get(
|
|
211
|
+
"target.api_endpoint.url", None
|
|
141
212
|
)
|
|
213
|
+
if overrides_old_style_url:
|
|
214
|
+
return overrides_old_style_url
|
|
215
|
+
|
|
216
|
+
return url
|
|
142
217
|
|
|
143
218
|
if cfg.deployment.type == "none":
|
|
144
219
|
# For deployment: none, use target URL regardless of executor type
|
|
@@ -160,8 +235,7 @@ def get_endpoint_url(
|
|
|
160
235
|
|
|
161
236
|
else:
|
|
162
237
|
# Local executor - use localhost
|
|
163
|
-
|
|
164
|
-
endpoint_uri = cfg.deployment.endpoints[task_endpoint_type]
|
|
238
|
+
endpoint_uri = cfg.deployment.endpoints[endpoint_type]
|
|
165
239
|
endpoint_url = f"http://127.0.0.1:{cfg.deployment.port}{endpoint_uri}"
|
|
166
240
|
return endpoint_url
|
|
167
241
|
|
|
@@ -61,8 +61,9 @@ import structlog
|
|
|
61
61
|
# both are unset, default would be used.
|
|
62
62
|
_LOG_LEVEL_ENV_VAR = "NEMO_EVALUATOR_LOG_LEVEL"
|
|
63
63
|
_DEFAULT_LOG_LEVEL = "WARNING"
|
|
64
|
-
|
|
65
|
-
# Keep minimal, broad substrings
|
|
64
|
+
_SENSITIVE_KEY_SUBSTRINGS_NORMALIZED = {
|
|
65
|
+
# Keep minimal, broad substrings
|
|
66
|
+
# NOTE: normalized: lowercased, no spaces/_/-
|
|
66
67
|
"authorization", # covers proxy-authorization, etc.
|
|
67
68
|
"apikey", # covers api_key, api-key, x-api-key, nvidia_api_key, ...
|
|
68
69
|
"accesskey", # covers access_key / access-key
|
|
@@ -73,6 +74,10 @@ _SENSITIVE_KEY_SUBSTRINGS = {
|
|
|
73
74
|
"pwd", # common shorthand
|
|
74
75
|
"passwd", # common variant
|
|
75
76
|
}
|
|
77
|
+
_ALLOWLISTED_KEYS_SUBSTRINGS = {
|
|
78
|
+
# NOTE: non-normalized (for allowlisting we want more control)
|
|
79
|
+
"_tokens", # This likely would allow us to not redact useful stuff like `limit_tokens`, `max_new_tokens`
|
|
80
|
+
}
|
|
76
81
|
|
|
77
82
|
|
|
78
83
|
def _mask(val: object) -> str:
|
|
@@ -91,8 +96,11 @@ def _normalize(name: object) -> str:
|
|
|
91
96
|
|
|
92
97
|
|
|
93
98
|
def _is_sensitive_key(key: object) -> bool:
|
|
94
|
-
|
|
95
|
-
|
|
99
|
+
k_norm = _normalize(key)
|
|
100
|
+
k_non_norm = str(key)
|
|
101
|
+
return any(
|
|
102
|
+
substr in k_norm for substr in _SENSITIVE_KEY_SUBSTRINGS_NORMALIZED
|
|
103
|
+
) and not any(substr in k_non_norm for substr in _ALLOWLISTED_KEYS_SUBSTRINGS)
|
|
96
104
|
|
|
97
105
|
|
|
98
106
|
def _redact_mapping(m: dict) -> dict:
|
|
@@ -610,7 +610,7 @@ class LeptonExecutor(BaseExecutor):
|
|
|
610
610
|
job_state = lepton_status.get("state", "Unknown")
|
|
611
611
|
|
|
612
612
|
# Map Lepton job states to our execution states
|
|
613
|
-
if job_state
|
|
613
|
+
if job_state in ["Succeeded", "Completed"]:
|
|
614
614
|
state = ExecutionState.SUCCESS
|
|
615
615
|
elif job_state in ["Running", "Pending", "Starting"]:
|
|
616
616
|
state = ExecutionState.RUNNING
|
|
@@ -42,6 +42,7 @@ from nemo_evaluator_launcher.common.helpers import (
|
|
|
42
42
|
get_api_key_name,
|
|
43
43
|
get_endpoint_url,
|
|
44
44
|
get_eval_factory_command,
|
|
45
|
+
get_eval_factory_config,
|
|
45
46
|
get_eval_factory_dataset_size_from_run_config,
|
|
46
47
|
get_health_url,
|
|
47
48
|
get_timestamp_string,
|
|
@@ -453,7 +454,15 @@ def _create_slurm_sbatch_script(
|
|
|
453
454
|
# get task from mapping, overrides, urls
|
|
454
455
|
tasks_mapping = load_tasks_mapping()
|
|
455
456
|
task_definition = get_task_from_mapping(task.name, tasks_mapping)
|
|
456
|
-
|
|
457
|
+
|
|
458
|
+
# Create merged config for get_endpoint_url
|
|
459
|
+
merged_nemo_evaluator_config = get_eval_factory_config(cfg, task)
|
|
460
|
+
health_url = get_health_url(
|
|
461
|
+
cfg,
|
|
462
|
+
get_endpoint_url(
|
|
463
|
+
cfg, merged_nemo_evaluator_config, task_definition["endpoint_type"]
|
|
464
|
+
),
|
|
465
|
+
)
|
|
457
466
|
|
|
458
467
|
# TODO(public release): convert to template
|
|
459
468
|
s = "#!/bin/bash\n"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# NOTE(agronskiy): checked parity
|
|
2
2
|
[lm-evaluation-harness]
|
|
3
|
-
container = "nvcr.io/nvidia/eval-factory/lm-evaluation-harness:25.
|
|
3
|
+
container = "nvcr.io/nvidia/eval-factory/lm-evaluation-harness:25.10"
|
|
4
4
|
|
|
5
5
|
[lm-evaluation-harness.tasks.chat.ifeval]
|
|
6
6
|
required_env_vars = []
|
|
@@ -124,7 +124,7 @@ required_env_vars = []
|
|
|
124
124
|
###############################################################################
|
|
125
125
|
# NOTE(agronskiy): checked parity
|
|
126
126
|
[mtbench]
|
|
127
|
-
container = "nvcr.io/nvidia/eval-factory/mtbench:25.08.1"
|
|
127
|
+
container = "nvcr.io/nvidia/eval-factory/mtbench:25.10"
|
|
128
128
|
|
|
129
129
|
[mtbench.tasks.chat.mtbench]
|
|
130
130
|
|
|
@@ -134,7 +134,7 @@ container = "nvcr.io/nvidia/eval-factory/mtbench:25.08.1"
|
|
|
134
134
|
###############################################################################
|
|
135
135
|
# NOTE(agronskiy): checked parity
|
|
136
136
|
[ifbench]
|
|
137
|
-
container = "nvcr.io/nvidia/eval-factory/ifbench:25.
|
|
137
|
+
container = "nvcr.io/nvidia/eval-factory/ifbench:25.10"
|
|
138
138
|
|
|
139
139
|
[ifbench.tasks.chat.ifbench]
|
|
140
140
|
required_env_vars = []
|
|
@@ -142,7 +142,7 @@ required_env_vars = []
|
|
|
142
142
|
|
|
143
143
|
###############################################################################
|
|
144
144
|
[simple_evals]
|
|
145
|
-
container = "nvcr.io/nvidia/eval-factory/simple-evals:25.
|
|
145
|
+
container = "nvcr.io/nvidia/eval-factory/simple-evals:25.10"
|
|
146
146
|
|
|
147
147
|
[simple_evals.tasks.chat.gpqa_diamond]
|
|
148
148
|
required_env_vars = ["HF_TOKEN"]
|
|
@@ -213,7 +213,7 @@ required_env_vars = []
|
|
|
213
213
|
###############################################################################
|
|
214
214
|
# NOTE(agronskiy): checked parity
|
|
215
215
|
[bigcode-evaluation-harness]
|
|
216
|
-
container = "nvcr.io/nvidia/eval-factory/bigcode-evaluation-harness:25.
|
|
216
|
+
container = "nvcr.io/nvidia/eval-factory/bigcode-evaluation-harness:25.10"
|
|
217
217
|
|
|
218
218
|
[bigcode-evaluation-harness.tasks.chat.mbpp]
|
|
219
219
|
required_env_vars = []
|
|
@@ -226,12 +226,12 @@ required_env_vars = []
|
|
|
226
226
|
[bigcode-evaluation-harness.tasks.completions.humaneval]
|
|
227
227
|
required_env_vars = []
|
|
228
228
|
|
|
229
|
-
[bigcode-evaluation-harness.tasks.
|
|
229
|
+
[bigcode-evaluation-harness.tasks.chat.humaneval_instruct]
|
|
230
230
|
|
|
231
231
|
|
|
232
232
|
###############################################################################
|
|
233
233
|
[livecodebench]
|
|
234
|
-
container = "nvcr.io/nvidia/eval-factory/livecodebench:25.
|
|
234
|
+
container = "nvcr.io/nvidia/eval-factory/livecodebench:25.10"
|
|
235
235
|
|
|
236
236
|
[livecodebench.tasks.chat.livecodebench_0724_0125]
|
|
237
237
|
required_env_vars = []
|
|
@@ -242,7 +242,7 @@ required_env_vars = []
|
|
|
242
242
|
|
|
243
243
|
###############################################################################
|
|
244
244
|
[scicode]
|
|
245
|
-
container = "nvcr.io/nvidia/eval-factory/scicode:25.
|
|
245
|
+
container = "nvcr.io/nvidia/eval-factory/scicode:25.10"
|
|
246
246
|
|
|
247
247
|
[scicode.tasks.chat.aa_scicode]
|
|
248
248
|
required_env_vars = []
|
|
@@ -250,7 +250,7 @@ required_env_vars = []
|
|
|
250
250
|
|
|
251
251
|
###############################################################################
|
|
252
252
|
[hle]
|
|
253
|
-
container = "nvcr.io/nvidia/eval-factory/hle:25.
|
|
253
|
+
container = "nvcr.io/nvidia/eval-factory/hle:25.10"
|
|
254
254
|
|
|
255
255
|
[hle.tasks.chat.hle]
|
|
256
256
|
required_env_vars = ["HF_TOKEN", "OPENAI_CLIENT_ID", "OPENAI_CLIENT_SECRET"]
|
|
@@ -258,7 +258,7 @@ required_env_vars = ["HF_TOKEN", "OPENAI_CLIENT_ID", "OPENAI_CLIENT_SECRET"]
|
|
|
258
258
|
|
|
259
259
|
###############################################################################
|
|
260
260
|
[bfcl]
|
|
261
|
-
container = "nvcr.io/nvidia/eval-factory/bfcl:25.
|
|
261
|
+
container = "nvcr.io/nvidia/eval-factory/bfcl:25.10"
|
|
262
262
|
|
|
263
263
|
[bfcl.tasks.chat.bfclv2_ast_prompting]
|
|
264
264
|
required_env_vars = []
|
|
@@ -267,9 +267,20 @@ required_env_vars = []
|
|
|
267
267
|
required_env_vars = []
|
|
268
268
|
|
|
269
269
|
|
|
270
|
+
###############################################################################
|
|
271
|
+
[profbench]
|
|
272
|
+
container = "nvcr.io/nvidia/eval-factory/profbench:25.10"
|
|
273
|
+
|
|
274
|
+
[profbench.tasks.chat.llm_judge]
|
|
275
|
+
required_env_vars = []
|
|
276
|
+
|
|
277
|
+
[profbench.tasks.chat.report_generation]
|
|
278
|
+
required_env_vars = []
|
|
279
|
+
|
|
280
|
+
|
|
270
281
|
###############################################################################
|
|
271
282
|
[vlmevalkit]
|
|
272
|
-
container = "nvcr.io/nvidia/eval-factory/vlmevalkit:25.
|
|
283
|
+
container = "nvcr.io/nvidia/eval-factory/vlmevalkit:25.10"
|
|
273
284
|
|
|
274
285
|
[vlmevalkit.tasks.vlm.ocrbench]
|
|
275
286
|
required_env_vars = []
|
|
@@ -286,15 +297,40 @@ required_env_vars = ["OPENAI_CLIENT_ID", "OPENAI_CLIENT_SECRET"]
|
|
|
286
297
|
|
|
287
298
|
###############################################################################
|
|
288
299
|
[garak]
|
|
289
|
-
container = "nvcr.io/nvidia/eval-factory/garak:25.
|
|
300
|
+
container = "nvcr.io/nvidia/eval-factory/garak:25.10"
|
|
290
301
|
|
|
291
302
|
[garak.tasks.chat.garak]
|
|
292
303
|
required_env_vars = []
|
|
293
304
|
|
|
305
|
+
###############################################################################
|
|
306
|
+
# NOTE(wprazuch): to verify if the tasks need any env var setting
|
|
307
|
+
[nemo_skills]
|
|
308
|
+
container = "nvcr.io/nvidia/eval-factory/nemo_skills:25.10"
|
|
309
|
+
|
|
310
|
+
[nemo_skills.tasks.chat.ns_aime2024]
|
|
311
|
+
required_env_vars = ["JUDGE_API_KEY"]
|
|
312
|
+
|
|
313
|
+
[nemo_skills.tasks.chat.ns_aime2025]
|
|
314
|
+
required_env_vars = []
|
|
315
|
+
|
|
316
|
+
[nemo_skills.tasks.chat.ns_bfcl_v3]
|
|
317
|
+
required_env_vars = []
|
|
318
|
+
|
|
319
|
+
[nemo_skills.tasks.chat.ns_gpqa]
|
|
320
|
+
required_env_vars = ["HF_TOKEN"]
|
|
321
|
+
|
|
322
|
+
[nemo_skills.tasks.chat.ns_hle]
|
|
323
|
+
required_env_vars = []
|
|
324
|
+
|
|
325
|
+
[nemo_skills.tasks.chat.ns_mmlu]
|
|
326
|
+
required_env_vars = ["HF_TOKEN"]
|
|
327
|
+
|
|
328
|
+
[nemo_skills.tasks.chat.ns_mmlu_pro]
|
|
329
|
+
required_env_vars = ["HF_TOKEN"]
|
|
294
330
|
|
|
295
331
|
###############################################################################
|
|
296
332
|
[safety-harness]
|
|
297
|
-
container = "nvcr.io/nvidia/eval-factory/safety-harness:25.
|
|
333
|
+
container = "nvcr.io/nvidia/eval-factory/safety-harness:25.10"
|
|
298
334
|
|
|
299
335
|
[safety-harness.tasks.chat.aegis_v2]
|
|
300
336
|
required_env_vars = ["HF_TOKEN"]
|
|
@@ -303,7 +339,7 @@ required_env_vars = ["HF_TOKEN"]
|
|
|
303
339
|
###############################################################################
|
|
304
340
|
# NOTE(agronskiy): checked parity
|
|
305
341
|
[helm]
|
|
306
|
-
container = "nvcr.io/nvidia/eval-factory/helm:25.08.1"
|
|
342
|
+
container = "nvcr.io/nvidia/eval-factory/helm:25.10"
|
|
307
343
|
|
|
308
344
|
[helm.tasks.chat.medcalc_bench]
|
|
309
345
|
|
|
@@ -339,6 +375,6 @@ container = "nvcr.io/nvidia/eval-factory/helm:25.08.1"
|
|
|
339
375
|
###############################################################################
|
|
340
376
|
# NOTE(agronskiy): checked parity
|
|
341
377
|
[tooltalk]
|
|
342
|
-
container = "nvcr.io/nvidia/eval-factory/tooltalk:25.
|
|
378
|
+
container = "nvcr.io/nvidia/eval-factory/tooltalk:25.10"
|
|
343
379
|
|
|
344
380
|
[tooltalk.tasks.chat.tooltalk]
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: nemo-evaluator-launcher
|
|
3
|
-
Version: 0.1.19
|
|
3
|
+
Version: 0.1.26
|
|
4
4
|
Summary: Launcher for the evaluations provided by NeMo Evaluator containers with different runtime backends
|
|
5
5
|
Author: NVIDIA
|
|
6
6
|
Author-email: nemo-toolkit@nvidia.com
|
|
@@ -478,7 +478,7 @@ Requires-Dist: mlflow>=2.8.0; extra == "mlflow"
|
|
|
478
478
|
Provides-Extra: wandb
|
|
479
479
|
Requires-Dist: wandb>=0.15.0; extra == "wandb"
|
|
480
480
|
Provides-Extra: gsheets
|
|
481
|
-
Requires-Dist:
|
|
481
|
+
Requires-Dist: gspread>=5.0.0; extra == "gsheets"
|
|
482
482
|
Provides-Extra: exporters
|
|
483
483
|
Requires-Dist: mlflow; extra == "exporters"
|
|
484
484
|
Requires-Dist: wandb; extra == "exporters"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|