nemo-evaluator-launcher 0.1.19__py3-none-any.whl → 0.1.41__py3-none-any.whl
This diff shows the content of publicly released versions of the package as they appear in one of the supported public registries. It is provided for informational purposes only and reflects the changes between the two published versions.
- nemo_evaluator_launcher/api/functional.py +105 -1
- nemo_evaluator_launcher/cli/logs.py +102 -0
- nemo_evaluator_launcher/cli/main.py +12 -0
- nemo_evaluator_launcher/cli/run.py +73 -15
- nemo_evaluator_launcher/cli/version.py +26 -23
- nemo_evaluator_launcher/common/helpers.py +176 -43
- nemo_evaluator_launcher/common/logging_utils.py +16 -5
- nemo_evaluator_launcher/common/printing_utils.py +7 -0
- nemo_evaluator_launcher/configs/deployment/sglang.yaml +4 -2
- nemo_evaluator_launcher/configs/deployment/trtllm.yaml +2 -3
- nemo_evaluator_launcher/configs/deployment/vllm.yaml +0 -1
- nemo_evaluator_launcher/configs/execution/slurm/default.yaml +14 -0
- nemo_evaluator_launcher/executors/base.py +31 -1
- nemo_evaluator_launcher/executors/lepton/deployment_helpers.py +36 -1
- nemo_evaluator_launcher/executors/lepton/executor.py +81 -1
- nemo_evaluator_launcher/executors/local/executor.py +377 -22
- nemo_evaluator_launcher/executors/local/run.template.sh +54 -2
- nemo_evaluator_launcher/executors/slurm/executor.py +422 -59
- nemo_evaluator_launcher/executors/slurm/proxy.cfg.template +26 -0
- nemo_evaluator_launcher/exporters/utils.py +32 -46
- nemo_evaluator_launcher/package_info.py +1 -1
- nemo_evaluator_launcher/resources/mapping.toml +56 -15
- {nemo_evaluator_launcher-0.1.19.dist-info → nemo_evaluator_launcher-0.1.41.dist-info}/METADATA +3 -3
- {nemo_evaluator_launcher-0.1.19.dist-info → nemo_evaluator_launcher-0.1.41.dist-info}/RECORD +28 -26
- {nemo_evaluator_launcher-0.1.19.dist-info → nemo_evaluator_launcher-0.1.41.dist-info}/entry_points.txt +1 -0
- {nemo_evaluator_launcher-0.1.19.dist-info → nemo_evaluator_launcher-0.1.41.dist-info}/WHEEL +0 -0
- {nemo_evaluator_launcher-0.1.19.dist-info → nemo_evaluator_launcher-0.1.41.dist-info}/licenses/LICENSE +0 -0
- {nemo_evaluator_launcher-0.1.19.dist-info → nemo_evaluator_launcher-0.1.41.dist-info}/top_level.txt +0 -0
nemo_evaluator_launcher/executors/lepton/executor.py

@@ -18,6 +18,7 @@
 Handles deployment and evaluation using Lepton endpoints with NIM containers.
 """
 
+import os
 import time
 from pathlib import Path
 from typing import List
@@ -36,6 +37,7 @@ from nemo_evaluator_launcher.common.mapping import (
     get_task_from_mapping,
     load_tasks_mapping,
 )
+from nemo_evaluator_launcher.common.printing_utils import red
 from nemo_evaluator_launcher.executors.base import (
     BaseExecutor,
     ExecutionState,
@@ -88,6 +90,23 @@ class LeptonExecutor(BaseExecutor):
         # Generate invocation ID
         invocation_id = generate_invocation_id()
 
+        # TODO(agronskiy): the structure of this executor differs from others,
+        # so the best place to check for unsafe commands yelids a bit of duplication.
+        # We can't use the get_eval_factory_command here because the port is not yet
+        # populated.
+        # Refactor the whole thing.
+        is_potentially_unsafe = False
+        for idx, task in enumerate(cfg.evaluation.tasks):
+            pre_cmd: str = task.get("pre_cmd") or cfg.evaluation.get("pre_cmd") or ""
+            if pre_cmd:
+                is_potentially_unsafe = True
+                break
+
+        # Check for deployment pre_cmd
+        deployment_pre_cmd: str = cfg.deployment.get("pre_cmd") or ""
+        if deployment_pre_cmd:
+            is_potentially_unsafe = True
+
         # DRY-RUN mode
         if dry_run:
             output_dir = Path(cfg.execution.output_dir).absolute() / invocation_id
@@ -102,8 +121,34 @@ class LeptonExecutor(BaseExecutor):
             else:
                 print(f"with endpoint type '{cfg.deployment.type}'")
 
+            if is_potentially_unsafe:
+                print(
+                    red(
+                        "\nFound `pre_cmd` (evaluation or deployment) which carries security risk. When running without --dry-run "
+                        "make sure you trust the command and set NEMO_EVALUATOR_TRUST_PRE_CMD=1"
+                    )
+                )
+
             return invocation_id
 
+        if is_potentially_unsafe:
+            if os.environ.get("NEMO_EVALUATOR_TRUST_PRE_CMD", "") == "1":
+                logger.warning(
+                    "Found non-empty commands (e.g. `pre_cmd` in evaluation or deployment) and NEMO_EVALUATOR_TRUST_PRE_CMD "
+                    "is set, proceeding with caution."
+                )
+
+            else:
+                logger.error(
+                    "Found non-empty commands (e.g. `pre_cmd` in evaluation or deployment) and NEMO_EVALUATOR_TRUST_PRE_CMD "
+                    "is not set. This might carry security risk and unstable environments. "
+                    "To continue, make sure you trust the command and set NEMO_EVALUATOR_TRUST_PRE_CMD=1.",
+                )
+                raise AttributeError(
+                    "Untrusted command found in config, make sure you trust and "
+                    "set NEMO_EVALUATOR_TRUST_PRE_CMD=1."
+                )
+
         # For deployment: none, we use the existing endpoint for all tasks
         if cfg.deployment.type == "none":
             print("📌 Using existing endpoint (deployment: none)")
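Both executors now gate non-empty `pre_cmd` values behind the `NEMO_EVALUATOR_TRUST_PRE_CMD` environment variable. A condensed sketch of that gate, mirroring the hunk above (the helper name is ours for illustration, not part of the package API):

```python
import os

def ensure_pre_cmd_trusted(is_potentially_unsafe: bool) -> None:
    # Mirrors the executor logic: a non-empty `pre_cmd` is only allowed
    # when the user explicitly opts in via NEMO_EVALUATOR_TRUST_PRE_CMD=1.
    if is_potentially_unsafe and os.environ.get("NEMO_EVALUATOR_TRUST_PRE_CMD", "") != "1":
        raise AttributeError(
            "Untrusted command found in config, make sure you trust and "
            "set NEMO_EVALUATOR_TRUST_PRE_CMD=1."
        )
```

In a dry run the same condition only prints a red warning; in a real run it raises before any job is submitted.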
@@ -491,6 +536,33 @@ class LeptonExecutor(BaseExecutor):
 
             job_mounts.append(mount_dict)
 
+            # Handle dataset directory mounting if NEMO_EVALUATOR_DATASET_DIR is required
+            if "NEMO_EVALUATOR_DATASET_DIR" in task_definition.get(
+                "required_env_vars", []
+            ):
+                # Get dataset directory from task config
+                if "dataset_dir" in task:
+                    dataset_mount_host = task["dataset_dir"]
+                else:
+                    raise ValueError(
+                        f"{task.name} task requires a dataset_dir to be specified. "
+                        f"Add 'dataset_dir: /path/to/your/dataset' under the task configuration."
+                    )
+                # Get container mount path (default to /datasets if not specified)
+                dataset_mount_container = task.get(
+                    "dataset_mount_path", "/datasets"
+                )
+                # Add dataset mount to job mounts
+                # Lepton mount format: {"path": "/path/in/container", "mount_from": {"path": "/host/path"}}
+                job_mounts.append(
+                    {
+                        "path": dataset_mount_container,
+                        "mount_from": {"path": dataset_mount_host},
+                    }
+                )
+                # Add NEMO_EVALUATOR_DATASET_DIR environment variable
+                job_env_vars["NEMO_EVALUATOR_DATASET_DIR"] = dataset_mount_container
+
             print(
                 f"   - Storage: {len(job_mounts)} mount(s) with evaluation ID isolation"
             )
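Tasks whose mapping lists `NEMO_EVALUATOR_DATASET_DIR` must now carry a `dataset_dir` in their task config. A small sketch of the resulting Lepton mount, using illustrative paths and task values:

```python
# Illustrative task entry; only `dataset_dir` is mandatory,
# `dataset_mount_path` falls back to "/datasets".
task = {"name": "my_task", "dataset_dir": "/data/my_dataset"}

dataset_mount_container = task.get("dataset_mount_path", "/datasets")
job_mount = {
    "path": dataset_mount_container,              # path inside the container
    "mount_from": {"path": task["dataset_dir"]},  # host path
}
# The evaluation container then locates the data via this variable:
job_env_vars = {"NEMO_EVALUATOR_DATASET_DIR": dataset_mount_container}
```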
@@ -610,7 +682,7 @@ class LeptonExecutor(BaseExecutor):
         job_state = lepton_status.get("state", "Unknown")
 
         # Map Lepton job states to our execution states
-        if job_state
+        if job_state in ["Succeeded", "Completed"]:
             state = ExecutionState.SUCCESS
         elif job_state in ["Running", "Pending", "Starting"]:
             state = ExecutionState.RUNNING
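Restated as a standalone function for clarity (the `FAILED` fallback is our assumption; the failure branch of the mapping lies outside this hunk):

```python
from nemo_evaluator_launcher.executors.base import ExecutionState

def map_lepton_state(job_state: str) -> ExecutionState:
    # Condensed restatement of the mapping fixed above.
    if job_state in ["Succeeded", "Completed"]:
        return ExecutionState.SUCCESS
    if job_state in ["Running", "Pending", "Starting"]:
        return ExecutionState.RUNNING
    return ExecutionState.FAILED  # assumed fallback for unlisted states
```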
@@ -859,6 +931,14 @@ def _dry_run_lepton(
         td = get_task_from_mapping(task.name, tasks_mapping)
         required = td.get("required_env_vars", []) or []
         for var in required:
+            # Skip NEMO_EVALUATOR_DATASET_DIR as it's handled by dataset mounting logic
+            if var == "NEMO_EVALUATOR_DATASET_DIR":
+                if "dataset_dir" not in task:
+                    raise ValueError(
+                        f"Task '{task.name}' requires dataset_dir to be specified. "
+                        f"Add 'dataset_dir: /path/to/your/dataset' under the task configuration."
+                    )
+                continue
             if var == "API_KEY":
                 if not (("API_KEY" in lepton_env_vars) or bool(api_key_name)):
                     raise ValueError(
nemo_evaluator_launcher/executors/local/executor.py

@@ -26,7 +26,8 @@ import shlex
 import shutil
 import subprocess
 import time
-
+import warnings
+from typing import Iterator, List, Optional, Tuple, Union
 
 import jinja2
 import yaml
@@ -39,15 +40,19 @@ from nemo_evaluator_launcher.common.execdb import (
     generate_job_id,
 )
 from nemo_evaluator_launcher.common.helpers import (
+    get_api_key_name,
+    get_endpoint_url,
     get_eval_factory_command,
     get_eval_factory_dataset_size_from_run_config,
+    get_health_url,
     get_timestamp_string,
 )
+from nemo_evaluator_launcher.common.logging_utils import logger
 from nemo_evaluator_launcher.common.mapping import (
     get_task_from_mapping,
     load_tasks_mapping,
 )
-from nemo_evaluator_launcher.common.printing_utils import bold, cyan, grey
+from nemo_evaluator_launcher.common.printing_utils import bold, cyan, grey, red
 from nemo_evaluator_launcher.executors.base import (
     BaseExecutor,
     ExecutionState,
@@ -70,14 +75,8 @@ class LocalExecutor(BaseExecutor):
             str: The invocation ID for the evaluation run.
 
         Raises:
-            NotImplementedError: If deployment is not 'none'.
             RuntimeError: If the run script fails.
         """
-        if cfg.deployment.type != "none":
-            raise NotImplementedError(
-                f"type {cfg.deployment.type} is not implemented -- add deployment support"
-            )
-
         # Check if docker is available (skip in dry_run mode)
         if not dry_run and shutil.which("docker") is None:
             raise RuntimeError(
@@ -97,12 +96,16 @@ class LocalExecutor(BaseExecutor):
         evaluation_tasks = []
         job_ids = []
 
-
+        run_template = jinja2.Template(
             open(pathlib.Path(__file__).parent / "run.template.sh", "r").read()
         )
 
         execution_mode = cfg.execution.get("mode", "parallel")
         if execution_mode == "parallel":
+            if cfg.deployment.type != "none":
+                raise ValueError(
+                    f"Execution mode 'parallel' is not supported with deployment type: {cfg.deployment.type}. Use 'sequential' instead."
+                )
             is_execution_mode_sequential = False
         elif execution_mode == "sequential":
             is_execution_mode_sequential = True
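With deployment support now implemented in the local executor, 'parallel' mode is rejected whenever a deployment is configured. A sketch of the config shape this enforces (a plain dict standing in for the Hydra/OmegaConf config; the `vllm` type comes from the bundled deployment configs):

```python
cfg = {
    "deployment": {"type": "vllm"},       # anything other than "none"
    "execution": {"mode": "sequential"},  # "parallel" would raise ValueError here
}
```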
@@ -113,20 +116,76 @@ class LocalExecutor(BaseExecutor):
                 )
             )
 
+        # Will accumulate if any task contains unsafe commands.
+        is_potentially_unsafe = False
+
+        deployment = None
+
         for idx, task in enumerate(cfg.evaluation.tasks):
+            timestamp = get_timestamp_string()
             task_definition = get_task_from_mapping(task.name, tasks_mapping)
 
+            if cfg.deployment.type != "none":
+                # container name
+                server_container_name = f"server-{task.name}-{timestamp}"
+
+                # health_url
+                health_url = get_health_url(
+                    cfg, get_endpoint_url(cfg, task, task_definition["endpoint_type"])
+                )
+
+                # mounts
+                deployment_mounts_list = []
+                if checkpoint_path := cfg.deployment.get("checkpoint_path"):
+                    deployment_mounts_list.append(f"{checkpoint_path}:/checkpoint:ro")
+                if cache_path := cfg.deployment.get("cache_path"):
+                    deployment_mounts_list.append(f"{cache_path}:/cache")
+                for source_mnt, target_mnt in (
+                    cfg.execution.get("mounts", {}).get("deployment", {}).items()
+                ):
+                    deployment_mounts_list.append(f"{source_mnt}:{target_mnt}")
+
+                # env vars
+                deployment_env_vars = cfg.execution.get("env_vars", {}).get(
+                    "deployment", {}
+                )
+
+                if cfg.deployment.get("env_vars"):
+                    warnings.warn(
+                        "cfg.deployment.env_vars will be deprecated in future versions. "
+                        "Use cfg.execution.env_vars.deployment instead.",
+                        category=DeprecationWarning,
+                        stacklevel=2,
+                    )
+                    deployment_env_vars.update(cfg.deployment["env_vars"])
+
+                command = cfg.deployment.command
+                deployment_extra_docker_args = cfg.execution.get(
+                    "extra_docker_args", ""
+                )
+
+                deployment = {
+                    "container_name": server_container_name,
+                    "image": cfg.deployment.image,
+                    "command": command,
+                    "mounts": deployment_mounts_list,
+                    "env_vars": [f"{k}={v}" for k, v in deployment_env_vars.items()],
+                    "health_url": health_url,
+                    "port": cfg.deployment.port,
+                    "extra_docker_args": deployment_extra_docker_args,
+                }
+
             # Create job ID as <invocation_id>.<n>
             job_id = generate_job_id(invocation_id, idx)
             job_ids.append(job_id)
-
+            client_container_name = f"client-{task.name}-{timestamp}"
 
             # collect all env vars
             env_vars = copy.deepcopy(dict(cfg.evaluation.get("env_vars", {})))
             env_vars.update(task.get("env_vars", {}))
-            if cfg
+            if api_key_name := get_api_key_name(cfg):
                 assert "API_KEY" not in env_vars
-                env_vars["API_KEY"] =
+                env_vars["API_KEY"] = api_key_name
 
             # check if the environment variables are set
             for env_var in env_vars.values():
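The deployment container's mounts are assembled into docker-style `host:container[:opts]` strings. A runnable sketch with illustrative stand-ins for `cfg.deployment` and `cfg.execution.mounts.deployment`:

```python
# Illustrative values only, not real config paths.
deployment_cfg = {"checkpoint_path": "/models/ckpt", "cache_path": "/tmp/cache"}
extra_mounts = {"/data/tokenizers": "/tokenizers"}

deployment_mounts_list = []
if checkpoint_path := deployment_cfg.get("checkpoint_path"):
    deployment_mounts_list.append(f"{checkpoint_path}:/checkpoint:ro")  # read-only
if cache_path := deployment_cfg.get("cache_path"):
    deployment_mounts_list.append(f"{cache_path}:/cache")
for source_mnt, target_mnt in extra_mounts.items():
    deployment_mounts_list.append(f"{source_mnt}:{target_mnt}")

print(deployment_mounts_list)
# ['/models/ckpt:/checkpoint:ro', '/tmp/cache:/cache', '/data/tokenizers:/tokenizers']
```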
@@ -135,8 +194,11 @@ class LocalExecutor(BaseExecutor):
                         f"Trying to pass an unset environment variable {env_var}."
                     )
 
-            # check if required env vars are defined:
+            # check if required env vars are defined (excluding NEMO_EVALUATOR_DATASET_DIR which is handled separately):
             for required_env_var in task_definition.get("required_env_vars", []):
+                # Skip NEMO_EVALUATOR_DATASET_DIR as it's handled by dataset mounting logic below
+                if required_env_var == "NEMO_EVALUATOR_DATASET_DIR":
+                    continue
                 if required_env_var not in env_vars.keys():
                     raise ValueError(
                         f"{task.name} task requires environment variable {required_env_var}."
@@ -144,12 +206,38 @@ class LocalExecutor(BaseExecutor):
                         f" pair {required_env_var}: YOUR_ENV_VAR_NAME"
                     )
 
+            # Handle dataset directory mounting if NEMO_EVALUATOR_DATASET_DIR is required
+            dataset_mount_host = None
+            dataset_mount_container = None
+            dataset_env_var_value = None
+            if "NEMO_EVALUATOR_DATASET_DIR" in task_definition.get(
+                "required_env_vars", []
+            ):
+                # Get dataset directory from task config
+                if "dataset_dir" in task:
+                    dataset_mount_host = task["dataset_dir"]
+                else:
+                    raise ValueError(
+                        f"{task.name} task requires a dataset_dir to be specified. "
+                        f"Add 'dataset_dir: /path/to/your/dataset' under the task configuration."
+                    )
+                # Get container mount path (default to /datasets if not specified)
+                dataset_mount_container = task.get("dataset_mount_path", "/datasets")
+                # Set NEMO_EVALUATOR_DATASET_DIR to the container mount path
+                dataset_env_var_value = dataset_mount_container
+
             # format env_vars for a template
-
+            env_vars_list = [
                 f"{env_var_dst}=${env_var_src}"
                 for env_var_dst, env_var_src in env_vars.items()
             ]
 
+            # Add dataset env var if needed (directly with value, not from host env)
+            if dataset_env_var_value:
+                env_vars_list.append(
+                    f"NEMO_EVALUATOR_DATASET_DIR={dataset_env_var_value}"
+                )
+
             eval_image = task_definition["container"]
             if "container" in task:
                 eval_image = task["container"]
@@ -166,15 +254,22 @@ class LocalExecutor(BaseExecutor):
             # TODO(agronskiy): cleaner way is to encode everything with base64, not
             # some parts (like ef_config.yaml) and just output as logs somewhere.
             eval_factory_command_debug_comment = eval_factory_command_struct.debug
+            is_potentially_unsafe = (
+                is_potentially_unsafe
+                or eval_factory_command_struct.is_potentially_unsafe
+            )
             evaluation_task = {
+                "deployment": deployment,
                 "name": task.name,
                 "job_id": job_id,
                 "eval_image": eval_image,
-                "
-                "env_vars":
+                "client_container_name": client_container_name,
+                "env_vars": env_vars_list,
                 "output_dir": task_output_dir,
                 "eval_factory_command": eval_factory_command,
                 "eval_factory_command_debug_comment": eval_factory_command_debug_comment,
+                "dataset_mount_host": dataset_mount_host,
+                "dataset_mount_container": dataset_mount_container,
             }
             evaluation_tasks.append(evaluation_task)
 
@@ -185,7 +280,7 @@ class LocalExecutor(BaseExecutor):
             extra_docker_args = cfg.execution.get("extra_docker_args", "")
 
             run_sh_content = (
-
+                run_template.render(
                     evaluation_tasks=[evaluation_task],
                     auto_export_destinations=auto_export_destinations,
                     extra_docker_args=extra_docker_args,
@@ -196,7 +291,7 @@ class LocalExecutor(BaseExecutor):
             (task_output_dir / "run.sh").write_text(run_sh_content)
 
         run_all_sequentially_sh_content = (
-
+            run_template.render(
                 evaluation_tasks=evaluation_tasks,
                 auto_export_destinations=auto_export_destinations,
                 extra_docker_args=extra_docker_args,
@@ -230,8 +325,34 @@ class LocalExecutor(BaseExecutor):
                 with open(task_output_dir / "run.sh", "r") as f:
                     print(grey(f.read()))
             print(bold("\nTo execute, run without --dry-run"))
+
+            if is_potentially_unsafe:
+                print(
+                    red(
+                        "\nFound `pre_cmd` which carries security risk. When running without --dry-run "
+                        "make sure you trust the command and set NEMO_EVALUATOR_TRUST_PRE_CMD=1"
+                    )
+                )
             return invocation_id
 
+        if is_potentially_unsafe:
+            if os.environ.get("NEMO_EVALUATOR_TRUST_PRE_CMD", "") == "1":
+                logger.warning(
+                    "Found non-empty task commands (e.g. `pre_cmd`) and NEMO_EVALUATOR_TRUST_PRE_CMD "
+                    "is set, proceeding with caution."
+                )
+
+            else:
+                logger.error(
+                    "Found non-empty task commands (e.g. `pre_cmd`) and NEMO_EVALUATOR_TRUST_PRE_CMD "
+                    "is not set. This might carry security risk and unstable environments. "
+                    "To continue, make sure you trust the command and set NEMO_EVALUATOR_TRUST_PRE_CMD=1.",
+                )
+                raise AttributeError(
+                    "Untrusted command found in config, make sure you trust and "
+                    "set NEMO_EVALUATOR_TRUST_PRE_CMD=1."
+                )
+
         # Save launched jobs metadata
         db = ExecutionDB()
         for job_id, task, evaluation_task in zip(
@@ -245,7 +366,7 @@ class LocalExecutor(BaseExecutor):
                 executor="local",
                 data={
                     "output_dir": str(evaluation_task["output_dir"]),
-                    "container": evaluation_task["
+                    "container": evaluation_task["client_container_name"],
                     "eval_image": evaluation_task["eval_image"],
                 },
                 config=OmegaConf.to_object(cfg),
@@ -301,11 +422,11 @@ class LocalExecutor(BaseExecutor):
 
         print(bold(cyan("\nCommands for real-time monitoring:")))
         for job_id, evaluation_task in zip(job_ids, evaluation_tasks):
-
-            print(f"
+            print(f"\n  Job {job_id} ({evaluation_task['name']}):")
+            print(f"  nemo-evaluator-launcher logs {job_id}")
 
         print(bold(cyan("\nFollow all logs for this invocation:")))
-        print(f"
+        print(f"  nemo-evaluator-launcher logs {invocation_id}")
 
         return invocation_id
 
@@ -501,6 +622,240 @@ class LocalExecutor(BaseExecutor):
             )
             raise RuntimeError(error_msg)
 
+    @staticmethod
+    def stream_logs(
+        id: Union[str, List[str]], executor_name: Optional[str] = None
+    ) -> Iterator[Tuple[str, str, str]]:
+        """Stream logs from a job or invocation group.
+
+        Args:
+            id: Unique job identifier, invocation identifier, or list of job IDs to stream simultaneously.
+
+        Yields:
+            Tuple[str, str, str]: Tuples of (job_id, task_name, log_line) for each log line.
+                Empty lines are yielded as empty strings.
+        """
+        db = ExecutionDB()
+
+        # Handle list of job IDs for simultaneous streaming
+        if isinstance(id, list):
+            # Collect all jobs from the list of job IDs
+            jobs = {}
+            for job_id in id:
+                job_data = db.get_job(job_id)
+                if job_data is None or job_data.executor != "local":
+                    continue
+                jobs[job_id] = job_data
+            if not jobs:
+                return
+        # If id looks like an invocation_id (no dot), get all jobs for it
+        elif "." not in id:
+            jobs = db.get_jobs(id)
+            if not jobs:
+                return
+        else:
+            # Otherwise, treat as job_id
+            job_data = db.get_job(id)
+            if job_data is None or job_data.executor != "local":
+                return
+            jobs = {id: job_data}
+
+        # Collect log file paths and metadata
+        log_files = []
+
+        for job_id, job_data in jobs.items():
+            output_dir = pathlib.Path(job_data.data.get("output_dir", ""))
+            if not output_dir:
+                continue
+
+            # Get task name from config
+            task_name = LocalExecutor._extract_task_name(job_data, job_id)
+
+            log_file_path = output_dir / "logs" / "client_stdout.log"
+
+            log_files.append(
+                {
+                    "job_id": job_id,
+                    "task_name": task_name,
+                    "path": log_file_path,
+                    "file_handle": None,
+                    "position": 0,
+                }
+            )
+
+        if not log_files:
+            return
+
+        # Track which files we've seen before (for tail behavior)
+        file_seen_before = {}
+
+        # Open files that exist, keep track of which ones we're waiting for
+        # First, yield the last 15 lines from existing files
+        for log_info in log_files:
+            if log_info["path"].exists():
+                file_seen_before[log_info["path"]] = True
+                # Read and yield last 15 lines
+                last_lines = LocalExecutor._read_last_n_lines(log_info["path"], 15)
+                for line in last_lines:
+                    yield (
+                        log_info["job_id"],
+                        log_info["task_name"],
+                        line,
+                    )
+                try:
+                    log_info["file_handle"] = open(
+                        log_info["path"], "r", encoding="utf-8", errors="replace"
+                    )
+                    # Seek to end if file already exists (tail behavior)
+                    log_info["file_handle"].seek(0, 2)
+                    log_info["position"] = log_info["file_handle"].tell()
+                except Exception as e:
+                    logger.error(f"Could not open {log_info['path']}: {e}")
+            else:
+                file_seen_before[log_info["path"]] = False
+
+        try:
+            while True:
+                any_activity = False
+
+                for log_info in log_files:
+                    # Try to open file if it doesn't exist yet
+                    if log_info["file_handle"] is None:
+                        if log_info["path"].exists():
+                            try:
+                                # If file was just created, read last 15 lines first
+                                if not file_seen_before.get(log_info["path"], False):
+                                    last_lines = LocalExecutor._read_last_n_lines(
+                                        log_info["path"], 15
+                                    )
+                                    for line in last_lines:
+                                        yield (
+                                            log_info["job_id"],
+                                            log_info["task_name"],
+                                            line,
+                                        )
+                                    file_seen_before[log_info["path"]] = True
+
+                                log_info["file_handle"] = open(
+                                    log_info["path"],
+                                    "r",
+                                    encoding="utf-8",
+                                    errors="replace",
+                                )
+                                # Seek to end for tail behavior
+                                log_info["file_handle"].seek(0, 2)
+                                log_info["position"] = log_info["file_handle"].tell()
+                            except Exception as e:
+                                logger.error(f"Could not open {log_info['path']}: {e}")
+                                continue
+
+                    # Read new lines from file
+                    if log_info["file_handle"] is not None:
+                        try:
+                            # Check if file has grown
+                            current_size = log_info["path"].stat().st_size
+                            if current_size > log_info["position"]:
+                                log_info["file_handle"].seek(log_info["position"])
+                                new_lines = log_info["file_handle"].readlines()
+                                log_info["position"] = log_info["file_handle"].tell()
+
+                                # Yield new lines
+                                for line in new_lines:
+                                    line_stripped = line.rstrip("\n\r")
+                                    yield (
+                                        log_info["job_id"],
+                                        log_info["task_name"],
+                                        line_stripped,
+                                    )
+                                any_activity = True
+                        except (OSError, IOError) as e:
+                            # File might have been deleted or moved
+                            # Don't log error for every check, only on first error
+                            if log_info.get("error_printed", False) is False:
+                                logger.error(f"Error reading {log_info['path']}: {e}")
+                                log_info["error_printed"] = True
+                            log_info["file_handle"] = None
+                        except Exception:
+                            # Reset error flag if we successfully read again
+                            log_info["error_printed"] = False
+
+                # If no activity, sleep briefly to avoid busy waiting
+                if not any_activity:
+                    time.sleep(0.1)
+
+        except KeyboardInterrupt:
+            # Clean exit on Ctrl+C
+            pass
+        finally:
+            # Close all file handles
+            for log_info in log_files:
+                if log_info["file_handle"] is not None:
+                    try:
+                        log_info["file_handle"].close()
+                    except Exception:
+                        pass
+
+    @staticmethod
+    def _read_last_n_lines(file_path: pathlib.Path, n: int) -> List[str]:
+        """Read the last N lines from a file efficiently.
+
+        Args:
+            file_path: Path to the file to read from.
+            n: Number of lines to read from the end.
+
+        Returns:
+            List of the last N lines (or fewer if file has fewer lines).
+        """
+        try:
+            with open(file_path, "r", encoding="utf-8", errors="replace") as f:
+                # Read all lines
+                all_lines = f.readlines()
+                # Return last n lines, stripping newlines
+                return [line.rstrip("\n\r") for line in all_lines[-n:]]
+        except Exception as e:
+            logger.warning(f"Could not read last {n} lines from {file_path}: {e}")
+            return []
+
+    @staticmethod
+    def _extract_task_name(job_data: JobData, job_id: str) -> str:
+        """Extract task name from job data config.
+
+        Args:
+            job_data: JobData object containing config.
+            job_id: Job ID for error reporting.
+
+        Returns:
+            Task name string.
+        """
+        config = job_data.config or {}
+        evaluation = config.get("evaluation", {})
+        tasks = evaluation.get("tasks", [])
+
+        # Find the task that matches this job
+        # For job_id like "15b9f667.0", index is 0
+        try:
+            if "." in job_id:
+                index = int(job_id.split(".")[1])
+                if len(tasks) > 0 and index >= len(tasks):
+                    raise AttributeError(
+                        f"Job task index {job_id} is larger than number of tasks {len(tasks)} in invocation"
+                    )
+                # If index is valid and tasks exist, return the task name
+                if len(tasks) > 0 and index < len(tasks):
+                    return tasks[index].get("name", "unknown")
+        except (ValueError, IndexError):
+            pass
+
+        # Fallback: try to get task name from output_dir
+        # output_dir typically ends with task name
+        output_dir = job_data.data.get("output_dir", "")
+        if output_dir:
+            parts = pathlib.Path(output_dir).parts
+            if parts:
+                return parts[-1]
+
+        return "unknown"
+
     @staticmethod
     def _add_to_killed_jobs(invocation_id: str, job_id: str) -> None:
         """Add a job ID to the killed jobs file for this invocation.