nemo-evaluator-launcher 0.1.19__py3-none-any.whl → 0.1.56__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nemo_evaluator_launcher/api/functional.py +159 -5
- nemo_evaluator_launcher/cli/logs.py +102 -0
- nemo_evaluator_launcher/cli/ls_task.py +280 -0
- nemo_evaluator_launcher/cli/ls_tasks.py +208 -55
- nemo_evaluator_launcher/cli/main.py +29 -2
- nemo_evaluator_launcher/cli/run.py +114 -16
- nemo_evaluator_launcher/cli/version.py +26 -23
- nemo_evaluator_launcher/common/container_metadata/__init__.py +61 -0
- nemo_evaluator_launcher/common/container_metadata/intermediate_repr.py +530 -0
- nemo_evaluator_launcher/common/container_metadata/loading.py +1126 -0
- nemo_evaluator_launcher/common/container_metadata/registries.py +824 -0
- nemo_evaluator_launcher/common/container_metadata/utils.py +63 -0
- nemo_evaluator_launcher/common/helpers.py +200 -51
- nemo_evaluator_launcher/common/logging_utils.py +16 -5
- nemo_evaluator_launcher/common/mapping.py +341 -155
- nemo_evaluator_launcher/common/printing_utils.py +25 -12
- nemo_evaluator_launcher/configs/deployment/sglang.yaml +4 -2
- nemo_evaluator_launcher/configs/deployment/trtllm.yaml +2 -3
- nemo_evaluator_launcher/configs/deployment/vllm.yaml +0 -1
- nemo_evaluator_launcher/configs/execution/slurm/default.yaml +14 -0
- nemo_evaluator_launcher/executors/base.py +31 -1
- nemo_evaluator_launcher/executors/lepton/deployment_helpers.py +36 -1
- nemo_evaluator_launcher/executors/lepton/executor.py +107 -9
- nemo_evaluator_launcher/executors/local/executor.py +383 -24
- nemo_evaluator_launcher/executors/local/run.template.sh +54 -2
- nemo_evaluator_launcher/executors/slurm/executor.py +559 -64
- nemo_evaluator_launcher/executors/slurm/proxy.cfg.template +26 -0
- nemo_evaluator_launcher/exporters/utils.py +32 -46
- nemo_evaluator_launcher/package_info.py +1 -1
- nemo_evaluator_launcher/resources/all_tasks_irs.yaml +17016 -0
- nemo_evaluator_launcher/resources/mapping.toml +64 -315
- {nemo_evaluator_launcher-0.1.19.dist-info → nemo_evaluator_launcher-0.1.56.dist-info}/METADATA +4 -3
- nemo_evaluator_launcher-0.1.56.dist-info/RECORD +69 -0
- {nemo_evaluator_launcher-0.1.19.dist-info → nemo_evaluator_launcher-0.1.56.dist-info}/entry_points.txt +1 -0
- nemo_evaluator_launcher-0.1.19.dist-info/RECORD +0 -60
- {nemo_evaluator_launcher-0.1.19.dist-info → nemo_evaluator_launcher-0.1.56.dist-info}/WHEEL +0 -0
- {nemo_evaluator_launcher-0.1.19.dist-info → nemo_evaluator_launcher-0.1.56.dist-info}/licenses/LICENSE +0 -0
- {nemo_evaluator_launcher-0.1.19.dist-info → nemo_evaluator_launcher-0.1.56.dist-info}/top_level.txt +0 -0
|
@@ -3,7 +3,7 @@ image: nvcr.io/nvidia/tensorrt-llm/release:1.0.0
|
|
|
3
3
|
checkpoint_path: ???
|
|
4
4
|
served_model_name: ???
|
|
5
5
|
port: 8000
|
|
6
|
-
tensor_parallel_size:
|
|
6
|
+
tensor_parallel_size: 8
|
|
7
7
|
pipeline_parallel_size: 1
|
|
8
8
|
extra_args: ""
|
|
9
9
|
|
|
@@ -12,8 +12,7 @@ endpoints:
|
|
|
12
12
|
completions: /v1/completions
|
|
13
13
|
health: /health
|
|
14
14
|
|
|
15
|
-
command:
|
|
16
|
-
mpirun --allow-run-as-root --oversubscribe
|
|
15
|
+
command: mpirun --allow-run-as-root --oversubscribe
|
|
17
16
|
trtllm-serve serve /checkpoint
|
|
18
17
|
--tp_size=${deployment.tensor_parallel_size}
|
|
19
18
|
--pp_size=${deployment.pipeline_parallel_size}
|
|
@@ -37,6 +37,5 @@ command: vllm serve ${oc.select:deployment.hf_model_handle,/checkpoint}
|
|
|
37
37
|
--port ${deployment.port}
|
|
38
38
|
--trust-remote-code
|
|
39
39
|
--served-model-name ${deployment.served_model_name}
|
|
40
|
-
--enforce-eager
|
|
41
40
|
--gpu-memory-utilization ${deployment.gpu_memory_utilization}
|
|
42
41
|
${deployment.extra_args}
|
|
@@ -25,6 +25,12 @@ ntasks_per_node: 1
|
|
|
25
25
|
gres: gpu:8
|
|
26
26
|
walltime: 01:00:00
|
|
27
27
|
subproject: nemo-evaluator-launcher
|
|
28
|
+
sbatch_comment: null # Optional comment for SLURM job (translates to #SBATCH --comment='...')
|
|
29
|
+
|
|
30
|
+
# Deployment-specific SLURM configuration
|
|
31
|
+
deployment:
|
|
32
|
+
n_tasks: 1 # Number of tasks for deployment srun (default: 1, for multi-instance set to num_nodes)
|
|
33
|
+
|
|
28
34
|
env_vars:
|
|
29
35
|
deployment: {}
|
|
30
36
|
evaluation: {}
|
|
@@ -32,3 +38,11 @@ mounts:
|
|
|
32
38
|
deployment: {}
|
|
33
39
|
evaluation: {}
|
|
34
40
|
mount_home: true
|
|
41
|
+
|
|
42
|
+
proxy:
|
|
43
|
+
type: haproxy
|
|
44
|
+
image: haproxy:latest
|
|
45
|
+
config:
|
|
46
|
+
haproxy_port: 5009
|
|
47
|
+
health_check_path: /health
|
|
48
|
+
health_check_status: 200
|
|
@@ -21,10 +21,12 @@ Defines the abstract interface for all executor implementations and common statu
|
|
|
21
21
|
from abc import ABC, abstractmethod
|
|
22
22
|
from dataclasses import dataclass
|
|
23
23
|
from enum import Enum
|
|
24
|
-
from typing import Any, Optional
|
|
24
|
+
from typing import Any, Iterator, Optional, Tuple
|
|
25
25
|
|
|
26
26
|
from omegaconf import DictConfig
|
|
27
27
|
|
|
28
|
+
from nemo_evaluator_launcher.common.logging_utils import logger
|
|
29
|
+
|
|
28
30
|
|
|
29
31
|
class ExecutionState(Enum):
|
|
30
32
|
"""Enumeration of possible execution states."""
|
|
@@ -118,3 +120,31 @@ class BaseExecutor(ABC):
|
|
|
118
120
|
return f"Could not find or kill job {job_id} ({container_or_id}) - job was already killed"
|
|
119
121
|
# Generic error message
|
|
120
122
|
return f"Could not find or kill job {job_id} ({container_or_id})"
|
|
123
|
+
|
|
124
|
+
@staticmethod
|
|
125
|
+
def stream_logs(
|
|
126
|
+
id: str, executor_name: Optional[str] = None
|
|
127
|
+
) -> Iterator[Tuple[str, str, str]]:
|
|
128
|
+
"""Stream logs from a job or invocation group.
|
|
129
|
+
|
|
130
|
+
This is an optional method that executors can implement to provide log streaming.
|
|
131
|
+
If not implemented, it will log a warning and raise NotImplementedError.
|
|
132
|
+
|
|
133
|
+
Args:
|
|
134
|
+
id: Unique job identifier or invocation identifier.
|
|
135
|
+
executor_name: Optional executor name for warning messages. If not provided,
|
|
136
|
+
will attempt to infer from the calling context.
|
|
137
|
+
|
|
138
|
+
Yields:
|
|
139
|
+
Tuple[str, str, str]: Tuples of (job_id, task_name, log_line) for each log line.
|
|
140
|
+
Empty lines are yielded as empty strings.
|
|
141
|
+
|
|
142
|
+
Raises:
|
|
143
|
+
NotImplementedError: If the executor does not support log streaming.
|
|
144
|
+
"""
|
|
145
|
+
executor_display_name = executor_name or "this executor"
|
|
146
|
+
logger.warning(
|
|
147
|
+
f"Log streaming is not yet implemented for executor '{executor_display_name}'. "
|
|
148
|
+
"Only 'local' executor currently supports log streaming."
|
|
149
|
+
)
|
|
150
|
+
raise NotImplementedError("This executor does not support log streaming")
|
|
@@ -19,6 +19,7 @@ Handles Lepton endpoint creation, management, and health checks.
|
|
|
19
19
|
"""
|
|
20
20
|
|
|
21
21
|
import json
|
|
22
|
+
import shlex
|
|
22
23
|
import subprocess
|
|
23
24
|
import time
|
|
24
25
|
from pathlib import Path
|
|
@@ -27,6 +28,7 @@ from typing import Any, Dict, Optional
|
|
|
27
28
|
# Import lepton dependencies
|
|
28
29
|
from omegaconf import DictConfig
|
|
29
30
|
|
|
31
|
+
from nemo_evaluator_launcher.common.helpers import _str_to_echo_command
|
|
30
32
|
from nemo_evaluator_launcher.common.logging_utils import logger
|
|
31
33
|
|
|
32
34
|
|
|
@@ -235,6 +237,8 @@ def _create_inference_container_spec(deployment_cfg: DictConfig) -> Dict[str, An
|
|
|
235
237
|
Returns:
|
|
236
238
|
Container specification for Lepton.
|
|
237
239
|
"""
|
|
240
|
+
# Extract pre_cmd from deployment_cfg
|
|
241
|
+
pre_cmd: str = deployment_cfg.get("pre_cmd") or ""
|
|
238
242
|
container_spec = {
|
|
239
243
|
"image": deployment_cfg.image,
|
|
240
244
|
"ports": [{"container_port": deployment_cfg.port}],
|
|
@@ -258,6 +262,18 @@ def _create_inference_container_spec(deployment_cfg: DictConfig) -> Dict[str, An
|
|
|
258
262
|
if hasattr(deployment_cfg, "extra_args") and deployment_cfg.extra_args:
|
|
259
263
|
command_parts.extend(deployment_cfg.extra_args.split())
|
|
260
264
|
|
|
265
|
+
# Wrap with pre_cmd if provided
|
|
266
|
+
if pre_cmd:
|
|
267
|
+
create_pre_script_cmd = _str_to_echo_command(
|
|
268
|
+
pre_cmd, filename="deployment_pre_cmd.sh"
|
|
269
|
+
)
|
|
270
|
+
original_cmd = " ".join(shlex.quote(str(c)) for c in command_parts)
|
|
271
|
+
command_parts = [
|
|
272
|
+
"/bin/bash",
|
|
273
|
+
"-c",
|
|
274
|
+
f"{create_pre_script_cmd.cmd} && source deployment_pre_cmd.sh && exec {original_cmd}",
|
|
275
|
+
]
|
|
276
|
+
|
|
261
277
|
container_spec["command"] = command_parts
|
|
262
278
|
|
|
263
279
|
elif deployment_cfg.type == "sglang":
|
|
@@ -278,12 +294,31 @@ def _create_inference_container_spec(deployment_cfg: DictConfig) -> Dict[str, An
|
|
|
278
294
|
if hasattr(deployment_cfg, "extra_args") and deployment_cfg.extra_args:
|
|
279
295
|
command_parts.extend(deployment_cfg.extra_args.split())
|
|
280
296
|
|
|
297
|
+
# Wrap with pre_cmd if provided
|
|
298
|
+
if pre_cmd:
|
|
299
|
+
create_pre_script_cmd = _str_to_echo_command(
|
|
300
|
+
pre_cmd, filename="deployment_pre_cmd.sh"
|
|
301
|
+
)
|
|
302
|
+
original_cmd = " ".join(shlex.quote(str(c)) for c in command_parts)
|
|
303
|
+
command_parts = [
|
|
304
|
+
"/bin/bash",
|
|
305
|
+
"-c",
|
|
306
|
+
f"{create_pre_script_cmd.cmd} && source deployment_pre_cmd.sh && exec {original_cmd}",
|
|
307
|
+
]
|
|
308
|
+
|
|
281
309
|
container_spec["command"] = command_parts
|
|
282
310
|
|
|
283
311
|
elif deployment_cfg.type == "nim":
|
|
284
312
|
# NIM containers use their default entrypoint - no custom command needed
|
|
285
313
|
# Configuration is handled via environment variables
|
|
286
|
-
|
|
314
|
+
# pre_cmd is not supported for NIM deployments
|
|
315
|
+
if pre_cmd:
|
|
316
|
+
logger.error(
|
|
317
|
+
"pre_cmd is not supported for NIM deployments",
|
|
318
|
+
deployment_type="nim",
|
|
319
|
+
pre_cmd=pre_cmd,
|
|
320
|
+
)
|
|
321
|
+
raise ValueError("pre_cmd is not supported for NIM deployments")
|
|
287
322
|
|
|
288
323
|
return container_spec
|
|
289
324
|
|
|
@@ -18,6 +18,7 @@
|
|
|
18
18
|
Handles deployment and evaluation using Lepton endpoints with NIM containers.
|
|
19
19
|
"""
|
|
20
20
|
|
|
21
|
+
import os
|
|
21
22
|
import time
|
|
22
23
|
from pathlib import Path
|
|
23
24
|
from typing import List
|
|
@@ -33,9 +34,10 @@ from nemo_evaluator_launcher.common.execdb import (
|
|
|
33
34
|
from nemo_evaluator_launcher.common.helpers import get_eval_factory_command
|
|
34
35
|
from nemo_evaluator_launcher.common.logging_utils import logger
|
|
35
36
|
from nemo_evaluator_launcher.common.mapping import (
|
|
36
|
-
|
|
37
|
+
get_task_definition_for_job,
|
|
37
38
|
load_tasks_mapping,
|
|
38
39
|
)
|
|
40
|
+
from nemo_evaluator_launcher.common.printing_utils import red
|
|
39
41
|
from nemo_evaluator_launcher.executors.base import (
|
|
40
42
|
BaseExecutor,
|
|
41
43
|
ExecutionState,
|
|
@@ -88,6 +90,23 @@ class LeptonExecutor(BaseExecutor):
|
|
|
88
90
|
# Generate invocation ID
|
|
89
91
|
invocation_id = generate_invocation_id()
|
|
90
92
|
|
|
93
|
+
# TODO(agronskiy): the structure of this executor differs from others,
|
|
94
|
+
# so the best place to check for unsafe commands yields a bit of duplication.
|
|
95
|
+
# We can't use the get_eval_factory_command here because the port is not yet
|
|
96
|
+
# populated.
|
|
97
|
+
# Refactor the whole thing.
|
|
98
|
+
is_potentially_unsafe = False
|
|
99
|
+
for idx, task in enumerate(cfg.evaluation.tasks):
|
|
100
|
+
pre_cmd: str = task.get("pre_cmd") or cfg.evaluation.get("pre_cmd") or ""
|
|
101
|
+
if pre_cmd:
|
|
102
|
+
is_potentially_unsafe = True
|
|
103
|
+
break
|
|
104
|
+
|
|
105
|
+
# Check for deployment pre_cmd
|
|
106
|
+
deployment_pre_cmd: str = cfg.deployment.get("pre_cmd") or ""
|
|
107
|
+
if deployment_pre_cmd:
|
|
108
|
+
is_potentially_unsafe = True
|
|
109
|
+
|
|
91
110
|
# DRY-RUN mode
|
|
92
111
|
if dry_run:
|
|
93
112
|
output_dir = Path(cfg.execution.output_dir).absolute() / invocation_id
|
|
@@ -102,8 +121,34 @@ class LeptonExecutor(BaseExecutor):
|
|
|
102
121
|
else:
|
|
103
122
|
print(f"with endpoint type '{cfg.deployment.type}'")
|
|
104
123
|
|
|
124
|
+
if is_potentially_unsafe:
|
|
125
|
+
print(
|
|
126
|
+
red(
|
|
127
|
+
"\nFound `pre_cmd` (evaluation or deployment) which carries security risk. When running without --dry-run "
|
|
128
|
+
"make sure you trust the command and set NEMO_EVALUATOR_TRUST_PRE_CMD=1"
|
|
129
|
+
)
|
|
130
|
+
)
|
|
131
|
+
|
|
105
132
|
return invocation_id
|
|
106
133
|
|
|
134
|
+
if is_potentially_unsafe:
|
|
135
|
+
if os.environ.get("NEMO_EVALUATOR_TRUST_PRE_CMD", "") == "1":
|
|
136
|
+
logger.warning(
|
|
137
|
+
"Found non-empty commands (e.g. `pre_cmd` in evaluation or deployment) and NEMO_EVALUATOR_TRUST_PRE_CMD "
|
|
138
|
+
"is set, proceeding with caution."
|
|
139
|
+
)
|
|
140
|
+
|
|
141
|
+
else:
|
|
142
|
+
logger.error(
|
|
143
|
+
"Found non-empty commands (e.g. `pre_cmd` in evaluation or deployment) and NEMO_EVALUATOR_TRUST_PRE_CMD "
|
|
144
|
+
"is not set. This might carry security risk and unstable environments. "
|
|
145
|
+
"To continue, make sure you trust the command and set NEMO_EVALUATOR_TRUST_PRE_CMD=1.",
|
|
146
|
+
)
|
|
147
|
+
raise AttributeError(
|
|
148
|
+
"Untrusted command found in config, make sure you trust and "
|
|
149
|
+
"set NEMO_EVALUATOR_TRUST_PRE_CMD=1."
|
|
150
|
+
)
|
|
151
|
+
|
|
107
152
|
# For deployment: none, we use the existing endpoint for all tasks
|
|
108
153
|
if cfg.deployment.type == "none":
|
|
109
154
|
print("📌 Using existing endpoint (deployment: none)")
|
|
@@ -248,8 +293,10 @@ class LeptonExecutor(BaseExecutor):
|
|
|
248
293
|
return
|
|
249
294
|
|
|
250
295
|
# Construct the full endpoint URL
|
|
251
|
-
task_definition =
|
|
252
|
-
task.name,
|
|
296
|
+
task_definition = get_task_definition_for_job(
|
|
297
|
+
task_query=task.name,
|
|
298
|
+
base_mapping=tasks_mapping,
|
|
299
|
+
container=task.get("container"),
|
|
253
300
|
)
|
|
254
301
|
task_endpoint_type = task_definition["endpoint_type"]
|
|
255
302
|
endpoint_path = cfg.deployment.endpoints[task_endpoint_type]
|
|
@@ -338,7 +385,11 @@ class LeptonExecutor(BaseExecutor):
|
|
|
338
385
|
|
|
339
386
|
# Submit each evaluation task as a Lepton job
|
|
340
387
|
for idx, task in enumerate(cfg.evaluation.tasks):
|
|
341
|
-
task_definition =
|
|
388
|
+
task_definition = get_task_definition_for_job(
|
|
389
|
+
task_query=task.name,
|
|
390
|
+
base_mapping=tasks_mapping,
|
|
391
|
+
container=task.get("container"),
|
|
392
|
+
)
|
|
342
393
|
|
|
343
394
|
# Create job ID and Lepton job name (max 36 chars)
|
|
344
395
|
job_id = generate_job_id(invocation_id, idx)
|
|
@@ -491,6 +542,33 @@ class LeptonExecutor(BaseExecutor):
|
|
|
491
542
|
|
|
492
543
|
job_mounts.append(mount_dict)
|
|
493
544
|
|
|
545
|
+
# Handle dataset directory mounting if NEMO_EVALUATOR_DATASET_DIR is required
|
|
546
|
+
if "NEMO_EVALUATOR_DATASET_DIR" in task_definition.get(
|
|
547
|
+
"required_env_vars", []
|
|
548
|
+
):
|
|
549
|
+
# Get dataset directory from task config
|
|
550
|
+
if "dataset_dir" in task:
|
|
551
|
+
dataset_mount_host = task["dataset_dir"]
|
|
552
|
+
else:
|
|
553
|
+
raise ValueError(
|
|
554
|
+
f"{task.name} task requires a dataset_dir to be specified. "
|
|
555
|
+
f"Add 'dataset_dir: /path/to/your/dataset' under the task configuration."
|
|
556
|
+
)
|
|
557
|
+
# Get container mount path (default to /datasets if not specified)
|
|
558
|
+
dataset_mount_container = task.get(
|
|
559
|
+
"dataset_mount_path", "/datasets"
|
|
560
|
+
)
|
|
561
|
+
# Add dataset mount to job mounts
|
|
562
|
+
# Lepton mount format: {"path": "/path/in/container", "mount_from": {"path": "/host/path"}}
|
|
563
|
+
job_mounts.append(
|
|
564
|
+
{
|
|
565
|
+
"path": dataset_mount_container,
|
|
566
|
+
"mount_from": {"path": dataset_mount_host},
|
|
567
|
+
}
|
|
568
|
+
)
|
|
569
|
+
# Add NEMO_EVALUATOR_DATASET_DIR environment variable
|
|
570
|
+
job_env_vars["NEMO_EVALUATOR_DATASET_DIR"] = dataset_mount_container
|
|
571
|
+
|
|
494
572
|
print(
|
|
495
573
|
f" - Storage: {len(job_mounts)} mount(s) with evaluation ID isolation"
|
|
496
574
|
)
|
|
@@ -610,7 +688,7 @@ class LeptonExecutor(BaseExecutor):
|
|
|
610
688
|
job_state = lepton_status.get("state", "Unknown")
|
|
611
689
|
|
|
612
690
|
# Map Lepton job states to our execution states
|
|
613
|
-
if job_state
|
|
691
|
+
if job_state in ["Succeeded", "Completed"]:
|
|
614
692
|
state = ExecutionState.SUCCESS
|
|
615
693
|
elif job_state in ["Running", "Pending", "Starting"]:
|
|
616
694
|
state = ExecutionState.RUNNING
|
|
@@ -817,9 +895,13 @@ def _dry_run_lepton(
|
|
|
817
895
|
) -> None:
|
|
818
896
|
print("DRY RUN: Lepton job configurations prepared")
|
|
819
897
|
try:
|
|
820
|
-
# validate tasks
|
|
898
|
+
# validate tasks (container overrides are supported)
|
|
821
899
|
for task in cfg.evaluation.tasks:
|
|
822
|
-
|
|
900
|
+
_ = get_task_definition_for_job(
|
|
901
|
+
task_query=task.name,
|
|
902
|
+
base_mapping=tasks_mapping,
|
|
903
|
+
container=task.get("container"),
|
|
904
|
+
)
|
|
823
905
|
|
|
824
906
|
# nice-to-have checks (existing endpoint URL or endpoints mapping)
|
|
825
907
|
if getattr(cfg.deployment, "type", None) == "none":
|
|
@@ -837,7 +919,11 @@ def _dry_run_lepton(
|
|
|
837
919
|
else:
|
|
838
920
|
endpoints_cfg = getattr(cfg.deployment, "endpoints", {}) or {}
|
|
839
921
|
for task in cfg.evaluation.tasks:
|
|
840
|
-
td =
|
|
922
|
+
td = get_task_definition_for_job(
|
|
923
|
+
task_query=task.name,
|
|
924
|
+
base_mapping=tasks_mapping,
|
|
925
|
+
container=task.get("container"),
|
|
926
|
+
)
|
|
841
927
|
etype = td.get("endpoint_type")
|
|
842
928
|
if etype not in endpoints_cfg:
|
|
843
929
|
raise ValueError(
|
|
@@ -856,9 +942,21 @@ def _dry_run_lepton(
|
|
|
856
942
|
getattr(cfg, "target", {}).get("api_endpoint", {}), "api_key_name", None
|
|
857
943
|
)
|
|
858
944
|
for task in cfg.evaluation.tasks:
|
|
859
|
-
td =
|
|
945
|
+
td = get_task_definition_for_job(
|
|
946
|
+
task_query=task.name,
|
|
947
|
+
base_mapping=tasks_mapping,
|
|
948
|
+
container=task.get("container"),
|
|
949
|
+
)
|
|
860
950
|
required = td.get("required_env_vars", []) or []
|
|
861
951
|
for var in required:
|
|
952
|
+
# Skip NEMO_EVALUATOR_DATASET_DIR as it's handled by dataset mounting logic
|
|
953
|
+
if var == "NEMO_EVALUATOR_DATASET_DIR":
|
|
954
|
+
if "dataset_dir" not in task:
|
|
955
|
+
raise ValueError(
|
|
956
|
+
f"Task '{task.name}' requires dataset_dir to be specified. "
|
|
957
|
+
f"Add 'dataset_dir: /path/to/your/dataset' under the task configuration."
|
|
958
|
+
)
|
|
959
|
+
continue
|
|
862
960
|
if var == "API_KEY":
|
|
863
961
|
if not (("API_KEY" in lepton_env_vars) or bool(api_key_name)):
|
|
864
962
|
raise ValueError(
|