nemo-evaluator-launcher 0.1.19__py3-none-any.whl → 0.1.56__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. nemo_evaluator_launcher/api/functional.py +159 -5
  2. nemo_evaluator_launcher/cli/logs.py +102 -0
  3. nemo_evaluator_launcher/cli/ls_task.py +280 -0
  4. nemo_evaluator_launcher/cli/ls_tasks.py +208 -55
  5. nemo_evaluator_launcher/cli/main.py +29 -2
  6. nemo_evaluator_launcher/cli/run.py +114 -16
  7. nemo_evaluator_launcher/cli/version.py +26 -23
  8. nemo_evaluator_launcher/common/container_metadata/__init__.py +61 -0
  9. nemo_evaluator_launcher/common/container_metadata/intermediate_repr.py +530 -0
  10. nemo_evaluator_launcher/common/container_metadata/loading.py +1126 -0
  11. nemo_evaluator_launcher/common/container_metadata/registries.py +824 -0
  12. nemo_evaluator_launcher/common/container_metadata/utils.py +63 -0
  13. nemo_evaluator_launcher/common/helpers.py +200 -51
  14. nemo_evaluator_launcher/common/logging_utils.py +16 -5
  15. nemo_evaluator_launcher/common/mapping.py +341 -155
  16. nemo_evaluator_launcher/common/printing_utils.py +25 -12
  17. nemo_evaluator_launcher/configs/deployment/sglang.yaml +4 -2
  18. nemo_evaluator_launcher/configs/deployment/trtllm.yaml +2 -3
  19. nemo_evaluator_launcher/configs/deployment/vllm.yaml +0 -1
  20. nemo_evaluator_launcher/configs/execution/slurm/default.yaml +14 -0
  21. nemo_evaluator_launcher/executors/base.py +31 -1
  22. nemo_evaluator_launcher/executors/lepton/deployment_helpers.py +36 -1
  23. nemo_evaluator_launcher/executors/lepton/executor.py +107 -9
  24. nemo_evaluator_launcher/executors/local/executor.py +383 -24
  25. nemo_evaluator_launcher/executors/local/run.template.sh +54 -2
  26. nemo_evaluator_launcher/executors/slurm/executor.py +559 -64
  27. nemo_evaluator_launcher/executors/slurm/proxy.cfg.template +26 -0
  28. nemo_evaluator_launcher/exporters/utils.py +32 -46
  29. nemo_evaluator_launcher/package_info.py +1 -1
  30. nemo_evaluator_launcher/resources/all_tasks_irs.yaml +17016 -0
  31. nemo_evaluator_launcher/resources/mapping.toml +64 -315
  32. {nemo_evaluator_launcher-0.1.19.dist-info → nemo_evaluator_launcher-0.1.56.dist-info}/METADATA +4 -3
  33. nemo_evaluator_launcher-0.1.56.dist-info/RECORD +69 -0
  34. {nemo_evaluator_launcher-0.1.19.dist-info → nemo_evaluator_launcher-0.1.56.dist-info}/entry_points.txt +1 -0
  35. nemo_evaluator_launcher-0.1.19.dist-info/RECORD +0 -60
  36. {nemo_evaluator_launcher-0.1.19.dist-info → nemo_evaluator_launcher-0.1.56.dist-info}/WHEEL +0 -0
  37. {nemo_evaluator_launcher-0.1.19.dist-info → nemo_evaluator_launcher-0.1.56.dist-info}/licenses/LICENSE +0 -0
  38. {nemo_evaluator_launcher-0.1.19.dist-info → nemo_evaluator_launcher-0.1.56.dist-info}/top_level.txt +0 -0
nemo_evaluator_launcher/configs/deployment/trtllm.yaml

@@ -3,7 +3,7 @@ image: nvcr.io/nvidia/tensorrt-llm/release:1.0.0
 checkpoint_path: ???
 served_model_name: ???
 port: 8000
-tensor_parallel_size: 4
+tensor_parallel_size: 8
 pipeline_parallel_size: 1
 extra_args: ""

@@ -12,8 +12,7 @@ endpoints:
   completions: /v1/completions
   health: /health

-command:
-  mpirun --allow-run-as-root --oversubscribe
+command: mpirun --allow-run-as-root --oversubscribe
   trtllm-serve serve /checkpoint
   --tp_size=${deployment.tensor_parallel_size}
   --pp_size=${deployment.pipeline_parallel_size}
nemo_evaluator_launcher/configs/deployment/vllm.yaml

@@ -37,6 +37,5 @@ command: vllm serve ${oc.select:deployment.hf_model_handle,/checkpoint}
   --port ${deployment.port}
   --trust-remote-code
   --served-model-name ${deployment.served_model_name}
-  --enforce-eager
   --gpu-memory-utilization ${deployment.gpu_memory_utilization}
   ${deployment.extra_args}
nemo_evaluator_launcher/configs/execution/slurm/default.yaml

@@ -25,6 +25,12 @@ ntasks_per_node: 1
 gres: gpu:8
 walltime: 01:00:00
 subproject: nemo-evaluator-launcher
+sbatch_comment: null  # Optional comment for SLURM job (translates to #SBATCH --comment='...')
+
+# Deployment-specific SLURM configuration
+deployment:
+  n_tasks: 1  # Number of tasks for deployment srun (default: 1, for multi-instance set to num_nodes)
+
 env_vars:
   deployment: {}
   evaluation: {}

@@ -32,3 +38,11 @@ mounts:
   deployment: {}
   evaluation: {}
 mount_home: true
+
+proxy:
+  type: haproxy
+  image: haproxy:latest
+  config:
+    haproxy_port: 5009
+    health_check_path: /health
+    health_check_status: 200
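The new deployment and proxy blocks appear to be ordinary OmegaConf config (the launcher's configs use ${...} interpolation and DictConfig elsewhere in this diff), so they can be overridden like any other execution setting. A minimal sketch; the override values are illustrative, only the keys and defaults come from the diff above:

    from omegaconf import OmegaConf

    # Defaults mirroring the new keys in configs/execution/slurm/default.yaml (see diff above).
    defaults = OmegaConf.create(
        {
            "deployment": {"n_tasks": 1},
            "proxy": {
                "type": "haproxy",
                "image": "haproxy:latest",
                "config": {
                    "haproxy_port": 5009,
                    "health_check_path": "/health",
                    "health_check_status": 200,
                },
            },
        }
    )

    # Hypothetical user overrides: one deployment task per node, different proxy port.
    overrides = OmegaConf.from_dotlist(
        ["deployment.n_tasks=2", "proxy.config.haproxy_port=6000"]
    )
    print(OmegaConf.to_yaml(OmegaConf.merge(defaults, overrides)))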
nemo_evaluator_launcher/executors/base.py

@@ -21,10 +21,12 @@ Defines the abstract interface for all executor implementations and common statu
 from abc import ABC, abstractmethod
 from dataclasses import dataclass
 from enum import Enum
-from typing import Any, Optional
+from typing import Any, Iterator, Optional, Tuple

 from omegaconf import DictConfig

+from nemo_evaluator_launcher.common.logging_utils import logger
+

 class ExecutionState(Enum):
     """Enumeration of possible execution states."""

@@ -118,3 +120,31 @@ class BaseExecutor(ABC):
             return f"Could not find or kill job {job_id} ({container_or_id}) - job was already killed"
         # Generic error message
         return f"Could not find or kill job {job_id} ({container_or_id})"
+
+    @staticmethod
+    def stream_logs(
+        id: str, executor_name: Optional[str] = None
+    ) -> Iterator[Tuple[str, str, str]]:
+        """Stream logs from a job or invocation group.
+
+        This is an optional method that executors can implement to provide log streaming.
+        If not implemented, it will log a warning and raise NotImplementedError.
+
+        Args:
+            id: Unique job identifier or invocation identifier.
+            executor_name: Optional executor name for warning messages. If not provided,
+                will attempt to infer from the calling context.
+
+        Yields:
+            Tuple[str, str, str]: Tuples of (job_id, task_name, log_line) for each log line.
+                Empty lines are yielded as empty strings.
+
+        Raises:
+            NotImplementedError: If the executor does not support log streaming.
+        """
+        executor_display_name = executor_name or "this executor"
+        logger.warning(
+            f"Log streaming is not yet implemented for executor '{executor_display_name}'. "
+            "Only 'local' executor currently supports log streaming."
+        )
+        raise NotImplementedError("This executor does not support log streaming")
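For orientation, here is a minimal sketch of what a concrete implementation of this new hook could look like in an executor subclass. The results/<id>/<task>/logs/stdout.log layout is an assumption made for the example, not the launcher's actual directory structure:

    from pathlib import Path
    from typing import Iterator, Optional, Tuple


    class FileTailExecutor:  # hypothetical executor, for illustration only
        @staticmethod
        def stream_logs(
            id: str, executor_name: Optional[str] = None
        ) -> Iterator[Tuple[str, str, str]]:
            # Assumed layout: results/<invocation_id>/<task_name>/logs/stdout.log
            base = Path("results") / id
            for log_file in sorted(base.glob("*/logs/stdout.log")):
                task_name = log_file.parent.parent.name
                with log_file.open() as fh:
                    for line in fh:
                        # Yield (job_id, task_name, log_line), matching the base signature.
                        yield id, task_name, line.rstrip("\n")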
nemo_evaluator_launcher/executors/lepton/deployment_helpers.py

@@ -19,6 +19,7 @@ Handles Lepton endpoint creation, management, and health checks.
 """

 import json
+import shlex
 import subprocess
 import time
 from pathlib import Path

@@ -27,6 +28,7 @@ from typing import Any, Dict, Optional
 # Import lepton dependencies
 from omegaconf import DictConfig

+from nemo_evaluator_launcher.common.helpers import _str_to_echo_command
 from nemo_evaluator_launcher.common.logging_utils import logger


@@ -235,6 +237,8 @@ def _create_inference_container_spec(deployment_cfg: DictConfig) -> Dict[str, An
     Returns:
         Container specification for Lepton.
     """
+    # Extract pre_cmd from deployment_cfg
+    pre_cmd: str = deployment_cfg.get("pre_cmd") or ""
     container_spec = {
         "image": deployment_cfg.image,
         "ports": [{"container_port": deployment_cfg.port}],

@@ -258,6 +262,18 @@ def _create_inference_container_spec(deployment_cfg: DictConfig) -> Dict[str, An
         if hasattr(deployment_cfg, "extra_args") and deployment_cfg.extra_args:
             command_parts.extend(deployment_cfg.extra_args.split())

+        # Wrap with pre_cmd if provided
+        if pre_cmd:
+            create_pre_script_cmd = _str_to_echo_command(
+                pre_cmd, filename="deployment_pre_cmd.sh"
+            )
+            original_cmd = " ".join(shlex.quote(str(c)) for c in command_parts)
+            command_parts = [
+                "/bin/bash",
+                "-c",
+                f"{create_pre_script_cmd.cmd} && source deployment_pre_cmd.sh && exec {original_cmd}",
+            ]
+
         container_spec["command"] = command_parts

     elif deployment_cfg.type == "sglang":

@@ -278,12 +294,31 @@ def _create_inference_container_spec(deployment_cfg: DictConfig) -> Dict[str, An
         if hasattr(deployment_cfg, "extra_args") and deployment_cfg.extra_args:
             command_parts.extend(deployment_cfg.extra_args.split())

+        # Wrap with pre_cmd if provided
+        if pre_cmd:
+            create_pre_script_cmd = _str_to_echo_command(
+                pre_cmd, filename="deployment_pre_cmd.sh"
+            )
+            original_cmd = " ".join(shlex.quote(str(c)) for c in command_parts)
+            command_parts = [
+                "/bin/bash",
+                "-c",
+                f"{create_pre_script_cmd.cmd} && source deployment_pre_cmd.sh && exec {original_cmd}",
+            ]
+
         container_spec["command"] = command_parts

     elif deployment_cfg.type == "nim":
         # NIM containers use their default entrypoint - no custom command needed
         # Configuration is handled via environment variables
-        pass
+        # pre_cmd is not supported for NIM deployments
+        if pre_cmd:
+            logger.error(
+                "pre_cmd is not supported for NIM deployments",
+                deployment_type="nim",
+                pre_cmd=pre_cmd,
+            )
+            raise ValueError("pre_cmd is not supported for NIM deployments")

     return container_spec

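The pre_cmd wrapping pattern used above is easier to see in isolation. Below is a rough, stdlib-only sketch of the same idea; _str_to_echo_command is approximated here with a printf redirect, which may differ from the launcher's actual helper:

    import shlex


    def wrap_with_pre_cmd(command_parts, pre_cmd):
        """Wrap an inference command so pre_cmd is materialized and sourced first."""
        if not pre_cmd:
            return command_parts
        # Approximation of _str_to_echo_command: write pre_cmd into a script file.
        write_script = f"printf '%s\\n' {shlex.quote(pre_cmd)} > deployment_pre_cmd.sh"
        original_cmd = " ".join(shlex.quote(str(c)) for c in command_parts)
        return [
            "/bin/bash",
            "-c",
            f"{write_script} && source deployment_pre_cmd.sh && exec {original_cmd}",
        ]


    # Example: run a pip install before launching a vLLM server.
    print(wrap_with_pre_cmd(["vllm", "serve", "/checkpoint", "--port", "8000"],
                            "pip install flash-attn"))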
nemo_evaluator_launcher/executors/lepton/executor.py

@@ -18,6 +18,7 @@
 Handles deployment and evaluation using Lepton endpoints with NIM containers.
 """

+import os
 import time
 from pathlib import Path
 from typing import List

@@ -33,9 +34,10 @@ from nemo_evaluator_launcher.common.execdb import (
 from nemo_evaluator_launcher.common.helpers import get_eval_factory_command
 from nemo_evaluator_launcher.common.logging_utils import logger
 from nemo_evaluator_launcher.common.mapping import (
-    get_task_from_mapping,
+    get_task_definition_for_job,
     load_tasks_mapping,
 )
+from nemo_evaluator_launcher.common.printing_utils import red
 from nemo_evaluator_launcher.executors.base import (
     BaseExecutor,
     ExecutionState,

@@ -88,6 +90,23 @@ class LeptonExecutor(BaseExecutor):
         # Generate invocation ID
         invocation_id = generate_invocation_id()

+        # TODO(agronskiy): the structure of this executor differs from others,
+        # so the best place to check for unsafe commands yelids a bit of duplication.
+        # We can't use the get_eval_factory_command here because the port is not yet
+        # populated.
+        # Refactor the whole thing.
+        is_potentially_unsafe = False
+        for idx, task in enumerate(cfg.evaluation.tasks):
+            pre_cmd: str = task.get("pre_cmd") or cfg.evaluation.get("pre_cmd") or ""
+            if pre_cmd:
+                is_potentially_unsafe = True
+                break
+
+        # Check for deployment pre_cmd
+        deployment_pre_cmd: str = cfg.deployment.get("pre_cmd") or ""
+        if deployment_pre_cmd:
+            is_potentially_unsafe = True
+
         # DRY-RUN mode
         if dry_run:
             output_dir = Path(cfg.execution.output_dir).absolute() / invocation_id
@@ -102,8 +121,34 @@ class LeptonExecutor(BaseExecutor):
             else:
                 print(f"with endpoint type '{cfg.deployment.type}'")

+            if is_potentially_unsafe:
+                print(
+                    red(
+                        "\nFound `pre_cmd` (evaluation or deployment) which carries security risk. When running without --dry-run "
+                        "make sure you trust the command and set NEMO_EVALUATOR_TRUST_PRE_CMD=1"
+                    )
+                )
+
             return invocation_id

+        if is_potentially_unsafe:
+            if os.environ.get("NEMO_EVALUATOR_TRUST_PRE_CMD", "") == "1":
+                logger.warning(
+                    "Found non-empty commands (e.g. `pre_cmd` in evaluation or deployment) and NEMO_EVALUATOR_TRUST_PRE_CMD "
+                    "is set, proceeding with caution."
+                )
+
+            else:
+                logger.error(
+                    "Found non-empty commands (e.g. `pre_cmd` in evaluation or deployment) and NEMO_EVALUATOR_TRUST_PRE_CMD "
+                    "is not set. This might carry security risk and unstable environments. "
+                    "To continue, make sure you trust the command and set NEMO_EVALUATOR_TRUST_PRE_CMD=1.",
+                )
+                raise AttributeError(
+                    "Untrusted command found in config, make sure you trust and "
+                    "set NEMO_EVALUATOR_TRUST_PRE_CMD=1."
+                )
+
         # For deployment: none, we use the existing endpoint for all tasks
         if cfg.deployment.type == "none":
             print("📌 Using existing endpoint (deployment: none)")
@@ -248,8 +293,10 @@ class LeptonExecutor(BaseExecutor):
                 return

         # Construct the full endpoint URL
-        task_definition = get_task_from_mapping(
-            task.name, tasks_mapping
+        task_definition = get_task_definition_for_job(
+            task_query=task.name,
+            base_mapping=tasks_mapping,
+            container=task.get("container"),
         )
         task_endpoint_type = task_definition["endpoint_type"]
         endpoint_path = cfg.deployment.endpoints[task_endpoint_type]

@@ -338,7 +385,11 @@ class LeptonExecutor(BaseExecutor):

         # Submit each evaluation task as a Lepton job
         for idx, task in enumerate(cfg.evaluation.tasks):
-            task_definition = get_task_from_mapping(task.name, tasks_mapping)
+            task_definition = get_task_definition_for_job(
+                task_query=task.name,
+                base_mapping=tasks_mapping,
+                container=task.get("container"),
+            )

             # Create job ID and Lepton job name (max 36 chars)
             job_id = generate_job_id(invocation_id, idx)
@@ -491,6 +542,33 @@ class LeptonExecutor(BaseExecutor):

             job_mounts.append(mount_dict)

+        # Handle dataset directory mounting if NEMO_EVALUATOR_DATASET_DIR is required
+        if "NEMO_EVALUATOR_DATASET_DIR" in task_definition.get(
+            "required_env_vars", []
+        ):
+            # Get dataset directory from task config
+            if "dataset_dir" in task:
+                dataset_mount_host = task["dataset_dir"]
+            else:
+                raise ValueError(
+                    f"{task.name} task requires a dataset_dir to be specified. "
+                    f"Add 'dataset_dir: /path/to/your/dataset' under the task configuration."
+                )
+            # Get container mount path (default to /datasets if not specified)
+            dataset_mount_container = task.get(
+                "dataset_mount_path", "/datasets"
+            )
+            # Add dataset mount to job mounts
+            # Lepton mount format: {"path": "/path/in/container", "mount_from": {"path": "/host/path"}}
+            job_mounts.append(
+                {
+                    "path": dataset_mount_container,
+                    "mount_from": {"path": dataset_mount_host},
+                }
+            )
+            # Add NEMO_EVALUATOR_DATASET_DIR environment variable
+            job_env_vars["NEMO_EVALUATOR_DATASET_DIR"] = dataset_mount_container
+
         print(
             f" - Storage: {len(job_mounts)} mount(s) with evaluation ID isolation"
         )
@@ -610,7 +688,7 @@ class LeptonExecutor(BaseExecutor):
             job_state = lepton_status.get("state", "Unknown")

             # Map Lepton job states to our execution states
-            if job_state == "Succeeded":
+            if job_state in ["Succeeded", "Completed"]:
                 state = ExecutionState.SUCCESS
             elif job_state in ["Running", "Pending", "Starting"]:
                 state = ExecutionState.RUNNING
@@ -817,9 +895,13 @@ def _dry_run_lepton(
 ) -> None:
     print("DRY RUN: Lepton job configurations prepared")
     try:
-        # validate tasks
+        # validate tasks (container overrides are supported)
         for task in cfg.evaluation.tasks:
-            get_task_from_mapping(task.name, tasks_mapping)
+            _ = get_task_definition_for_job(
+                task_query=task.name,
+                base_mapping=tasks_mapping,
+                container=task.get("container"),
+            )

         # nice-to-have checks (existing endpoint URL or endpoints mapping)
         if getattr(cfg.deployment, "type", None) == "none":

@@ -837,7 +919,11 @@
         else:
             endpoints_cfg = getattr(cfg.deployment, "endpoints", {}) or {}
             for task in cfg.evaluation.tasks:
-                td = get_task_from_mapping(task.name, tasks_mapping)
+                td = get_task_definition_for_job(
+                    task_query=task.name,
+                    base_mapping=tasks_mapping,
+                    container=task.get("container"),
+                )
                 etype = td.get("endpoint_type")
                 if etype not in endpoints_cfg:
                     raise ValueError(

@@ -856,9 +942,21 @@
         getattr(cfg, "target", {}).get("api_endpoint", {}), "api_key_name", None
     )
     for task in cfg.evaluation.tasks:
-        td = get_task_from_mapping(task.name, tasks_mapping)
+        td = get_task_definition_for_job(
+            task_query=task.name,
+            base_mapping=tasks_mapping,
+            container=task.get("container"),
+        )
         required = td.get("required_env_vars", []) or []
         for var in required:
+            # Skip NEMO_EVALUATOR_DATASET_DIR as it's handled by dataset mounting logic
+            if var == "NEMO_EVALUATOR_DATASET_DIR":
+                if "dataset_dir" not in task:
+                    raise ValueError(
+                        f"Task '{task.name}' requires dataset_dir to be specified. "
+                        f"Add 'dataset_dir: /path/to/your/dataset' under the task configuration."
+                    )
+                continue
             if var == "API_KEY":
                 if not (("API_KEY" in lepton_env_vars) or bool(api_key_name)):
                     raise ValueError(