nemo-evaluator-launcher 0.1.8__tar.gz → 0.1.10__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nemo-evaluator-launcher might be problematic; see the registry's advisory page for more details.

Files changed (63) hide show
  1. {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.10}/PKG-INFO +1 -1
  2. {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.10}/src/nemo_evaluator_launcher/api/functional.py +22 -18
  3. {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.10}/src/nemo_evaluator_launcher/cli/main.py +60 -0
  4. {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.10}/src/nemo_evaluator_launcher/cli/run.py +20 -6
  5. {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.10}/src/nemo_evaluator_launcher/cli/status.py +42 -3
  6. {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.10}/src/nemo_evaluator_launcher/common/execdb.py +121 -27
  7. {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.10}/src/nemo_evaluator_launcher/common/helpers.py +1 -1
  8. nemo_evaluator_launcher-0.1.10/src/nemo_evaluator_launcher/configs/deployment/generic.yaml +33 -0
  9. nemo_evaluator_launcher-0.1.10/src/nemo_evaluator_launcher/configs/deployment/trtllm.yaml +24 -0
  10. {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.10}/src/nemo_evaluator_launcher/executors/lepton/executor.py +1 -1
  11. {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.10}/src/nemo_evaluator_launcher/executors/local/executor.py +22 -22
  12. {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.10}/src/nemo_evaluator_launcher/executors/local/run.template.sh +1 -1
  13. {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.10}/src/nemo_evaluator_launcher/executors/slurm/executor.py +2 -2
  14. {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.10}/src/nemo_evaluator_launcher/package_info.py +1 -1
  15. {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.10}/src/nemo_evaluator_launcher.egg-info/PKG-INFO +1 -1
  16. {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.10}/src/nemo_evaluator_launcher.egg-info/SOURCES.txt +2 -0
  17. {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.10}/LICENSE +0 -0
  18. {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.10}/README.md +0 -0
  19. {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.10}/pyproject.toml +0 -0
  20. {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.10}/setup.cfg +0 -0
  21. {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.10}/src/nemo_evaluator_launcher/__init__.py +0 -0
  22. {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.10}/src/nemo_evaluator_launcher/api/__init__.py +0 -0
  23. {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.10}/src/nemo_evaluator_launcher/api/types.py +0 -0
  24. {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.10}/src/nemo_evaluator_launcher/api/utils.py +0 -0
  25. {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.10}/src/nemo_evaluator_launcher/cli/__init__.py +0 -0
  26. {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.10}/src/nemo_evaluator_launcher/cli/export.py +0 -0
  27. {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.10}/src/nemo_evaluator_launcher/cli/kill.py +0 -0
  28. {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.10}/src/nemo_evaluator_launcher/cli/ls_runs.py +0 -0
  29. {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.10}/src/nemo_evaluator_launcher/cli/ls_tasks.py +0 -0
  30. {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.10}/src/nemo_evaluator_launcher/cli/version.py +0 -0
  31. {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.10}/src/nemo_evaluator_launcher/common/__init__.py +0 -0
  32. {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.10}/src/nemo_evaluator_launcher/common/logging_utils.py +0 -0
  33. {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.10}/src/nemo_evaluator_launcher/common/mapping.py +0 -0
  34. {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.10}/src/nemo_evaluator_launcher/configs/__init__.py +0 -0
  35. {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.10}/src/nemo_evaluator_launcher/configs/default.yaml +0 -0
  36. {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.10}/src/nemo_evaluator_launcher/configs/deployment/nim.yaml +0 -0
  37. {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.10}/src/nemo_evaluator_launcher/configs/deployment/none.yaml +0 -0
  38. {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.10}/src/nemo_evaluator_launcher/configs/deployment/sglang.yaml +0 -0
  39. {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.10}/src/nemo_evaluator_launcher/configs/deployment/vllm.yaml +0 -0
  40. {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.10}/src/nemo_evaluator_launcher/configs/execution/lepton/default.yaml +0 -0
  41. {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.10}/src/nemo_evaluator_launcher/configs/execution/local.yaml +0 -0
  42. {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.10}/src/nemo_evaluator_launcher/configs/execution/slurm/default.yaml +0 -0
  43. {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.10}/src/nemo_evaluator_launcher/executors/__init__.py +0 -0
  44. {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.10}/src/nemo_evaluator_launcher/executors/base.py +0 -0
  45. {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.10}/src/nemo_evaluator_launcher/executors/lepton/__init__.py +0 -0
  46. {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.10}/src/nemo_evaluator_launcher/executors/lepton/deployment_helpers.py +0 -0
  47. {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.10}/src/nemo_evaluator_launcher/executors/lepton/job_helpers.py +0 -0
  48. {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.10}/src/nemo_evaluator_launcher/executors/local/__init__.py +0 -0
  49. {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.10}/src/nemo_evaluator_launcher/executors/registry.py +0 -0
  50. {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.10}/src/nemo_evaluator_launcher/executors/slurm/__init__.py +0 -0
  51. {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.10}/src/nemo_evaluator_launcher/exporters/__init__.py +0 -0
  52. {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.10}/src/nemo_evaluator_launcher/exporters/base.py +0 -0
  53. {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.10}/src/nemo_evaluator_launcher/exporters/gsheets.py +0 -0
  54. {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.10}/src/nemo_evaluator_launcher/exporters/local.py +0 -0
  55. {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.10}/src/nemo_evaluator_launcher/exporters/mlflow.py +0 -0
  56. {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.10}/src/nemo_evaluator_launcher/exporters/registry.py +0 -0
  57. {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.10}/src/nemo_evaluator_launcher/exporters/utils.py +0 -0
  58. {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.10}/src/nemo_evaluator_launcher/exporters/wandb.py +0 -0
  59. {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.10}/src/nemo_evaluator_launcher/resources/mapping.toml +0 -0
  60. {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.10}/src/nemo_evaluator_launcher.egg-info/dependency_links.txt +0 -0
  61. {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.10}/src/nemo_evaluator_launcher.egg-info/entry_points.txt +0 -0
  62. {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.10}/src/nemo_evaluator_launcher.egg-info/requires.txt +0 -0
  63. {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.10}/src/nemo_evaluator_launcher.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nemo-evaluator-launcher
3
- Version: 0.1.8
3
+ Version: 0.1.10
4
4
  Summary: Launcher for the evaluations provided by NeMo Evaluator containers with different runtime backends
5
5
  Author: NVIDIA
6
6
  Author-email: nemo-toolkit@nvidia.com
@@ -99,11 +99,13 @@ def run_eval(cfg: RunConfig, dry_run: bool = False) -> Optional[str]:
99
99
  return get_executor(cfg.execution.type).execute_eval(cfg, dry_run)
100
100
 
101
101
 
102
- def get_status(job_ids: list[str]) -> list[dict[str, Any]]:
102
+ def get_status(ids_or_prefixes: list[str]) -> list[dict[str, Any]]:
103
103
  """Get status of jobs by their IDs or invocation IDs.
104
104
 
105
105
  Args:
106
- job_ids: List of job IDs or invocation IDs to check status for.
106
+ ids_or_prefixes: List of job IDs or invocation IDs to check status for. Short prefixes
107
+ are allowed; they are resolved to the full IDs as long as no
108
+ collisions are present.
107
109
 
108
110
  Returns:
109
111
  list[dict[str, Any]]: List of status dictionaries for each job or invocation.
@@ -114,14 +116,14 @@ def get_status(job_ids: list[str]) -> list[dict[str, Any]]:
114
116
  db = ExecutionDB()
115
117
  results: List[dict[str, Any]] = []
116
118
 
117
- for job_id in job_ids:
118
- # If id looks like an invocation_id (8 hex digits, no dot), get all jobs for it
119
- if len(job_id) == 8 and "." not in job_id:
120
- jobs = db.get_jobs(job_id)
119
+ for id_or_prefix in ids_or_prefixes:
120
+ # If id looks like an invocation_id (no dot), get all jobs for it
121
+ if "." not in id_or_prefix:
122
+ jobs = db.get_jobs(id_or_prefix)
121
123
  if not jobs:
122
124
  results.append(
123
125
  {
124
- "invocation": job_id,
126
+ "invocation": id_or_prefix,
125
127
  "job_id": None,
126
128
  "status": "not_found",
127
129
  "data": {},
@@ -136,7 +138,7 @@ def get_status(job_ids: list[str]) -> list[dict[str, Any]]:
136
138
  except ValueError as e:
137
139
  results.append(
138
140
  {
139
- "invocation": job_id,
141
+ "invocation": id_or_prefix,
140
142
  "job_id": None,
141
143
  "status": "error",
142
144
  "data": {"error": str(e)},
@@ -146,7 +148,7 @@ def get_status(job_ids: list[str]) -> list[dict[str, Any]]:
146
148
 
147
149
  # Get status from the executor for all jobs in the invocation
148
150
  try:
149
- status_list = executor_cls.get_status(job_id)
151
+ status_list = executor_cls.get_status(id_or_prefix)
150
152
 
151
153
  # Create a result for each job in the invocation
152
154
  for job_id_in_invocation, job_data in jobs.items():
@@ -161,7 +163,7 @@ def get_status(job_ids: list[str]) -> list[dict[str, Any]]:
161
163
 
162
164
  results.append(
163
165
  {
164
- "invocation": job_id,
166
+ "invocation": job_data.invocation_id,
165
167
  "job_id": job_id_in_invocation,
166
168
  "status": (
167
169
  job_status if job_status is not None else "unknown"
@@ -176,7 +178,7 @@ def get_status(job_ids: list[str]) -> list[dict[str, Any]]:
176
178
  except Exception as e:
177
179
  results.append(
178
180
  {
179
- "invocation": job_id,
181
+ "invocation": id_or_prefix,
180
182
  "job_id": None,
181
183
  "status": "error",
182
184
  "data": {"error": str(e)},
@@ -184,13 +186,13 @@ def get_status(job_ids: list[str]) -> list[dict[str, Any]]:
184
186
  )
185
187
  else:
186
188
  # Otherwise, treat as job_id
187
- single_job_data: Optional[JobData] = db.get_job(job_id)
189
+ single_job_data: Optional[JobData] = db.get_job(id_or_prefix)
188
190
 
189
191
  if single_job_data is None:
190
192
  results.append(
191
193
  {
192
194
  "invocation": None,
193
- "job_id": job_id,
195
+ "job_id": id_or_prefix,
194
196
  "status": "not_found",
195
197
  "data": {},
196
198
  }
@@ -204,7 +206,7 @@ def get_status(job_ids: list[str]) -> list[dict[str, Any]]:
204
206
  results.append(
205
207
  {
206
208
  "invocation": None,
207
- "job_id": job_id,
209
+ "job_id": id_or_prefix,
208
210
  "status": "error",
209
211
  "data": {"error": str(e)},
210
212
  }
@@ -213,13 +215,13 @@ def get_status(job_ids: list[str]) -> list[dict[str, Any]]:
213
215
 
214
216
  # Get status from the executor
215
217
  try:
216
- status_list = executor_cls.get_status(job_id)
218
+ status_list = executor_cls.get_status(id_or_prefix)
217
219
 
218
220
  if not status_list:
219
221
  results.append(
220
222
  {
221
223
  "invocation": single_job_data.invocation_id,
222
- "job_id": job_id,
224
+ "job_id": single_job_data.job_id,
223
225
  "status": "unknown",
224
226
  "data": single_job_data.data,
225
227
  }
@@ -229,7 +231,7 @@ def get_status(job_ids: list[str]) -> list[dict[str, Any]]:
229
231
  results.append(
230
232
  {
231
233
  "invocation": single_job_data.invocation_id,
232
- "job_id": job_id,
234
+ "job_id": single_job_data.job_id,
233
235
  "status": (
234
236
  status_list[0].state.value if status_list else "unknown"
235
237
  ),
@@ -246,7 +248,9 @@ def get_status(job_ids: list[str]) -> list[dict[str, Any]]:
246
248
  "invocation": (
247
249
  single_job_data.invocation_id if single_job_data else None
248
250
  ),
249
- "job_id": job_id,
251
+ "job_id": (
252
+ single_job_data.job_id if single_job_data else id_or_prefix
253
+ ),
250
254
  "status": "error",
251
255
  "data": {"error": str(e)},
252
256
  }
@@ -15,6 +15,8 @@
15
15
  #
16
16
  """Main CLI module using simple-parsing with subcommands."""
17
17
 
18
+ import os
19
+
18
20
  from simple_parsing import ArgumentParser
19
21
 
20
22
  import nemo_evaluator_launcher.cli.export as export
@@ -29,6 +31,22 @@ from nemo_evaluator_launcher.common.logging_utils import logger
29
31
  VERSION_HELP = "Show version information"
30
32
 
31
33
 
34
+ def is_verbose_enabled(args) -> bool:
35
+ """Check if verbose flag is enabled in any subcommand."""
36
+ # Check global verbose flag
37
+ if hasattr(args, "verbose") and args.verbose:
38
+ return True
39
+
40
+ # Check subcommand verbose flags
41
+ subcommands = ["run", "status", "kill", "tasks_alias", "tasks", "runs", "export"]
42
+ for subcmd in subcommands:
43
+ if hasattr(args, subcmd) and hasattr(getattr(args, subcmd), "verbose"):
44
+ if getattr(getattr(args, subcmd), "verbose"):
45
+ return True
46
+
47
+ return False
48
+
49
+
32
50
  def create_parser() -> ArgumentParser:
33
51
  """Create and configure the CLI argument parser with subcommands."""
34
52
  parser = ArgumentParser()
@@ -36,6 +54,14 @@ def create_parser() -> ArgumentParser:
36
54
  # Add --version flag at the top level
37
55
  parser.add_argument("--version", action="store_true", help=VERSION_HELP)
38
56
 
57
+ # Add --verbose/-v flag for debug logging
58
+ parser.add_argument(
59
+ "-v",
60
+ "--verbose",
61
+ action="store_true",
62
+ help="Enable verbose logging (sets LOG_LEVEL=DEBUG)",
63
+ )
64
+
39
65
  subparsers = parser.add_subparsers(dest="command", required=False)
40
66
 
41
67
  # Version subcommand
@@ -50,12 +76,24 @@ def create_parser() -> ArgumentParser:
50
76
  run_parser = subparsers.add_parser(
51
77
  "run", help="Run evaluation", description="Run evaluation"
52
78
  )
79
+ run_parser.add_argument(
80
+ "-v",
81
+ "--verbose",
82
+ action="store_true",
83
+ help="Enable verbose logging (sets LOG_LEVEL=DEBUG)",
84
+ )
53
85
  run_parser.add_arguments(run.Cmd, dest="run")
54
86
 
55
87
  # Status subcommand
56
88
  status_parser = subparsers.add_parser(
57
89
  "status", help="Check job status", description="Check job status"
58
90
  )
91
+ status_parser.add_argument(
92
+ "-v",
93
+ "--verbose",
94
+ action="store_true",
95
+ help="Enable verbose logging (sets LOG_LEVEL=DEBUG)",
96
+ )
59
97
  status_parser.add_arguments(status.Cmd, dest="status")
60
98
 
61
99
  # Kill subcommand
@@ -64,12 +102,24 @@ def create_parser() -> ArgumentParser:
64
102
  help="Kill a job or invocation",
65
103
  description="Kill a job (e.g., aefc4819.0) or entire invocation (e.g., aefc4819) by its ID",
66
104
  )
105
+ kill_parser.add_argument(
106
+ "-v",
107
+ "--verbose",
108
+ action="store_true",
109
+ help="Enable verbose logging (sets LOG_LEVEL=DEBUG)",
110
+ )
67
111
  kill_parser.add_arguments(kill.Cmd, dest="kill")
68
112
 
69
113
  # Ls subcommand (with nested subcommands)
70
114
  ls_parser = subparsers.add_parser(
71
115
  "ls", help="List resources", description="List tasks or runs"
72
116
  )
117
+ ls_parser.add_argument(
118
+ "-v",
119
+ "--verbose",
120
+ action="store_true",
121
+ help="Enable verbose logging (sets LOG_LEVEL=DEBUG)",
122
+ )
73
123
  # Add arguments from `ls tasks` so that they work with `ls` as default alias
74
124
  ls_parser.add_arguments(ls_tasks.Cmd, dest="tasks_alias")
75
125
 
@@ -95,6 +145,12 @@ def create_parser() -> ArgumentParser:
95
145
  help="Export evaluation results",
96
146
  description="Export evaluation results takes a List of invocation ids and a list of destinations(local, gitlab, wandb)",
97
147
  )
148
+ export_parser.add_argument(
149
+ "-v",
150
+ "--verbose",
151
+ action="store_true",
152
+ help="Enable verbose logging (sets LOG_LEVEL=DEBUG)",
153
+ )
98
154
  export_parser.add_arguments(export.ExportCmd, dest="export")
99
155
 
100
156
  return parser
@@ -105,6 +161,10 @@ def main() -> None:
105
161
  parser = create_parser()
106
162
  args = parser.parse_args()
107
163
 
164
+ # Handle --verbose flag
165
+ if is_verbose_enabled(args):
166
+ os.environ["LOG_LEVEL"] = "DEBUG"
167
+
108
168
  # Handle --version flag
109
169
  if hasattr(args, "version") and args.version:
110
170
  version_cmd = version.Cmd()
@@ -59,6 +59,13 @@ class Cmd:
59
59
  alias=["-n", "--dry-run"],
60
60
  metadata={"help": "Do not run the evaluation, just print the config."},
61
61
  )
62
+ config_output: str | None = field(
63
+ default=None,
64
+ alias=["--config-output"],
65
+ metadata={
66
+ "help": "Directory to save the complete run config. Defaults to ~/.nemo-evaluator/run_configs/"
67
+ },
68
+ )
62
69
 
63
70
  def execute(self) -> None:
64
71
  # Import heavy dependencies only when needed
@@ -93,12 +100,19 @@ class Cmd:
93
100
 
94
101
  invocation_id = run_eval(config, self.dry_run)
95
102
 
96
- # Save the complete configuration to the raw_configs directory
103
+ # Save the complete configuration
97
104
  if not self.dry_run and invocation_id is not None:
98
- # Create ~/.nemo-evaluator/run_configs directory
99
- home_dir = pathlib.Path.home()
100
- run_configs_dir = home_dir / ".nemo-evaluator" / "run_configs"
101
- run_configs_dir.mkdir(parents=True, exist_ok=True)
105
+ # Determine config output directory
106
+ if self.config_output:
107
+ # Use custom directory specified by --config-output
108
+ config_dir = pathlib.Path(self.config_output)
109
+ else:
110
+ # Default to original location: ~/.nemo-evaluator/run_configs
111
+ home_dir = pathlib.Path.home()
112
+ config_dir = home_dir / ".nemo-evaluator" / "run_configs"
113
+
114
+ # Ensure the directory exists
115
+ config_dir.mkdir(parents=True, exist_ok=True)
102
116
 
103
117
  # Convert DictConfig to dict and save as YAML
104
118
  config_dict = OmegaConf.to_container(config, resolve=True)
@@ -108,7 +122,7 @@ class Cmd:
108
122
 
109
123
  # Create config filename with invocation ID
110
124
  config_filename = f"{invocation_id}_config.yml"
111
- config_path = run_configs_dir / config_filename
125
+ config_path = config_dir / config_filename
112
126
 
113
127
  # Save the complete Hydra configuration
114
128
  with open(config_path, "w") as f:
@@ -17,6 +17,8 @@ from dataclasses import dataclass
17
17
 
18
18
  from simple_parsing import field
19
19
 
20
+ from nemo_evaluator_launcher.executors.base import ExecutionState
21
+
20
22
 
21
23
  @dataclass
22
24
  class Cmd:
@@ -96,10 +98,14 @@ class Cmd:
96
98
  else:
97
99
  location = ""
98
100
 
101
+ # Format status with visual indicators and colors
102
+ status = job.get("status", "")
103
+ formatted_status = self._format_status_with_indicators(status)
104
+
99
105
  rows.append(
100
106
  [
101
107
  job.get("job_id", ""),
102
- job.get("status", ""),
108
+ formatted_status,
103
109
  # job.get("progress", ""), temporarily disabled as this is a WIP feature
104
110
  executor_info,
105
111
  location,
@@ -108,7 +114,10 @@ class Cmd:
108
114
 
109
115
  # Calculate column widths and print
110
116
  widths = [
111
- max(len(str(headers[i])), max(len(str(row[i])) for row in rows))
117
+ max(
118
+ len(str(headers[i])),
119
+ max(len(self._strip_ansi_codes(str(row[i]))) for row in rows),
120
+ )
112
121
  for i in range(len(headers))
113
122
  ]
114
123
 
@@ -119,4 +128,34 @@ class Cmd:
119
128
  print("-" * len(header_row))
120
129
 
121
130
  for row in rows:
122
- print(" | ".join(str(row[i]).ljust(widths[i]) for i in range(len(row))))
131
+ # Adjust padding for ANSI color codes
132
+ formatted_row = []
133
+ for i in range(len(row)):
134
+ content = str(row[i])
135
+ visible_length = len(self._strip_ansi_codes(content))
136
+ padding = widths[i] - visible_length
137
+ formatted_row.append(content + " " * padding)
138
+ print(" | ".join(formatted_row))
139
+
140
+ def _format_status_with_indicators(self, status: str) -> str:
141
+ """Format status with Unicode visual indicators only."""
142
+ # Status mapping based on ExecutionState enum
143
+ status_formats = {
144
+ ExecutionState.SUCCESS.value: "\033[32m✓ SUCCESS\033[0m", # Green Unicode checkmark
145
+ ExecutionState.FAILED.value: "\033[31m✗ FAILED\033[0m", # Red Unicode X
146
+ ExecutionState.RUNNING.value: "\033[33m▶ RUNNING\033[0m", # Yellow Unicode play button
147
+ ExecutionState.PENDING.value: "\033[36m⏳ PENDING\033[0m", # Cyan Unicode hourglass
148
+ ExecutionState.KILLED.value: "\033[35m✗ KILLED\033[0m", # Magenta Unicode X
149
+ # Additional states for error handling
150
+ "not_found": "\033[90m? NOT FOUND\033[0m", # Gray question mark
151
+ "error": "\033[31m✗ ERROR\033[0m", # Red Unicode X
152
+ }
153
+
154
+ return status_formats.get(status.lower(), f"\033[90m? {status.upper()}\033[0m")
155
+
156
+ def _strip_ansi_codes(self, text: str) -> str:
157
+ """Remove ANSI color codes from text for length calculation."""
158
+ import re
159
+
160
+ ansi_escape = re.compile(r"\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])")
161
+ return ansi_escape.sub("", text)
@@ -29,15 +29,15 @@ EXEC_DB_FILE = EXEC_DB_DIR / "exec.v1.jsonl"
29
29
 
30
30
 
31
31
  def generate_invocation_id() -> str:
32
- """Generate a unique invocation ID as an 8-digit hex string."""
33
- return secrets.token_hex(4)
32
+ """Generate a unique invocation ID as a 16-digit hex string."""
33
+ return secrets.token_hex(8)
34
34
 
35
35
 
36
36
  def generate_job_id(invocation_id: str, index: int) -> str:
37
37
  """Generate a job ID as <invocation_id>.<n>.
38
38
 
39
39
  Args:
40
- invocation_id: The invocation group ID (8-digit hex).
40
+ invocation_id: The invocation group ID (16-digit hex).
41
41
  index: The job index (0-based integer).
42
42
  Returns:
43
43
  The job ID string.
@@ -50,7 +50,7 @@ class JobData:
50
50
  """Data structure for job execution information.
51
51
 
52
52
  Attributes:
53
- invocation_id: 8-digit hex string.
53
+ invocation_id: 16-digit hex string.
54
54
  job_id: <invocation_id>.<n> string.
55
55
  timestamp: Unix timestamp when the job was created.
56
56
  executor: Name of the executor that handled this job.
@@ -148,41 +148,135 @@ class ExecutionDB:
148
148
  )
149
149
  raise
150
150
 
151
+ def _resolve_invocation_id(self, short_id: str) -> Optional[str]:
152
+ """Resolve a short invocation ID to the full one.
153
+
154
+ Args:
155
+ short_id: Partial or full invocation ID.
156
+
157
+ Returns:
158
+ Full invocation ID if found uniquely, None if not found.
159
+
160
+ Raises:
161
+ ValueError: If the short_id matches multiple invocation IDs.
162
+ """
163
+ if not short_id:
164
+ return None
165
+
166
+ short_id = short_id.lower()
167
+
168
+ # NOTE(agronskiy): this is a non-optimized implementation that assumes small amount
169
+ # of jobs in ExecDB(), a typical scenario. Speeding up would involve building a
170
+ # prefix tree when loading invocations/jobs.
171
+ matches = [
172
+ inv_id
173
+ for inv_id in self._invocations.keys()
174
+ if inv_id.lower().startswith(short_id)
175
+ ]
176
+
177
+ if len(matches) == 1:
178
+ return matches[0]
179
+ elif len(matches) > 1:
180
+ raise ValueError(f"Ambiguous invocation ID '{short_id}': matches {matches}")
181
+ else:
182
+ return None
183
+
184
+ def _resolve_job_id(self, short_job_id: str) -> Optional[str]:
185
+ """Resolve a short job ID to the full one.
186
+
187
+ Args:
188
+ short_job_id: Partial or full job ID.
189
+
190
+ Returns:
191
+ Full job ID if found uniquely, None if not found.
192
+
193
+ Raises:
194
+ ValueError: If the short_job_id matches multiple job IDs.
195
+ """
196
+ if not short_job_id:
197
+ return None
198
+
199
+ # Normalize to lowercase for case-insensitive matching
200
+ short_job_id = short_job_id.lower()
201
+
202
+ if "." in short_job_id:
203
+ parts = short_job_id.split(".", 1)
204
+ short_inv_id, job_index = parts[0], parts[1]
205
+
206
+ # Resolve the invocation part
207
+ full_inv_id = self._resolve_invocation_id(short_inv_id)
208
+ if full_inv_id:
209
+ candidate_job_id = f"{full_inv_id}.{job_index}"
210
+ if candidate_job_id in self._jobs:
211
+ return candidate_job_id
212
+
213
+ # NOTE(agronskiy): unfortunately, due to legacy, there exist usecases where
214
+ # job_id is the same format as invocation_id
215
+ candidate_job_id = self._resolve_invocation_id(short_job_id)
216
+ if candidate_job_id and candidate_job_id in self._jobs:
217
+ return candidate_job_id
218
+
219
+ return None
220
+
151
221
  def get_job(self, job_id: str) -> Optional[JobData]:
152
- return self._jobs.get(job_id)
222
+ """Get job by full or partial job ID.
153
223
 
154
- def get_jobs(self, invocation_id: str) -> Dict[str, JobData]:
155
- job_ids = self._invocations.get(invocation_id, [])
156
- return {
157
- job_id: self._jobs[job_id] for job_id in job_ids if job_id in self._jobs
158
- }
224
+ Args:
225
+ job_id: Full or partial job ID.
159
226
 
160
- def get_invocation_jobs(self, invocation_id: str) -> List[str]:
161
- return self._invocations.get(invocation_id, [])
227
+ Returns:
228
+ JobData if found, None otherwise.
162
229
 
163
- def get_all_jobs(self) -> Dict[str, JobData]:
164
- """Return a copy of all jobs in the execution DB."""
165
- return dict(self._jobs)
230
+ Raises:
231
+ ValueError: If the job_id matches multiple jobs.
232
+ """
233
+ resolved_id = self._resolve_job_id(job_id)
234
+ if resolved_id:
235
+ return self._jobs.get(resolved_id)
166
236
 
237
+ return None
167
238
 
168
- def write_job(job: JobData) -> None:
169
- db = ExecutionDB()
170
- db.write_job(job)
239
+ def get_jobs(self, invocation_id: str) -> Dict[str, JobData]:
240
+ """Get all jobs for a full or partial invocation ID.
171
241
 
242
+ Args:
243
+ invocation_id: Full or partial invocation ID.
172
244
 
173
- def get_job(job_id: str) -> Optional[JobData]:
174
- db = ExecutionDB()
175
- return db.get_job(job_id)
245
+ Returns:
246
+ Dictionary mapping job_id to JobData for all jobs in the invocation.
176
247
 
248
+ Raises:
249
+ ValueError: If the invocation_id matches multiple invocations.
250
+ """
251
+ resolved_inv_id = self._resolve_invocation_id(invocation_id)
252
+ if not resolved_inv_id:
253
+ return {}
177
254
 
178
- def get_jobs(invocation_id: str) -> Dict[str, JobData]:
179
- db = ExecutionDB()
180
- return db.get_jobs(invocation_id)
255
+ job_ids = self._invocations.get(resolved_inv_id, [])
256
+ return {
257
+ job_id: self._jobs[job_id] for job_id in job_ids if job_id in self._jobs
258
+ }
259
+
260
+ def get_invocation_jobs(self, invocation_id: str) -> List[str]:
261
+ """Get job IDs for a full or partial invocation ID.
262
+
263
+ Args:
264
+ invocation_id: Full or partial invocation ID.
181
265
 
266
+ Returns:
267
+ List of job IDs for the invocation.
182
268
 
183
- def get_all_jobs() -> Dict[str, JobData]:
184
- db = ExecutionDB()
185
- return db.get_all_jobs()
269
+ Raises:
270
+ ValueError: If the invocation_id matches multiple invocations.
271
+ """
272
+ resolved_inv_id = self._resolve_invocation_id(invocation_id)
273
+ if not resolved_inv_id:
274
+ return []
275
+ return self._invocations.get(resolved_inv_id, [])
276
+
277
+ def get_all_jobs(self) -> Dict[str, JobData]:
278
+ """Return a copy of all jobs in the execution DB."""
279
+ return dict(self._jobs)
186
280
 
187
281
 
188
282
  # Ensure all the paths
@@ -75,7 +75,7 @@ def get_eval_factory_command(
75
75
  create_file_cmd = _yaml_to_echo_command(
76
76
  yaml.safe_dump(config_fields), "config_ef.yaml"
77
77
  )
78
- eval_command = f"""eval-factory run_eval --model_id {model_id} --model_type {model_type} --eval_type {eval_type} --model_url {model_url} --api_key_name API_KEY --output_dir /results --run_config config_ef.yaml"""
78
+ eval_command = f"""cmd=$([[ $(command -v nemo-evaluator) ]] && echo 'nemo-evaluator' || echo 'eval-factory') && $cmd run_eval --model_id {model_id} --model_type {model_type} --eval_type {eval_type} --model_url {model_url} --api_key_name API_KEY --output_dir /results --run_config config_ef.yaml"""
79
79
 
80
80
  if overrides:
81
81
  eval_command = f"{eval_command} --overrides {overrides_str}"
@@ -0,0 +1,33 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ #
16
+ # Generic server deployment configuration template
17
+ #
18
+ type: generic
19
+ image: ??? # Docker image to use for deployment
20
+ command: ??? # Command to run the server
21
+
22
+ # Server configuration
23
+ port: 8000
24
+ served_model_name: ??? # Name of the served model (used in command templates and evaluation)
25
+ extra_args: "" # Additional command line arguments
26
+ env_vars: {} # Environment variables as {name: value} dict
27
+ checkpoint_path: null # Path to model checkpoint
28
+
29
+ # API endpoints (customize based on your server)
30
+ endpoints:
31
+ chat: /v1/chat/completions
32
+ completions: /v1/completions
33
+ health: /health
@@ -0,0 +1,24 @@
1
+ type: trtllm
2
+ image: nvcr.io/nvidia/tensorrt-llm/release:1.0.0
3
+ checkpoint_path: ???
4
+ served_model_name: ???
5
+ port: 8000
6
+ tensor_parallel_size: 4
7
+ pipeline_parallel_size: 1
8
+ extra_args: ""
9
+
10
+ endpoints:
11
+ chat: /v1/chat/completions
12
+ completions: /v1/completions
13
+ health: /health
14
+
15
+ command:
16
+ mpirun --allow-run-as-root --oversubscribe
17
+ trtllm-serve serve /checkpoint
18
+ --tp_size=${deployment.tensor_parallel_size}
19
+ --pp_size=${deployment.pipeline_parallel_size}
20
+ --host 0.0.0.0
21
+ --port ${deployment.port}
22
+ --backend pytorch
23
+ --trust_remote_code
24
+ ${deployment.extra_args}
@@ -559,7 +559,7 @@ class LeptonExecutor(BaseExecutor):
559
559
  db = ExecutionDB()
560
560
 
561
561
  # If id looks like an invocation_id (8 hex digits, no dot), get all jobs for it
562
- if len(id) == 8 and "." not in id:
562
+ if "." not in id:
563
563
  return _get_statuses_for_invocation_id(id=id, db=db)
564
564
  # Otherwise, treat as job_id
565
565
  job_data = db.get_job(id)
@@ -185,26 +185,6 @@ class LocalExecutor(BaseExecutor):
185
185
  run_all_sequentially_sh_content
186
186
  )
187
187
 
188
- # Save launched jobs metadata
189
- db = ExecutionDB()
190
- for job_id, task, evaluation_task in zip(
191
- job_ids, cfg.evaluation.tasks, evaluation_tasks
192
- ):
193
- db.write_job(
194
- job=JobData(
195
- invocation_id=invocation_id,
196
- job_id=job_id,
197
- timestamp=time.time(),
198
- executor="local",
199
- data={
200
- "output_dir": str(evaluation_task["output_dir"]),
201
- "container": evaluation_task["container_name"],
202
- "eval_image": evaluation_task["eval_image"],
203
- },
204
- config=OmegaConf.to_object(cfg),
205
- )
206
- )
207
-
208
188
  if dry_run:
209
189
  print("\n\n=============================================\n\n")
210
190
  print(f"DRY RUN: Scripts prepared and saved to {output_dir}")
@@ -225,6 +205,26 @@ class LocalExecutor(BaseExecutor):
225
205
  print("\nTo execute, run without --dry-run")
226
206
  return invocation_id
227
207
 
208
+ # Save launched jobs metadata
209
+ db = ExecutionDB()
210
+ for job_id, task, evaluation_task in zip(
211
+ job_ids, cfg.evaluation.tasks, evaluation_tasks
212
+ ):
213
+ db.write_job(
214
+ job=JobData(
215
+ invocation_id=invocation_id,
216
+ job_id=job_id,
217
+ timestamp=time.time(),
218
+ executor="local",
219
+ data={
220
+ "output_dir": str(evaluation_task["output_dir"]),
221
+ "container": evaluation_task["container_name"],
222
+ "eval_image": evaluation_task["eval_image"],
223
+ },
224
+ config=OmegaConf.to_object(cfg),
225
+ )
226
+ )
227
+
228
228
  # Launch bash scripts with Popen for non-blocking execution.
229
229
  # To ensure subprocess continues after python exits:
230
230
  # - on Unix-like systems, to fully detach the subprocess
@@ -281,8 +281,8 @@ class LocalExecutor(BaseExecutor):
281
281
  """
282
282
  db = ExecutionDB()
283
283
 
284
- # If id looks like an invocation_id (8 hex digits, no dot), get all jobs for it
285
- if len(id) == 8 and "." not in id:
284
+ # If id looks like an invocation_id (no dot), get all jobs for it
285
+ if "." not in id:
286
286
  jobs = db.get_jobs(id)
287
287
  statuses: List[ExecutionStatus] = []
288
288
  for job_id, _ in jobs.items():
@@ -34,7 +34,7 @@ echo "$(date -u +%Y-%m-%dT%H:%M:%SZ)" > "$logs_dir/stage.pre-start"
34
34
  # Docker run with eval factory command
35
35
  (
36
36
  echo "$(date -u +%Y-%m-%dT%H:%M:%SZ)" > "$logs_dir/stage.running"
37
- docker run --rm --shm-size=100g \
37
+ docker run --rm --shm-size=100g --network=host \
38
38
  --name {{ task.container_name }} \
39
39
  --volume "$artifacts_dir":/results \
40
40
  {% for env_var in task.env_vars -%}
@@ -204,8 +204,8 @@ class SlurmExecutor(BaseExecutor):
204
204
  """
205
205
  db = ExecutionDB()
206
206
 
207
- # If id looks like an invocation_id (8 hex digits, no dot), get all jobs for it
208
- if len(id) == 8 and "." not in id:
207
+ # If id looks like an invocation_id (no dot), get all jobs for it
208
+ if "." not in id:
209
209
  jobs = db.get_jobs(id)
210
210
  if not jobs:
211
211
  return []
@@ -16,7 +16,7 @@
16
16
  # Below is the _next_ version that will be published, not the currently published one.
17
17
  MAJOR = 0
18
18
  MINOR = 1
19
- PATCH = 8
19
+ PATCH = 10
20
20
  PRE_RELEASE = ""
21
21
 
22
22
  # Use the following formatting: (major, minor, patch, pre-release)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nemo-evaluator-launcher
3
- Version: 0.1.8
3
+ Version: 0.1.10
4
4
  Summary: Launcher for the evaluations provided by NeMo Evaluator containers with different runtime backends
5
5
  Author: NVIDIA
6
6
  Author-email: nemo-toolkit@nvidia.com
@@ -29,9 +29,11 @@ src/nemo_evaluator_launcher/common/logging_utils.py
29
29
  src/nemo_evaluator_launcher/common/mapping.py
30
30
  src/nemo_evaluator_launcher/configs/__init__.py
31
31
  src/nemo_evaluator_launcher/configs/default.yaml
32
+ src/nemo_evaluator_launcher/configs/deployment/generic.yaml
32
33
  src/nemo_evaluator_launcher/configs/deployment/nim.yaml
33
34
  src/nemo_evaluator_launcher/configs/deployment/none.yaml
34
35
  src/nemo_evaluator_launcher/configs/deployment/sglang.yaml
36
+ src/nemo_evaluator_launcher/configs/deployment/trtllm.yaml
35
37
  src/nemo_evaluator_launcher/configs/deployment/vllm.yaml
36
38
  src/nemo_evaluator_launcher/configs/execution/local.yaml
37
39
  src/nemo_evaluator_launcher/configs/execution/lepton/default.yaml