nemo-evaluator-launcher 0.1.17__tar.gz → 0.1.19__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65)
  1. {nemo_evaluator_launcher-0.1.17 → nemo_evaluator_launcher-0.1.19}/PKG-INFO +1 -1
  2. {nemo_evaluator_launcher-0.1.17 → nemo_evaluator_launcher-0.1.19}/src/nemo_evaluator_launcher/__init__.py +15 -1
  3. {nemo_evaluator_launcher-0.1.17 → nemo_evaluator_launcher-0.1.19}/src/nemo_evaluator_launcher/api/types.py +9 -0
  4. nemo_evaluator_launcher-0.1.17/src/nemo_evaluator_launcher/cli/debug.py → nemo_evaluator_launcher-0.1.19/src/nemo_evaluator_launcher/cli/info.py +170 -63
  5. {nemo_evaluator_launcher-0.1.17 → nemo_evaluator_launcher-0.1.19}/src/nemo_evaluator_launcher/cli/main.py +10 -10
  6. {nemo_evaluator_launcher-0.1.17 → nemo_evaluator_launcher-0.1.19}/src/nemo_evaluator_launcher/cli/run.py +39 -13
  7. {nemo_evaluator_launcher-0.1.17 → nemo_evaluator_launcher-0.1.19}/src/nemo_evaluator_launcher/cli/status.py +9 -8
  8. {nemo_evaluator_launcher-0.1.17 → nemo_evaluator_launcher-0.1.19}/src/nemo_evaluator_launcher/common/helpers.py +55 -8
  9. nemo_evaluator_launcher-0.1.19/src/nemo_evaluator_launcher/common/printing_utils.py +93 -0
  10. {nemo_evaluator_launcher-0.1.17 → nemo_evaluator_launcher-0.1.19}/src/nemo_evaluator_launcher/configs/execution/slurm/default.yaml +5 -4
  11. {nemo_evaluator_launcher-0.1.17 → nemo_evaluator_launcher-0.1.19}/src/nemo_evaluator_launcher/executors/lepton/executor.py +11 -1
  12. {nemo_evaluator_launcher-0.1.17 → nemo_evaluator_launcher-0.1.19}/src/nemo_evaluator_launcher/executors/local/executor.py +28 -13
  13. {nemo_evaluator_launcher-0.1.17 → nemo_evaluator_launcher-0.1.19}/src/nemo_evaluator_launcher/executors/local/run.template.sh +4 -1
  14. {nemo_evaluator_launcher-0.1.17 → nemo_evaluator_launcher-0.1.19}/src/nemo_evaluator_launcher/executors/slurm/executor.py +22 -7
  15. {nemo_evaluator_launcher-0.1.17 → nemo_evaluator_launcher-0.1.19}/src/nemo_evaluator_launcher/package_info.py +1 -1
  16. {nemo_evaluator_launcher-0.1.17 → nemo_evaluator_launcher-0.1.19}/src/nemo_evaluator_launcher.egg-info/PKG-INFO +1 -1
  17. {nemo_evaluator_launcher-0.1.17 → nemo_evaluator_launcher-0.1.19}/src/nemo_evaluator_launcher.egg-info/SOURCES.txt +2 -1
  18. {nemo_evaluator_launcher-0.1.17 → nemo_evaluator_launcher-0.1.19}/LICENSE +0 -0
  19. {nemo_evaluator_launcher-0.1.17 → nemo_evaluator_launcher-0.1.19}/README.md +0 -0
  20. {nemo_evaluator_launcher-0.1.17 → nemo_evaluator_launcher-0.1.19}/pyproject.toml +0 -0
  21. {nemo_evaluator_launcher-0.1.17 → nemo_evaluator_launcher-0.1.19}/setup.cfg +0 -0
  22. {nemo_evaluator_launcher-0.1.17 → nemo_evaluator_launcher-0.1.19}/src/nemo_evaluator_launcher/api/__init__.py +0 -0
  23. {nemo_evaluator_launcher-0.1.17 → nemo_evaluator_launcher-0.1.19}/src/nemo_evaluator_launcher/api/functional.py +0 -0
  24. {nemo_evaluator_launcher-0.1.17 → nemo_evaluator_launcher-0.1.19}/src/nemo_evaluator_launcher/api/utils.py +0 -0
  25. {nemo_evaluator_launcher-0.1.17 → nemo_evaluator_launcher-0.1.19}/src/nemo_evaluator_launcher/cli/__init__.py +0 -0
  26. {nemo_evaluator_launcher-0.1.17 → nemo_evaluator_launcher-0.1.19}/src/nemo_evaluator_launcher/cli/export.py +0 -0
  27. {nemo_evaluator_launcher-0.1.17 → nemo_evaluator_launcher-0.1.19}/src/nemo_evaluator_launcher/cli/kill.py +0 -0
  28. {nemo_evaluator_launcher-0.1.17 → nemo_evaluator_launcher-0.1.19}/src/nemo_evaluator_launcher/cli/ls_runs.py +0 -0
  29. {nemo_evaluator_launcher-0.1.17 → nemo_evaluator_launcher-0.1.19}/src/nemo_evaluator_launcher/cli/ls_tasks.py +0 -0
  30. {nemo_evaluator_launcher-0.1.17 → nemo_evaluator_launcher-0.1.19}/src/nemo_evaluator_launcher/cli/version.py +0 -0
  31. {nemo_evaluator_launcher-0.1.17 → nemo_evaluator_launcher-0.1.19}/src/nemo_evaluator_launcher/common/__init__.py +0 -0
  32. {nemo_evaluator_launcher-0.1.17 → nemo_evaluator_launcher-0.1.19}/src/nemo_evaluator_launcher/common/execdb.py +0 -0
  33. {nemo_evaluator_launcher-0.1.17 → nemo_evaluator_launcher-0.1.19}/src/nemo_evaluator_launcher/common/logging_utils.py +0 -0
  34. {nemo_evaluator_launcher-0.1.17 → nemo_evaluator_launcher-0.1.19}/src/nemo_evaluator_launcher/common/mapping.py +0 -0
  35. {nemo_evaluator_launcher-0.1.17 → nemo_evaluator_launcher-0.1.19}/src/nemo_evaluator_launcher/configs/__init__.py +0 -0
  36. {nemo_evaluator_launcher-0.1.17 → nemo_evaluator_launcher-0.1.19}/src/nemo_evaluator_launcher/configs/default.yaml +0 -0
  37. {nemo_evaluator_launcher-0.1.17 → nemo_evaluator_launcher-0.1.19}/src/nemo_evaluator_launcher/configs/deployment/generic.yaml +0 -0
  38. {nemo_evaluator_launcher-0.1.17 → nemo_evaluator_launcher-0.1.19}/src/nemo_evaluator_launcher/configs/deployment/nim.yaml +0 -0
  39. {nemo_evaluator_launcher-0.1.17 → nemo_evaluator_launcher-0.1.19}/src/nemo_evaluator_launcher/configs/deployment/none.yaml +0 -0
  40. {nemo_evaluator_launcher-0.1.17 → nemo_evaluator_launcher-0.1.19}/src/nemo_evaluator_launcher/configs/deployment/sglang.yaml +0 -0
  41. {nemo_evaluator_launcher-0.1.17 → nemo_evaluator_launcher-0.1.19}/src/nemo_evaluator_launcher/configs/deployment/trtllm.yaml +0 -0
  42. {nemo_evaluator_launcher-0.1.17 → nemo_evaluator_launcher-0.1.19}/src/nemo_evaluator_launcher/configs/deployment/vllm.yaml +0 -0
  43. {nemo_evaluator_launcher-0.1.17 → nemo_evaluator_launcher-0.1.19}/src/nemo_evaluator_launcher/configs/execution/lepton/default.yaml +0 -0
  44. {nemo_evaluator_launcher-0.1.17 → nemo_evaluator_launcher-0.1.19}/src/nemo_evaluator_launcher/configs/execution/local.yaml +0 -0
  45. {nemo_evaluator_launcher-0.1.17 → nemo_evaluator_launcher-0.1.19}/src/nemo_evaluator_launcher/executors/__init__.py +0 -0
  46. {nemo_evaluator_launcher-0.1.17 → nemo_evaluator_launcher-0.1.19}/src/nemo_evaluator_launcher/executors/base.py +0 -0
  47. {nemo_evaluator_launcher-0.1.17 → nemo_evaluator_launcher-0.1.19}/src/nemo_evaluator_launcher/executors/lepton/__init__.py +0 -0
  48. {nemo_evaluator_launcher-0.1.17 → nemo_evaluator_launcher-0.1.19}/src/nemo_evaluator_launcher/executors/lepton/deployment_helpers.py +0 -0
  49. {nemo_evaluator_launcher-0.1.17 → nemo_evaluator_launcher-0.1.19}/src/nemo_evaluator_launcher/executors/lepton/job_helpers.py +0 -0
  50. {nemo_evaluator_launcher-0.1.17 → nemo_evaluator_launcher-0.1.19}/src/nemo_evaluator_launcher/executors/local/__init__.py +0 -0
  51. {nemo_evaluator_launcher-0.1.17 → nemo_evaluator_launcher-0.1.19}/src/nemo_evaluator_launcher/executors/registry.py +0 -0
  52. {nemo_evaluator_launcher-0.1.17 → nemo_evaluator_launcher-0.1.19}/src/nemo_evaluator_launcher/executors/slurm/__init__.py +0 -0
  53. {nemo_evaluator_launcher-0.1.17 → nemo_evaluator_launcher-0.1.19}/src/nemo_evaluator_launcher/exporters/__init__.py +0 -0
  54. {nemo_evaluator_launcher-0.1.17 → nemo_evaluator_launcher-0.1.19}/src/nemo_evaluator_launcher/exporters/base.py +0 -0
  55. {nemo_evaluator_launcher-0.1.17 → nemo_evaluator_launcher-0.1.19}/src/nemo_evaluator_launcher/exporters/gsheets.py +0 -0
  56. {nemo_evaluator_launcher-0.1.17 → nemo_evaluator_launcher-0.1.19}/src/nemo_evaluator_launcher/exporters/local.py +0 -0
  57. {nemo_evaluator_launcher-0.1.17 → nemo_evaluator_launcher-0.1.19}/src/nemo_evaluator_launcher/exporters/mlflow.py +0 -0
  58. {nemo_evaluator_launcher-0.1.17 → nemo_evaluator_launcher-0.1.19}/src/nemo_evaluator_launcher/exporters/registry.py +0 -0
  59. {nemo_evaluator_launcher-0.1.17 → nemo_evaluator_launcher-0.1.19}/src/nemo_evaluator_launcher/exporters/utils.py +0 -0
  60. {nemo_evaluator_launcher-0.1.17 → nemo_evaluator_launcher-0.1.19}/src/nemo_evaluator_launcher/exporters/wandb.py +0 -0
  61. {nemo_evaluator_launcher-0.1.17 → nemo_evaluator_launcher-0.1.19}/src/nemo_evaluator_launcher/resources/mapping.toml +0 -0
  62. {nemo_evaluator_launcher-0.1.17 → nemo_evaluator_launcher-0.1.19}/src/nemo_evaluator_launcher.egg-info/dependency_links.txt +0 -0
  63. {nemo_evaluator_launcher-0.1.17 → nemo_evaluator_launcher-0.1.19}/src/nemo_evaluator_launcher.egg-info/entry_points.txt +0 -0
  64. {nemo_evaluator_launcher-0.1.17 → nemo_evaluator_launcher-0.1.19}/src/nemo_evaluator_launcher.egg-info/requires.txt +0 -0
  65. {nemo_evaluator_launcher-0.1.17 → nemo_evaluator_launcher-0.1.19}/src/nemo_evaluator_launcher.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nemo-evaluator-launcher
3
- Version: 0.1.17
3
+ Version: 0.1.19
4
4
  Summary: Launcher for the evaluations provided by NeMo Evaluator containers with different runtime backends
5
5
  Author: NVIDIA
6
6
  Author-email: nemo-toolkit@nvidia.com
@@ -20,6 +20,7 @@ It automatically initializes logging and conditionally loads internal components
20
20
  """
21
21
 
22
22
  import importlib
23
+ import warnings
23
24
 
24
25
  from nemo_evaluator_launcher.common.logging_utils import logger
25
26
  from nemo_evaluator_launcher.package_info import (
@@ -32,9 +33,22 @@ from nemo_evaluator_launcher.package_info import (
32
33
  __version__,
33
34
  )
34
35
 
35
- logger.info("Version info", pkg=__package_name__, ver=__version__)
36
+ # Suppress pydantic warnings from third-party libraries (e.g., wandb) that are not
37
+ # compatible with Pydantic 2.x field metadata on Python 3.13+
38
+ warnings.filterwarnings(
39
+ "ignore",
40
+ message=r"The 'repr' attribute.*Field\(\).*",
41
+ category=Warning,
42
+ )
43
+ warnings.filterwarnings(
44
+ "ignore",
45
+ message=r"The 'frozen' attribute.*Field\(\).*",
46
+ category=Warning,
47
+ )
36
48
 
37
49
 
50
+ logger.info("Version info", pkg=__package_name__, ver=__version__)
51
+
38
52
  try:
39
53
  importlib.import_module("nemo_evaluator_launcher_internal")
40
54
  logger.debug(
@@ -19,9 +19,18 @@ This module defines data structures and helpers for configuration and type safet
19
19
  """
20
20
 
21
21
  import os
22
+ import warnings
22
23
  from dataclasses import dataclass
23
24
  from typing import cast
24
25
 
26
+ # ruff: noqa: E402
27
+ # Later when adding optional module to hydra, since the internal package is optional,
28
+ # will generate a hydra warning. We suppress it as distraction and bad UX, before hydra gets invoked.
29
+ warnings.filterwarnings(
30
+ "ignore",
31
+ message="provider=hydra.searchpath.*path=nemo_evaluator_launcher_internal.*is not available\\.",
32
+ )
33
+
25
34
  import hydra
26
35
  from hydra.core.global_hydra import GlobalHydra
27
36
  from omegaconf import DictConfig, OmegaConf
@@ -14,16 +14,16 @@
14
14
  # limitations under the License.
15
15
  #
16
16
 
17
- """Debugging helper functionalities for nemo-evaluator-launcher."""
17
+ """Job information helper functionalities for nemo-evaluator-launcher."""
18
18
 
19
+ import sys
19
20
  from dataclasses import dataclass
20
21
  from datetime import datetime
21
22
  from pathlib import Path
22
- from typing import Any, Dict, List, Optional, Tuple
23
+ from typing import Any, Dict, List, Tuple
23
24
 
24
25
  from simple_parsing import field
25
26
 
26
- from nemo_evaluator_launcher.cli.export import ExportCmd
27
27
  from nemo_evaluator_launcher.cli.version import Cmd as VersionCmd
28
28
  from nemo_evaluator_launcher.common.execdb import EXEC_DB_FILE, ExecutionDB, JobData
29
29
  from nemo_evaluator_launcher.common.logging_utils import logger
@@ -35,52 +35,60 @@ _EXPORT_HELPER = LocalExporter({})
35
35
 
36
36
 
37
37
  @dataclass
38
- class DebugCmd(ExportCmd):
39
- """Debugging functionalities for nemo-evaluator-launcher.
38
+ class InfoCmd:
39
+ """Job information functionalities for nemo-evaluator-launcher.
40
40
 
41
41
  Examples:
42
- nemo-evaluator-launcher debug <inv> # Full debug info
43
- nemo-evaluator-launcher debug <inv> --config # Show stored job config (YAML)
44
- nemo-evaluator-launcher debug <inv> --artifacts # Show artifact locations
45
- nemo-evaluator-launcher debug <inv> --logs # Show log locations
46
- nemo-evaluator-launcher debug <inv> --copy-logs <path> # Copy logs (default: current dir)
47
- nemo-evaluator-launcher debug <inv> --copy-artifacts <path> # Copy artifacts (default: current dir)
42
+ nemo-evaluator-launcher info <inv> # Full job info
43
+ nemo-evaluator-launcher info <inv> --config # Show stored job config (YAML)
44
+ nemo-evaluator-launcher info <inv> --artifacts # Show artifact locations and key files
45
+ nemo-evaluator-launcher info <inv> --logs # Show log locations and key files
46
+ nemo-evaluator-launcher info <inv> --copy-logs <DIR> # Copy logs to <DIR>
47
+ nemo-evaluator-launcher info <inv> --copy-artifacts <DIR> # Copy artifacts to <DIR>
48
48
 
49
49
  Notes:
50
- - Supports invocation IDs and job IDs
50
+ - Supports invocation IDs and job IDs (space-separated)
51
51
  - Shows local or remote paths depending on executor (local/slurm/lepton)
52
+ - Copy operations work for both local and remote jobs (expect longer time for remote jobs)
53
+ - Copy operations are not supported for Lepton executor (yet).
52
54
  """
53
55
 
54
- # local exporter destination defaults to local
55
- dest: str = field(default="local", init=False)
56
+ invocation_ids: List[str] = field(
57
+ positional=True,
58
+ help="IDs to show info for (space-separated). Accepts invocation IDs or/and job IDs.",
59
+ )
56
60
 
57
- # debug modes
58
- config: bool = field(default=False, help="Show job configuration")
59
- artifacts: bool = field(default=False, help="Show artifact locations")
60
- logs: bool = field(default=False, help="Show log locations")
61
+ # info modes
62
+ config: bool = field(
63
+ default=False, action="store_true", help="Show job configuration"
64
+ )
65
+ artifacts: bool = field(
66
+ default=False, action="store_true", help="Show artifact locations and key files"
67
+ )
68
+ logs: bool = field(
69
+ default=False, action="store_true", help="Show log locations and key files"
70
+ )
61
71
 
62
- # copy operations
63
- copy_logs: Optional[str] = field(
72
+ # copy operations - work for both local and remote jobs
73
+ copy_logs: str | None = field(
64
74
  default=None,
65
75
  alias=["--copy-logs"],
66
- nargs="?",
67
- help="Copy logs to local directory (default: current dir)",
76
+ help="Copy logs to a local directory",
77
+ metavar="DIR",
68
78
  )
69
- copy_artifacts: Optional[str] = field(
79
+ copy_artifacts: str | None = field(
70
80
  default=None,
71
81
  alias=["--copy-artifacts"],
72
- nargs="?",
73
- help="Copy artifacts to local directory (default: current dir)",
82
+ help="Copy artifacts to a local directory",
83
+ metavar="DIR",
74
84
  )
75
85
 
76
86
  def execute(self) -> None:
77
- # show version
78
87
  VersionCmd().execute()
79
-
80
- logger.info("Debug command started", invocation_ids=self.invocation_ids)
88
+ logger.info("Info command started", invocation_ids=self.invocation_ids)
81
89
 
82
90
  if not self.invocation_ids:
83
- logger.error("No invocation IDs provided")
91
+ logger.error("No job or invocation IDs provided.")
84
92
  raise ValueError("No job or invocation IDs provided.")
85
93
 
86
94
  jobs = self._resolve_jobs()
@@ -96,48 +104,63 @@ class DebugCmd(ExportCmd):
96
104
  "No valid jobs found (jobs may have been deleted or IDs may be incorrect)."
97
105
  )
98
106
  print(
99
- "No valid jobs found (jobs may have been deletedd or IDs may be incorrect)."
107
+ "No valid jobs found (jobs may have been deleted or IDs may be incorrect)."
100
108
  )
101
109
  return
102
110
 
111
+ # show ops
103
112
  if self.config:
104
- logger.info("Showing job configuration", job_count=len(jobs))
105
113
  self._show_config_info(jobs)
106
- elif self.logs:
107
- logger.info("Showing job logs locations", job_count=len(jobs))
114
+ if self.logs:
108
115
  self._show_logs_info(jobs)
109
- elif self.artifacts:
110
- logger.info("Showing artifacts locations", job_count=len(jobs))
116
+ if self.artifacts:
111
117
  self._show_artifacts_info(jobs)
112
- elif self.copy_logs is not None:
113
- dest = self.copy_logs or "."
114
- if not self.copy_logs:
115
- print(
116
- "No destination provided for --copy-logs; defaulting to current dir"
117
- )
118
+
119
+ # copy ops
120
+ args = sys.argv[1:]
121
+ copy_logs_flag = "--copy-logs" in args
122
+ copy_artifacts_flag = "--copy-artifacts" in args
123
+
124
+ if copy_logs_flag:
125
+ if self.copy_logs is None:
126
+ raise ValueError("--copy-logs requires a directory path")
127
+ if not self.copy_logs.strip():
128
+ raise ValueError("--copy-logs requires a directory path")
118
129
  logger.info(
119
- "Copying logs to local directory", dest_dir=dest, job_count=len(jobs)
130
+ "Copying logs to local directory",
131
+ dest_dir=self.copy_logs,
132
+ job_count=len(jobs),
120
133
  )
121
- self._copy_logs(jobs, dest)
122
- elif self.copy_artifacts is not None:
123
- dest = self.copy_artifacts or "."
124
- if not self.copy_artifacts:
125
- print(
126
- "No destination provided for --copy-artifacts; defaulting to current dir)"
127
- )
134
+ self._copy_logs(jobs, self.copy_logs)
135
+
136
+ if copy_artifacts_flag:
137
+ if self.copy_artifacts is None:
138
+ raise ValueError("--copy-artifacts requires a directory path")
139
+ if not self.copy_artifacts.strip():
140
+ raise ValueError("--copy-artifacts requires a directory path")
128
141
  logger.info(
129
142
  "Copying artifacts to local directory",
130
- dest_dir=dest,
143
+ dest_dir=self.copy_artifacts,
131
144
  job_count=len(jobs),
132
145
  )
133
- self._copy_artifacts(jobs, dest)
134
- else:
146
+ self._copy_artifacts(jobs, self.copy_artifacts)
147
+
148
+ # default view when no flags
149
+ if not any(
150
+ [
151
+ self.config,
152
+ self.logs,
153
+ self.artifacts,
154
+ self.copy_logs,
155
+ self.copy_artifacts,
156
+ ]
157
+ ):
135
158
  logger.info(
136
159
  "Job metadata details",
137
160
  invocation_id=jobs[0][1].invocation_id if jobs else None,
138
161
  jobs=len(jobs),
139
162
  )
140
- self._show_invocation_debug_info(jobs)
163
+ self._show_invocation_info(jobs)
141
164
 
142
165
  def _resolve_jobs(self) -> List[Tuple[str, JobData]]:
143
166
  """Resolve jobs from ExecDB using IDs (job IDs and/or invocation IDs)."""
@@ -160,15 +183,15 @@ class DebugCmd(ExportCmd):
160
183
  uniq.append((jid, jd))
161
184
  return sorted(uniq, key=lambda p: p[0])
162
185
 
163
- def _show_invocation_debug_info(self, jobs: List[Tuple[str, JobData]]) -> None:
186
+ def _show_invocation_info(self, jobs: List[Tuple[str, JobData]]) -> None:
164
187
  inv = jobs[0][1].invocation_id if jobs else None
165
- logger.info("Debug information", jobs=len(jobs), invocation=inv)
188
+ logger.info("Job information", jobs=len(jobs), invocation=inv)
166
189
  print(
167
- f"Debug information for {len(jobs)} job(s){f' under invocation {inv}' if inv else ''}:\n"
190
+ f"Job information for {len(jobs)} job(s){f' under invocation {inv}' if inv else ''}:\n"
168
191
  )
169
192
 
170
193
  for job_id, job_data in jobs:
171
- self._show_job_debug_info(job_id, job_data)
194
+ self._show_job_info(job_id, job_data)
172
195
  print()
173
196
 
174
197
  # footer hint: where to find more metadata
@@ -184,10 +207,14 @@ class DebugCmd(ExportCmd):
184
207
  print(" - Use --logs to show log locations.")
185
208
  print(" - Use --artifacts to show artifact locations.")
186
209
  print(" - Use --config to show stored job configuration (YAML).")
187
- print(" - Use --copy-logs [DIR] to copy logs to a local directory.")
188
- print(" - Use --copy-artifacts [DIR] to copy artifacts to a local directory.")
210
+ print(
211
+ " - Use --copy-logs [DIR] to copy logs to a local directory (works for local and remote jobs)."
212
+ )
213
+ print(
214
+ " - Use --copy-artifacts [DIR] to copy artifacts to a local directory (works for local and remote jobs)."
215
+ )
189
216
 
190
- def _show_job_debug_info(self, job_id: str, job_data: JobData) -> None:
217
+ def _show_job_info(self, job_id: str, job_data: JobData) -> None:
191
218
  logger.info("Job", job_id=job_id)
192
219
  print(f"Job {job_id}")
193
220
 
@@ -208,14 +235,22 @@ class DebugCmd(ExportCmd):
208
235
  logger.info("Task", job_id=job_id, name=task_name)
209
236
  print(f"├── Task: {task_name}")
210
237
 
238
+ # Determine executor type for file descriptions
239
+ cfg_exec_type = ((job_data.config or {}).get("execution") or {}).get("type")
240
+ exec_type = (job_data.executor or cfg_exec_type or "").lower()
241
+
211
242
  # locations via exporter helper
212
243
  paths = _EXPORT_HELPER.get_job_paths(job_data)
213
244
 
214
- # Artifacts
245
+ # Artifacts with file descriptions
246
+ artifacts_list = _get_artifacts_file_list()
215
247
  if paths.get("storage_type") == "remote_ssh":
216
248
  artifacts_path = f"{paths['username']}@{paths['hostname']}:{paths['remote_path']}/artifacts"
217
249
  logger.info("Artifacts", job_id=job_id, path=artifacts_path, remote=True)
218
250
  print(f"├── Artifacts: {artifacts_path} (remote)")
251
+ print("│ └── Key files:")
252
+ for filename, desc in artifacts_list:
253
+ print(f"│ ├── {filename} - {desc}")
219
254
  else:
220
255
  ap = paths.get("artifacts_dir")
221
256
  if ap:
@@ -224,14 +259,21 @@ class DebugCmd(ExportCmd):
224
259
  "Artifacts", job_id=job_id, path=str(ap), exists_indicator=exists
225
260
  )
226
261
  print(f"├── Artifacts: {ap} {exists} (local)")
262
+ print("│ └── Key files:")
263
+ for filename, desc in artifacts_list:
264
+ print(f"│ ├── {filename} - {desc}")
227
265
 
228
- # Logs
266
+ # Logs with file descriptions
267
+ logs_list = _get_log_file_list(exec_type)
229
268
  if paths.get("storage_type") == "remote_ssh":
230
269
  logs_path = (
231
270
  f"{paths['username']}@{paths['hostname']}:{paths['remote_path']}/logs"
232
271
  )
233
272
  logger.info("Logs", job_id=job_id, path=logs_path, remote=True)
234
273
  print(f"├── Logs: {logs_path} (remote)")
274
+ print("│ └── Key files:")
275
+ for filename, desc in logs_list:
276
+ print(f"│ ├── {filename} - {desc}")
235
277
  else:
236
278
  lp = paths.get("logs_dir")
237
279
  if lp:
@@ -240,6 +282,9 @@ class DebugCmd(ExportCmd):
240
282
  "Logs", job_id=job_id, path=str(lp), exists_indicator=exists
241
283
  )
242
284
  print(f"├── Logs: {lp} {exists} (local)")
285
+ print("│ └── Key files:")
286
+ for filename, desc in logs_list:
287
+ print(f"│ ├── {filename} - {desc}")
243
288
 
244
289
  # executor-specific
245
290
  d = job_data.data or {}
@@ -264,17 +309,23 @@ class DebugCmd(ExportCmd):
264
309
  eu = d.get("endpoint_url")
265
310
  if eu:
266
311
  print(f"├── Endpoint URL: {eu}")
267
- # local and others: paths already displayed above; no extra fields needed
268
312
 
269
313
  def _show_logs_info(self, jobs: List[Tuple[str, JobData]]) -> None:
270
314
  logger.info("Log locations")
271
315
  print("Log locations:\n")
272
316
  for job_id, job_data in jobs:
273
317
  paths = _EXPORT_HELPER.get_job_paths(job_data)
318
+ cfg_exec_type = ((job_data.config or {}).get("execution") or {}).get("type")
319
+ exec_type = (job_data.executor or cfg_exec_type or "").lower()
320
+ logs_list = _get_log_file_list(exec_type)
321
+
274
322
  if paths.get("storage_type") == "remote_ssh":
275
323
  logs_path = f"ssh://{paths['username']}@{paths['hostname']}{paths['remote_path']}/logs"
276
324
  logger.info("Logs", job_id=job_id, path=logs_path, remote=True)
277
325
  print(f"{job_id}: {logs_path} (remote)")
326
+ print(" └── Key files:")
327
+ for filename, desc in logs_list:
328
+ print(f" ├── {filename} - {desc}")
278
329
  else:
279
330
  lp = paths.get("logs_dir")
280
331
  if lp:
@@ -283,18 +334,26 @@ class DebugCmd(ExportCmd):
283
334
  "Logs", job_id=job_id, path=str(lp), exists_indicator=exists
284
335
  )
285
336
  print(f"{job_id}: {lp} {exists} (local)")
337
+ print(" └── Key files:")
338
+ for filename, desc in logs_list:
339
+ print(f" ├── {filename} - {desc}")
286
340
 
287
341
  def _show_artifacts_info(self, jobs: List[Tuple[str, JobData]]) -> None:
288
342
  logger.info("Artifact locations")
289
343
  print("Artifact locations:\n")
290
344
  for job_id, job_data in jobs:
291
345
  paths = _EXPORT_HELPER.get_job_paths(job_data)
346
+ artifacts_list = _get_artifacts_file_list()
347
+
292
348
  if paths.get("storage_type") == "remote_ssh":
293
349
  artifacts_path = f"ssh://{paths['username']}@{paths['hostname']}{paths['remote_path']}/artifacts"
294
350
  logger.info(
295
351
  "Artifacts", job_id=job_id, path=artifacts_path, remote=True
296
352
  )
297
353
  print(f"{job_id}: {artifacts_path} (remote)")
354
+ print(" └── Key files:")
355
+ for filename, desc in artifacts_list:
356
+ print(f" ├── {filename} - {desc}")
298
357
  else:
299
358
  ap = paths.get("artifacts_dir")
300
359
  if ap:
@@ -306,6 +365,9 @@ class DebugCmd(ExportCmd):
306
365
  exists_indicator=exists,
307
366
  )
308
367
  print(f"{job_id}: {ap} {exists} (local)")
368
+ print(" └── Key files:")
369
+ for filename, desc in artifacts_list:
370
+ print(f" ├── {filename} - {desc}")
309
371
 
310
372
  def _show_config_info(self, jobs: List[Tuple[str, JobData]]) -> None:
311
373
  for job_id, job_data in jobs:
@@ -383,6 +445,9 @@ class DebugCmd(ExportCmd):
383
445
  print(
384
446
  f"{jid}: Failed - {job_result.get('message', 'Unknown error')}"
385
447
  )
448
+ # Show full destination path
449
+ full_dest_path = Path(dest_dir).resolve()
450
+ print(f"Copied to: {full_dest_path}")
386
451
  else:
387
452
  err = result.get("error", "Unknown error")
388
453
  logger.warning("Content copy failed", error=err, dest_dir=dest_dir)
@@ -403,3 +468,45 @@ class DebugCmd(ExportCmd):
403
468
  except Exception:
404
469
  pass
405
470
  return ""
471
+
472
+
473
+ # Helper functions for file descriptions (based on actual code and content analysis)
474
+ def _get_artifacts_file_list() -> list[tuple[str, str]]:
475
+ """Files generated in artifacts/."""
476
+ return [
477
+ (
478
+ "results.yml",
479
+ "Benchmark scores, task results and resolved run configuration.",
480
+ ),
481
+ (
482
+ "eval_factory_metrics.json",
483
+ "Response + runtime stats (latency, tokens count, memory)",
484
+ ),
485
+ ("metrics.json", "Harness/benchmark metric and configuration"),
486
+ ("report.html", "Request-Response Pairs samples in HTML format (if enabled)"),
487
+ ("report.json", "Report data in json format, if enabled"),
488
+ ]
489
+
490
+
491
+ def _get_log_file_list(executor_type: str) -> list[tuple[str, str]]:
492
+ """Files actually generated in logs/ - executor-specific."""
493
+ et = (executor_type or "local").lower()
494
+ if et == "slurm":
495
+ return [
496
+ ("client-{SLURM_JOB_ID}.out", "Evaluation container/process output"),
497
+ (
498
+ "slurm-{SLURM_JOB_ID}.out",
499
+ "SLURM scheduler stdout/stderr (batch submission, export steps).",
500
+ ),
501
+ (
502
+ "server-{SLURM_JOB_ID}.out",
503
+ "Model server logs when a deployment is used.",
504
+ ),
505
+ ]
506
+ # local executor
507
+ return [
508
+ (
509
+ "stdout.log",
510
+ "Complete evaluation output (timestamps, resolved config, run/export messages).",
511
+ ),
512
+ ]
@@ -19,8 +19,8 @@ import os
19
19
 
20
20
  from simple_parsing import ArgumentParser
21
21
 
22
- import nemo_evaluator_launcher.cli.debug as debug
23
22
  import nemo_evaluator_launcher.cli.export as export
23
+ import nemo_evaluator_launcher.cli.info as info
24
24
  import nemo_evaluator_launcher.cli.kill as kill
25
25
  import nemo_evaluator_launcher.cli.ls_runs as ls_runs
26
26
  import nemo_evaluator_launcher.cli.ls_tasks as ls_tasks
@@ -42,12 +42,12 @@ def is_verbose_enabled(args) -> bool:
42
42
  subcommands = [
43
43
  "run",
44
44
  "status",
45
+ "info",
45
46
  "kill",
46
47
  "tasks_alias",
47
48
  "tasks",
48
49
  "runs",
49
50
  "export",
50
- "debug",
51
51
  ]
52
52
  for subcmd in subcommands:
53
53
  if hasattr(args, subcmd) and hasattr(getattr(args, subcmd), "verbose"):
@@ -163,16 +163,16 @@ def create_parser() -> ArgumentParser:
163
163
  )
164
164
  export_parser.add_arguments(export.ExportCmd, dest="export")
165
165
 
166
- # Debug helper subcommand
167
- debug_parser = subparsers.add_parser(
168
- "debug",
166
+ # Info subcommand
167
+ info_parser = subparsers.add_parser(
168
+ "info",
169
169
  help="Display evaluation job information",
170
- description="Debug helper functionalities for nemo-evaluator-launcher",
170
+ description="Info functionalities for nemo-evaluator-launcher",
171
171
  )
172
- debug_parser.add_argument(
172
+ info_parser.add_argument(
173
173
  "-v", "--verbose", action="store_true", help="Enable verbose logging"
174
174
  )
175
- debug_parser.add_arguments(debug.DebugCmd, dest="debug")
175
+ info_parser.add_arguments(info.InfoCmd, dest="info")
176
176
 
177
177
  return parser
178
178
 
@@ -218,8 +218,8 @@ def main() -> None:
218
218
  args.runs.execute()
219
219
  elif args.command == "export":
220
220
  args.export.execute()
221
- elif args.command == "debug":
222
- args.debug.execute()
221
+ elif args.command == "info":
222
+ args.info.execute()
223
223
 
224
224
 
225
225
  if __name__ == "__main__":
@@ -19,6 +19,15 @@ from dataclasses import dataclass
19
19
 
20
20
  from simple_parsing import field
21
21
 
22
+ from nemo_evaluator_launcher.common.logging_utils import logger
23
+ from nemo_evaluator_launcher.common.printing_utils import (
24
+ bold,
25
+ cyan,
26
+ green,
27
+ magenta,
28
+ red,
29
+ )
30
+
22
31
 
23
32
  @dataclass
24
33
  class Cmd:
@@ -101,15 +110,10 @@ class Cmd:
101
110
  try:
102
111
  invocation_id = run_eval(config, self.dry_run)
103
112
  except Exception as e:
104
- print(f"\033[31m✗ Job submission failed | Error: {e}\033[0m")
113
+ print(red(f"✗ Job submission failed, see logs | Error: {e}"))
114
+ logger.error("Job submission failed", error=e)
105
115
  raise
106
116
 
107
- # Print general success message with invocation ID
108
- if invocation_id is not None and not self.dry_run:
109
- print(
110
- f"\033[32m✓ Job submission successful | Invocation ID: {invocation_id}\033[0m"
111
- )
112
-
113
117
  # Save the complete configuration
114
118
  if not self.dry_run and invocation_id is not None:
115
119
  # Determine config output directory
@@ -151,14 +155,22 @@ class Cmd:
151
155
  f.write("#\n")
152
156
  f.write(config_yaml)
153
157
 
154
- print(f"Complete run config saved to: {config_path}")
158
+ print(bold(cyan("Complete run config saved to: ")) + f"\n {config_path}\n")
159
+ logger.info("Saved complete config", path=config_path)
155
160
 
156
- if invocation_id is not None:
157
- print(f"to check status: nemo-evaluator-launcher status {invocation_id}")
158
- print(f"to kill all jobs: nemo-evaluator-launcher kill {invocation_id}")
161
+ # Print general success message with invocation ID and helpful commands
162
+ if invocation_id is not None and not self.dry_run:
163
+ print(
164
+ bold(cyan("To check status: "))
165
+ + f"nemo-evaluator-launcher status {invocation_id}"
166
+ )
167
+ print(
168
+ bold(cyan("To kill all jobs: "))
169
+ + f"nemo-evaluator-launcher kill {invocation_id}"
170
+ )
159
171
 
160
172
  # Show actual job IDs and task names
161
- print("to kill individual jobs:")
173
+ print(bold(cyan("To kill individual jobs:")))
162
174
  # Access tasks - will work after normalization in run_eval
163
175
  tasks = (
164
176
  config.evaluation.tasks
@@ -168,7 +180,21 @@ class Cmd:
168
180
  for idx, task in enumerate(tasks):
169
181
  job_id = f"{invocation_id}.{idx}"
170
182
  print(f" nemo-evaluator-launcher kill {job_id} # {task.name}")
183
+
184
+ print(
185
+ magenta(
186
+ "(all commands accept shortened IDs as long as there are no conflicts)"
187
+ )
188
+ )
171
189
  print(
172
- "to print all jobs: nemo-evaluator-launcher ls runs"
190
+ bold(cyan("To print all jobs: ")) + "nemo-evaluator-launcher ls runs"
173
191
  "\n (--since 1d or --since 6h for time span, see --help)"
174
192
  )
193
+
194
+ print(
195
+ green(
196
+ bold(
197
+ f"✓ Job submission successful | Invocation ID: {invocation_id}"
198
+ )
199
+ )
200
+ )
@@ -17,6 +17,7 @@ from dataclasses import dataclass
17
17
 
18
18
  from simple_parsing import field
19
19
 
20
+ import nemo_evaluator_launcher.common.printing_utils as pu
20
21
  from nemo_evaluator_launcher.executors.base import ExecutionState
21
22
 
22
23
 
@@ -143,17 +144,17 @@ class Cmd:
143
144
  """Format status with Unicode visual indicators only."""
144
145
  # Status mapping based on ExecutionState enum
145
146
  status_formats = {
146
- ExecutionState.SUCCESS.value: "\033[32m✓ SUCCESS\033[0m", # Green Unicode checkmark
147
- ExecutionState.FAILED.value: "\033[31m✗ FAILED\033[0m", # Red Unicode X
148
- ExecutionState.RUNNING.value: "\033[33m▶ RUNNING\033[0m", # Yellow Unicode play button
149
- ExecutionState.PENDING.value: "\033[36m⧗ PENDING\033[0m", # Cyan Unicode hourglass (U+29D7)
150
- ExecutionState.KILLED.value: "\033[35m✗ KILLED\033[0m", # Magenta Unicode X
147
+ ExecutionState.SUCCESS.value: pu.green("✓ SUCCESS"),
148
+ ExecutionState.FAILED.value: pu.red("✗ FAILED"),
149
+ ExecutionState.RUNNING.value: pu.yellow("▶ RUNNING"),
150
+ ExecutionState.PENDING.value: pu.cyan("⧗ PENDING"),
151
+ ExecutionState.KILLED.value: pu.magenta("✗ KILLED"),
151
152
  # Additional states for error handling
152
- "not_found": "\033[90m? NOT FOUND\033[0m", # Gray question mark
153
- "error": "\033[31m✗ ERROR\033[0m", # Red Unicode X
153
+ "not_found": pu.grey("? NOT FOUND"),
154
+ "error": pu.red("✗ ERROR"),
154
155
  }
155
156
 
156
- return status_formats.get(status.lower(), f"\033[90m? {status.upper()}\033[0m")
157
+ return status_formats.get(status.lower(), pu.grey(status.upper()))
157
158
 
158
159
  def _strip_ansi_codes(self, text: str) -> str:
159
160
  """Remove ANSI color codes from text for length calculation."""