nemo-evaluator-launcher 0.1.19__py3-none-any.whl → 0.1.56__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. nemo_evaluator_launcher/api/functional.py +159 -5
  2. nemo_evaluator_launcher/cli/logs.py +102 -0
  3. nemo_evaluator_launcher/cli/ls_task.py +280 -0
  4. nemo_evaluator_launcher/cli/ls_tasks.py +208 -55
  5. nemo_evaluator_launcher/cli/main.py +29 -2
  6. nemo_evaluator_launcher/cli/run.py +114 -16
  7. nemo_evaluator_launcher/cli/version.py +26 -23
  8. nemo_evaluator_launcher/common/container_metadata/__init__.py +61 -0
  9. nemo_evaluator_launcher/common/container_metadata/intermediate_repr.py +530 -0
  10. nemo_evaluator_launcher/common/container_metadata/loading.py +1126 -0
  11. nemo_evaluator_launcher/common/container_metadata/registries.py +824 -0
  12. nemo_evaluator_launcher/common/container_metadata/utils.py +63 -0
  13. nemo_evaluator_launcher/common/helpers.py +200 -51
  14. nemo_evaluator_launcher/common/logging_utils.py +16 -5
  15. nemo_evaluator_launcher/common/mapping.py +341 -155
  16. nemo_evaluator_launcher/common/printing_utils.py +25 -12
  17. nemo_evaluator_launcher/configs/deployment/sglang.yaml +4 -2
  18. nemo_evaluator_launcher/configs/deployment/trtllm.yaml +2 -3
  19. nemo_evaluator_launcher/configs/deployment/vllm.yaml +0 -1
  20. nemo_evaluator_launcher/configs/execution/slurm/default.yaml +14 -0
  21. nemo_evaluator_launcher/executors/base.py +31 -1
  22. nemo_evaluator_launcher/executors/lepton/deployment_helpers.py +36 -1
  23. nemo_evaluator_launcher/executors/lepton/executor.py +107 -9
  24. nemo_evaluator_launcher/executors/local/executor.py +383 -24
  25. nemo_evaluator_launcher/executors/local/run.template.sh +54 -2
  26. nemo_evaluator_launcher/executors/slurm/executor.py +559 -64
  27. nemo_evaluator_launcher/executors/slurm/proxy.cfg.template +26 -0
  28. nemo_evaluator_launcher/exporters/utils.py +32 -46
  29. nemo_evaluator_launcher/package_info.py +1 -1
  30. nemo_evaluator_launcher/resources/all_tasks_irs.yaml +17016 -0
  31. nemo_evaluator_launcher/resources/mapping.toml +64 -315
  32. {nemo_evaluator_launcher-0.1.19.dist-info → nemo_evaluator_launcher-0.1.56.dist-info}/METADATA +4 -3
  33. nemo_evaluator_launcher-0.1.56.dist-info/RECORD +69 -0
  34. {nemo_evaluator_launcher-0.1.19.dist-info → nemo_evaluator_launcher-0.1.56.dist-info}/entry_points.txt +1 -0
  35. nemo_evaluator_launcher-0.1.19.dist-info/RECORD +0 -60
  36. {nemo_evaluator_launcher-0.1.19.dist-info → nemo_evaluator_launcher-0.1.56.dist-info}/WHEEL +0 -0
  37. {nemo_evaluator_launcher-0.1.19.dist-info → nemo_evaluator_launcher-0.1.56.dist-info}/licenses/LICENSE +0 -0
  38. {nemo_evaluator_launcher-0.1.19.dist-info → nemo_evaluator_launcher-0.1.56.dist-info}/top_level.txt +0 -0
@@ -26,7 +26,8 @@ import shlex
26
26
  import shutil
27
27
  import subprocess
28
28
  import time
29
- from typing import List, Optional
29
+ import warnings
30
+ from typing import Iterator, List, Optional, Tuple, Union
30
31
 
31
32
  import jinja2
32
33
  import yaml
@@ -39,15 +40,19 @@ from nemo_evaluator_launcher.common.execdb import (
39
40
  generate_job_id,
40
41
  )
41
42
  from nemo_evaluator_launcher.common.helpers import (
43
+ get_api_key_name,
44
+ get_endpoint_url,
42
45
  get_eval_factory_command,
43
46
  get_eval_factory_dataset_size_from_run_config,
47
+ get_health_url,
44
48
  get_timestamp_string,
45
49
  )
50
+ from nemo_evaluator_launcher.common.logging_utils import logger
46
51
  from nemo_evaluator_launcher.common.mapping import (
47
- get_task_from_mapping,
52
+ get_task_definition_for_job,
48
53
  load_tasks_mapping,
49
54
  )
50
- from nemo_evaluator_launcher.common.printing_utils import bold, cyan, grey
55
+ from nemo_evaluator_launcher.common.printing_utils import bold, cyan, grey, red
51
56
  from nemo_evaluator_launcher.executors.base import (
52
57
  BaseExecutor,
53
58
  ExecutionState,
@@ -70,14 +75,8 @@ class LocalExecutor(BaseExecutor):
70
75
  str: The invocation ID for the evaluation run.
71
76
 
72
77
  Raises:
73
- NotImplementedError: If deployment is not 'none'.
74
78
  RuntimeError: If the run script fails.
75
79
  """
76
- if cfg.deployment.type != "none":
77
- raise NotImplementedError(
78
- f"type {cfg.deployment.type} is not implemented -- add deployment support"
79
- )
80
-
81
80
  # Check if docker is available (skip in dry_run mode)
82
81
  if not dry_run and shutil.which("docker") is None:
83
82
  raise RuntimeError(
@@ -97,12 +96,16 @@ class LocalExecutor(BaseExecutor):
97
96
  evaluation_tasks = []
98
97
  job_ids = []
99
98
 
100
- eval_template = jinja2.Template(
99
+ run_template = jinja2.Template(
101
100
  open(pathlib.Path(__file__).parent / "run.template.sh", "r").read()
102
101
  )
103
102
 
104
103
  execution_mode = cfg.execution.get("mode", "parallel")
105
104
  if execution_mode == "parallel":
105
+ if cfg.deployment.type != "none":
106
+ raise ValueError(
107
+ f"Execution mode 'parallel' is not supported with deployment type: {cfg.deployment.type}. Use 'sequential' instead."
108
+ )
106
109
  is_execution_mode_sequential = False
107
110
  elif execution_mode == "sequential":
108
111
  is_execution_mode_sequential = True
@@ -113,20 +116,80 @@ class LocalExecutor(BaseExecutor):
113
116
  )
114
117
  )
115
118
 
119
+ # Will accumulate if any task contains unsafe commands.
120
+ is_potentially_unsafe = False
121
+
122
+ deployment = None
123
+
116
124
  for idx, task in enumerate(cfg.evaluation.tasks):
117
- task_definition = get_task_from_mapping(task.name, tasks_mapping)
125
+ timestamp = get_timestamp_string()
126
+ task_definition = get_task_definition_for_job(
127
+ task_query=task.name,
128
+ base_mapping=tasks_mapping,
129
+ container=task.get("container"),
130
+ )
131
+
132
+ if cfg.deployment.type != "none":
133
+ # container name
134
+ server_container_name = f"server-{task.name}-{timestamp}"
135
+
136
+ # health_url
137
+ health_url = get_health_url(
138
+ cfg, get_endpoint_url(cfg, task, task_definition["endpoint_type"])
139
+ )
140
+
141
+ # mounts
142
+ deployment_mounts_list = []
143
+ if checkpoint_path := cfg.deployment.get("checkpoint_path"):
144
+ deployment_mounts_list.append(f"{checkpoint_path}:/checkpoint:ro")
145
+ if cache_path := cfg.deployment.get("cache_path"):
146
+ deployment_mounts_list.append(f"{cache_path}:/cache")
147
+ for source_mnt, target_mnt in (
148
+ cfg.execution.get("mounts", {}).get("deployment", {}).items()
149
+ ):
150
+ deployment_mounts_list.append(f"{source_mnt}:{target_mnt}")
151
+
152
+ # env vars
153
+ deployment_env_vars = cfg.execution.get("env_vars", {}).get(
154
+ "deployment", {}
155
+ )
156
+
157
+ if cfg.deployment.get("env_vars"):
158
+ warnings.warn(
159
+ "cfg.deployment.env_vars will be deprecated in future versions. "
160
+ "Use cfg.execution.env_vars.deployment instead.",
161
+ category=DeprecationWarning,
162
+ stacklevel=2,
163
+ )
164
+ deployment_env_vars.update(cfg.deployment["env_vars"])
165
+
166
+ command = cfg.deployment.command
167
+ deployment_extra_docker_args = cfg.execution.get(
168
+ "extra_docker_args", ""
169
+ )
170
+
171
+ deployment = {
172
+ "container_name": server_container_name,
173
+ "image": cfg.deployment.image,
174
+ "command": command,
175
+ "mounts": deployment_mounts_list,
176
+ "env_vars": [f"{k}={v}" for k, v in deployment_env_vars.items()],
177
+ "health_url": health_url,
178
+ "port": cfg.deployment.port,
179
+ "extra_docker_args": deployment_extra_docker_args,
180
+ }
118
181
 
119
182
  # Create job ID as <invocation_id>.<n>
120
183
  job_id = generate_job_id(invocation_id, idx)
121
184
  job_ids.append(job_id)
122
- container_name = f"{task.name}-{get_timestamp_string()}"
185
+ client_container_name = f"client-{task.name}-{timestamp}"
123
186
 
124
187
  # collect all env vars
125
188
  env_vars = copy.deepcopy(dict(cfg.evaluation.get("env_vars", {})))
126
189
  env_vars.update(task.get("env_vars", {}))
127
- if cfg.target.api_endpoint.api_key_name:
190
+ if api_key_name := get_api_key_name(cfg):
128
191
  assert "API_KEY" not in env_vars
129
- env_vars["API_KEY"] = cfg.target.api_endpoint.api_key_name
192
+ env_vars["API_KEY"] = api_key_name
130
193
 
131
194
  # check if the environment variables are set
132
195
  for env_var in env_vars.values():
@@ -135,8 +198,11 @@ class LocalExecutor(BaseExecutor):
135
198
  f"Trying to pass an unset environment variable {env_var}."
136
199
  )
137
200
 
138
- # check if required env vars are defined:
201
+ # check if required env vars are defined (excluding NEMO_EVALUATOR_DATASET_DIR which is handled separately):
139
202
  for required_env_var in task_definition.get("required_env_vars", []):
203
+ # Skip NEMO_EVALUATOR_DATASET_DIR as it's handled by dataset mounting logic below
204
+ if required_env_var == "NEMO_EVALUATOR_DATASET_DIR":
205
+ continue
140
206
  if required_env_var not in env_vars.keys():
141
207
  raise ValueError(
142
208
  f"{task.name} task requires environment variable {required_env_var}."
@@ -144,12 +210,38 @@ class LocalExecutor(BaseExecutor):
144
210
  f" pair {required_env_var}: YOUR_ENV_VAR_NAME"
145
211
  )
146
212
 
213
+ # Handle dataset directory mounting if NEMO_EVALUATOR_DATASET_DIR is required
214
+ dataset_mount_host = None
215
+ dataset_mount_container = None
216
+ dataset_env_var_value = None
217
+ if "NEMO_EVALUATOR_DATASET_DIR" in task_definition.get(
218
+ "required_env_vars", []
219
+ ):
220
+ # Get dataset directory from task config
221
+ if "dataset_dir" in task:
222
+ dataset_mount_host = task["dataset_dir"]
223
+ else:
224
+ raise ValueError(
225
+ f"{task.name} task requires a dataset_dir to be specified. "
226
+ f"Add 'dataset_dir: /path/to/your/dataset' under the task configuration."
227
+ )
228
+ # Get container mount path (default to /datasets if not specified)
229
+ dataset_mount_container = task.get("dataset_mount_path", "/datasets")
230
+ # Set NEMO_EVALUATOR_DATASET_DIR to the container mount path
231
+ dataset_env_var_value = dataset_mount_container
232
+
147
233
  # format env_vars for a template
148
- env_vars = [
234
+ env_vars_list = [
149
235
  f"{env_var_dst}=${env_var_src}"
150
236
  for env_var_dst, env_var_src in env_vars.items()
151
237
  ]
152
238
 
239
+ # Add dataset env var if needed (directly with value, not from host env)
240
+ if dataset_env_var_value:
241
+ env_vars_list.append(
242
+ f"NEMO_EVALUATOR_DATASET_DIR={dataset_env_var_value}"
243
+ )
244
+
153
245
  eval_image = task_definition["container"]
154
246
  if "container" in task:
155
247
  eval_image = task["container"]
@@ -166,15 +258,22 @@ class LocalExecutor(BaseExecutor):
166
258
  # TODO(agronskiy): cleaner way is to encode everything with base64, not
167
259
  # some parts (like ef_config.yaml) and just output as logs somewhere.
168
260
  eval_factory_command_debug_comment = eval_factory_command_struct.debug
261
+ is_potentially_unsafe = (
262
+ is_potentially_unsafe
263
+ or eval_factory_command_struct.is_potentially_unsafe
264
+ )
169
265
  evaluation_task = {
266
+ "deployment": deployment,
170
267
  "name": task.name,
171
268
  "job_id": job_id,
172
269
  "eval_image": eval_image,
173
- "container_name": container_name,
174
- "env_vars": env_vars,
270
+ "client_container_name": client_container_name,
271
+ "env_vars": env_vars_list,
175
272
  "output_dir": task_output_dir,
176
273
  "eval_factory_command": eval_factory_command,
177
274
  "eval_factory_command_debug_comment": eval_factory_command_debug_comment,
275
+ "dataset_mount_host": dataset_mount_host,
276
+ "dataset_mount_container": dataset_mount_container,
178
277
  }
179
278
  evaluation_tasks.append(evaluation_task)
180
279
 
@@ -185,7 +284,7 @@ class LocalExecutor(BaseExecutor):
185
284
  extra_docker_args = cfg.execution.get("extra_docker_args", "")
186
285
 
187
286
  run_sh_content = (
188
- eval_template.render(
287
+ run_template.render(
189
288
  evaluation_tasks=[evaluation_task],
190
289
  auto_export_destinations=auto_export_destinations,
191
290
  extra_docker_args=extra_docker_args,
@@ -196,7 +295,7 @@ class LocalExecutor(BaseExecutor):
196
295
  (task_output_dir / "run.sh").write_text(run_sh_content)
197
296
 
198
297
  run_all_sequentially_sh_content = (
199
- eval_template.render(
298
+ run_template.render(
200
299
  evaluation_tasks=evaluation_tasks,
201
300
  auto_export_destinations=auto_export_destinations,
202
301
  extra_docker_args=extra_docker_args,
@@ -230,8 +329,34 @@ class LocalExecutor(BaseExecutor):
230
329
  with open(task_output_dir / "run.sh", "r") as f:
231
330
  print(grey(f.read()))
232
331
  print(bold("\nTo execute, run without --dry-run"))
332
+
333
+ if is_potentially_unsafe:
334
+ print(
335
+ red(
336
+ "\nFound `pre_cmd` which carries security risk. When running without --dry-run "
337
+ "make sure you trust the command and set NEMO_EVALUATOR_TRUST_PRE_CMD=1"
338
+ )
339
+ )
233
340
  return invocation_id
234
341
 
342
+ if is_potentially_unsafe:
343
+ if os.environ.get("NEMO_EVALUATOR_TRUST_PRE_CMD", "") == "1":
344
+ logger.warning(
345
+ "Found non-empty task commands (e.g. `pre_cmd`) and NEMO_EVALUATOR_TRUST_PRE_CMD "
346
+ "is set, proceeding with caution."
347
+ )
348
+
349
+ else:
350
+ logger.error(
351
+ "Found non-empty task commands (e.g. `pre_cmd`) and NEMO_EVALUATOR_TRUST_PRE_CMD "
352
+ "is not set. This might carry security risk and unstable environments. "
353
+ "To continue, make sure you trust the command and set NEMO_EVALUATOR_TRUST_PRE_CMD=1.",
354
+ )
355
+ raise AttributeError(
356
+ "Untrusted command found in config, make sure you trust and "
357
+ "set NEMO_EVALUATOR_TRUST_PRE_CMD=1."
358
+ )
359
+
235
360
  # Save launched jobs metadata
236
361
  db = ExecutionDB()
237
362
  for job_id, task, evaluation_task in zip(
@@ -245,7 +370,7 @@ class LocalExecutor(BaseExecutor):
245
370
  executor="local",
246
371
  data={
247
372
  "output_dir": str(evaluation_task["output_dir"]),
248
- "container": evaluation_task["container_name"],
373
+ "container": evaluation_task["client_container_name"],
249
374
  "eval_image": evaluation_task["eval_image"],
250
375
  },
251
376
  config=OmegaConf.to_object(cfg),
@@ -301,11 +426,11 @@ class LocalExecutor(BaseExecutor):
301
426
 
302
427
  print(bold(cyan("\nCommands for real-time monitoring:")))
303
428
  for job_id, evaluation_task in zip(job_ids, evaluation_tasks):
304
- log_file = evaluation_task["output_dir"] / "logs" / "stdout.log"
305
- print(f" tail -f {log_file}")
429
+ print(f"\n Job {job_id} ({evaluation_task['name']}):")
430
+ print(f" nemo-evaluator-launcher logs {job_id}")
306
431
 
307
432
  print(bold(cyan("\nFollow all logs for this invocation:")))
308
- print(f" tail -f {output_dir}/*/logs/stdout.log\n")
433
+ print(f" nemo-evaluator-launcher logs {invocation_id}")
309
434
 
310
435
  return invocation_id
311
436
 
@@ -501,6 +626,240 @@ class LocalExecutor(BaseExecutor):
501
626
  )
502
627
  raise RuntimeError(error_msg)
503
628
 
629
@staticmethod
def stream_logs(
    id: Union[str, List[str]], executor_name: Optional[str] = None
) -> Iterator[Tuple[str, str, str]]:
    """Stream log lines from a job, an invocation group, or a list of jobs.

    Resolves *id* against the ExecutionDB, then tails each job's
    ``logs/client_stdout.log``: the last 15 existing lines are yielded
    first, after which new lines are streamed as they are appended.
    Files that do not exist yet are picked up once they appear.

    Args:
        id: Unique job identifier, invocation identifier, or list of job
            IDs to stream simultaneously.
        executor_name: Unused; kept for interface compatibility.

    Yields:
        Tuple[str, str, str]: Tuples of (job_id, task_name, log_line) for
        each log line. Trailing newlines are stripped; empty lines are
        yielded as empty strings.
    """
    db = ExecutionDB()

    # Resolve `id` into a {job_id: JobData} mapping.
    if isinstance(id, list):
        # Explicit list of job ids: keep only known local-executor jobs.
        jobs = {}
        for job_id in id:
            job_data = db.get_job(job_id)
            if job_data is None or job_data.executor != "local":
                continue
            jobs[job_id] = job_data
        if not jobs:
            return
    elif "." not in id:
        # No dot: treat as an invocation_id covering all of its jobs.
        jobs = db.get_jobs(id)
        if not jobs:
            return
    else:
        # Single fully-qualified job id.
        job_data = db.get_job(id)
        if job_data is None or job_data.executor != "local":
            return
        jobs = {id: job_data}

    # Build per-file tailing state.
    log_files = []
    for job_id, job_data in jobs.items():
        # BUGFIX: test the raw string before wrapping it. Path("") is
        # truthy (it becomes Path(".")), so the original guard on the
        # Path object never skipped jobs with a missing output_dir.
        output_dir_str = job_data.data.get("output_dir", "")
        if not output_dir_str:
            continue
        output_dir = pathlib.Path(output_dir_str)

        # Get task name from config (falls back to output_dir/unknown).
        task_name = LocalExecutor._extract_task_name(job_data, job_id)

        log_files.append(
            {
                "job_id": job_id,
                "task_name": task_name,
                "path": output_dir / "logs" / "client_stdout.log",
                "file_handle": None,
                "position": 0,
            }
        )

    if not log_files:
        return

    # Tracks which paths already had their last-15-lines preamble
    # emitted, so a late-appearing file still gets it exactly once.
    file_seen_before = {}

    # Emit the tail of every file that already exists, then open it
    # positioned at EOF (classic `tail -f` behavior).
    for log_info in log_files:
        if log_info["path"].exists():
            file_seen_before[log_info["path"]] = True
            for line in LocalExecutor._read_last_n_lines(log_info["path"], 15):
                yield (log_info["job_id"], log_info["task_name"], line)
            try:
                log_info["file_handle"] = open(
                    log_info["path"], "r", encoding="utf-8", errors="replace"
                )
                # Seek to end if file already exists (tail behavior).
                log_info["file_handle"].seek(0, 2)
                log_info["position"] = log_info["file_handle"].tell()
            except Exception as e:
                logger.error(f"Could not open {log_info['path']}: {e}")
        else:
            file_seen_before[log_info["path"]] = False

    try:
        while True:
            any_activity = False

            for log_info in log_files:
                # Late-appearing file: emit its tail once, open at EOF,
                # and start reading on the next poll.
                if log_info["file_handle"] is None:
                    if log_info["path"].exists():
                        try:
                            if not file_seen_before.get(log_info["path"], False):
                                for line in LocalExecutor._read_last_n_lines(
                                    log_info["path"], 15
                                ):
                                    yield (
                                        log_info["job_id"],
                                        log_info["task_name"],
                                        line,
                                    )
                                file_seen_before[log_info["path"]] = True

                            log_info["file_handle"] = open(
                                log_info["path"],
                                "r",
                                encoding="utf-8",
                                errors="replace",
                            )
                            # Seek to end for tail behavior.
                            log_info["file_handle"].seek(0, 2)
                            log_info["position"] = log_info["file_handle"].tell()
                        except Exception as e:
                            logger.error(f"Could not open {log_info['path']}: {e}")
                    continue

                # Stream any bytes appended since the last poll.
                try:
                    current_size = log_info["path"].stat().st_size
                    if current_size > log_info["position"]:
                        log_info["file_handle"].seek(log_info["position"])
                        new_lines = log_info["file_handle"].readlines()
                        log_info["position"] = log_info["file_handle"].tell()
                        for line in new_lines:
                            yield (
                                log_info["job_id"],
                                log_info["task_name"],
                                line.rstrip("\n\r"),
                            )
                        any_activity = True
                    # BUGFIX: reset the one-shot error flag on a successful
                    # poll. The original did this in a broad
                    # `except Exception:` clause, which both swallowed
                    # unrelated errors and never ran on success.
                    log_info["error_printed"] = False
                except OSError as e:
                    # File might have been deleted or moved; log only the
                    # first error, then retry opening on the next poll.
                    if not log_info.get("error_printed", False):
                        logger.error(f"Error reading {log_info['path']}: {e}")
                        log_info["error_printed"] = True
                    log_info["file_handle"] = None

            # If no activity, sleep briefly to avoid busy waiting.
            if not any_activity:
                time.sleep(0.1)

    except KeyboardInterrupt:
        # Clean exit on Ctrl+C.
        pass
    finally:
        # Close all file handles.
        for log_info in log_files:
            if log_info["file_handle"] is not None:
                try:
                    log_info["file_handle"].close()
                except Exception:
                    pass
+ @staticmethod
803
+ def _read_last_n_lines(file_path: pathlib.Path, n: int) -> List[str]:
804
+ """Read the last N lines from a file efficiently.
805
+
806
+ Args:
807
+ file_path: Path to the file to read from.
808
+ n: Number of lines to read from the end.
809
+
810
+ Returns:
811
+ List of the last N lines (or fewer if file has fewer lines).
812
+ """
813
+ try:
814
+ with open(file_path, "r", encoding="utf-8", errors="replace") as f:
815
+ # Read all lines
816
+ all_lines = f.readlines()
817
+ # Return last n lines, stripping newlines
818
+ return [line.rstrip("\n\r") for line in all_lines[-n:]]
819
+ except Exception as e:
820
+ logger.warning(f"Could not read last {n} lines from {file_path}: {e}")
821
+ return []
822
+
823
+ @staticmethod
824
+ def _extract_task_name(job_data: JobData, job_id: str) -> str:
825
+ """Extract task name from job data config.
826
+
827
+ Args:
828
+ job_data: JobData object containing config.
829
+ job_id: Job ID for error reporting.
830
+
831
+ Returns:
832
+ Task name string.
833
+ """
834
+ config = job_data.config or {}
835
+ evaluation = config.get("evaluation", {})
836
+ tasks = evaluation.get("tasks", [])
837
+
838
+ # Find the task that matches this job
839
+ # For job_id like "15b9f667.0", index is 0
840
+ try:
841
+ if "." in job_id:
842
+ index = int(job_id.split(".")[1])
843
+ if len(tasks) > 0 and index >= len(tasks):
844
+ raise AttributeError(
845
+ f"Job task index {job_id} is larger than number of tasks {len(tasks)} in invocation"
846
+ )
847
+ # If index is valid and tasks exist, return the task name
848
+ if len(tasks) > 0 and index < len(tasks):
849
+ return tasks[index].get("name", "unknown")
850
+ except (ValueError, IndexError):
851
+ pass
852
+
853
+ # Fallback: try to get task name from output_dir
854
+ # output_dir typically ends with task name
855
+ output_dir = job_data.data.get("output_dir", "")
856
+ if output_dir:
857
+ parts = pathlib.Path(output_dir).parts
858
+ if parts:
859
+ return parts[-1]
860
+
861
+ return "unknown"
862
+
504
863
  @staticmethod
505
864
  def _add_to_killed_jobs(invocation_id: str, job_id: str) -> None:
506
865
  """Add a job ID to the killed jobs file for this invocation.
@@ -22,6 +22,20 @@ script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
22
22
  killed_jobs_file="$script_dir/killed_jobs.txt"
23
23
  rm -f "$killed_jobs_file"
24
24
 
25
+ # Create all directories and stdout.log files upfront before any container starts
26
+ {% for task in evaluation_tasks %}
27
+ task_dir="{{ task.output_dir }}"
28
+ artifacts_dir="$task_dir/artifacts"
29
+ logs_dir="$task_dir/logs"
30
+
31
+ mkdir -m 777 -p "$task_dir"
32
+ mkdir -m 777 -p "$artifacts_dir"
33
+ mkdir -m 777 -p "$logs_dir"
34
+ # Create stdout.log file upfront
35
+ touch "$logs_dir/client_stdout.log"
36
+ chmod 666 "$logs_dir/client_stdout.log"
37
+ {% endfor %}
38
+
25
39
  {% for task in evaluation_tasks %}
26
40
  # {{ task.job_id }} {{ task.name }}
27
41
 
@@ -46,9 +60,41 @@ else
46
60
  # Docker run with eval factory command
47
61
  (
48
62
  echo "$(date -u +%Y-%m-%dT%H:%M:%SZ)" > "$logs_dir/stage.running"
63
+ {% if task.deployment %}
64
+ docker run --rm --shm-size=100g --gpus all {{ task.deployment.extra_docker_args }} \
65
+ --name {{ task.deployment.container_name }} --entrypoint '' \
66
+ -p {{ task.deployment.port }}:{{ task.deployment.port }} \
67
+ {% for env_var in task.deployment.env_vars -%}
68
+ -e {{ env_var }} \
69
+ {% endfor -%}
70
+ {% for mount in task.deployment.mounts -%}
71
+ -v {{ mount }} \
72
+ {% endfor -%}
73
+ {{ task.deployment.image }} \
74
+ {{ task.deployment.command }} > "$logs_dir/server_stdout.log" 2>&1 &
75
+
76
+ SERVER_PID=$!
77
+ SERVER_CONTAINER_NAME="{{ task.deployment.container_name }}"
78
+
79
+ date
80
+ # wait for the server to initialize
81
+ TIMEOUT=600
82
+ ELAPSED=0
83
+ while [[ "$(curl -s -o /dev/null -w "%{http_code}" {{ task.deployment.health_url }})" != "200" ]]; do
84
+ kill -0 $SERVER_PID 2>/dev/null || { echo "Server process $SERVER_PID died"; echo "$(date -u +%Y-%m-%dT%H:%M:%SZ) 1" > "$logs_dir/stage.exit"; exit 1; }
85
+ [ $ELAPSED -ge $TIMEOUT ] && { echo "Health check timeout after ${TIMEOUT}s"; echo "$(date -u +%Y-%m-%dT%H:%M:%SZ) 1" > "$logs_dir/stage.exit"; exit 1; }
86
+ sleep 5
87
+ ELAPSED=$((ELAPSED + 5))
88
+ done
89
+ date
90
+
91
+ {% endif %}
49
92
  docker run --rm --shm-size=100g {{ extra_docker_args }} \
50
- --name {{ task.container_name }} \
93
+ {% if task.deployment %}--network container:$SERVER_CONTAINER_NAME \{% endif %}--name {{ task.client_container_name }} \
51
94
  --volume "$artifacts_dir":/results \
95
+ {% if task.dataset_mount_host and task.dataset_mount_container -%}
96
+ --volume "{{ task.dataset_mount_host }}:{{ task.dataset_mount_container }}" \
97
+ {% endif -%}
52
98
  {% for env_var in task.env_vars -%}
53
99
  -e {{ env_var }} \
54
100
  {% endfor -%}
@@ -63,8 +109,14 @@ else
63
109
  fi;
64
110
  echo "Container completed successfully" >&2;
65
111
  exit 0;
66
- ' > "$logs_dir/stdout.log" 2>&1
112
+ ' > "$logs_dir/client_stdout.log" 2>&1
67
113
  exit_code=$?
114
+
115
+ {% if task.deployment %}
116
+ # Stop the server
117
+ docker stop $SERVER_CONTAINER_NAME 2>/dev/null || true
118
+ {% endif %}
119
+
68
120
  echo "$(date -u +%Y-%m-%dT%H:%M:%SZ) $exit_code" > "$logs_dir/stage.exit"
69
121
  ) >> "$logs_dir/stdout.log" 2>&1
70
122