nemo-evaluator-launcher 0.1.19__py3-none-any.whl → 0.1.41__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28) hide show
  1. nemo_evaluator_launcher/api/functional.py +105 -1
  2. nemo_evaluator_launcher/cli/logs.py +102 -0
  3. nemo_evaluator_launcher/cli/main.py +12 -0
  4. nemo_evaluator_launcher/cli/run.py +73 -15
  5. nemo_evaluator_launcher/cli/version.py +26 -23
  6. nemo_evaluator_launcher/common/helpers.py +176 -43
  7. nemo_evaluator_launcher/common/logging_utils.py +16 -5
  8. nemo_evaluator_launcher/common/printing_utils.py +7 -0
  9. nemo_evaluator_launcher/configs/deployment/sglang.yaml +4 -2
  10. nemo_evaluator_launcher/configs/deployment/trtllm.yaml +2 -3
  11. nemo_evaluator_launcher/configs/deployment/vllm.yaml +0 -1
  12. nemo_evaluator_launcher/configs/execution/slurm/default.yaml +14 -0
  13. nemo_evaluator_launcher/executors/base.py +31 -1
  14. nemo_evaluator_launcher/executors/lepton/deployment_helpers.py +36 -1
  15. nemo_evaluator_launcher/executors/lepton/executor.py +81 -1
  16. nemo_evaluator_launcher/executors/local/executor.py +377 -22
  17. nemo_evaluator_launcher/executors/local/run.template.sh +54 -2
  18. nemo_evaluator_launcher/executors/slurm/executor.py +422 -59
  19. nemo_evaluator_launcher/executors/slurm/proxy.cfg.template +26 -0
  20. nemo_evaluator_launcher/exporters/utils.py +32 -46
  21. nemo_evaluator_launcher/package_info.py +1 -1
  22. nemo_evaluator_launcher/resources/mapping.toml +56 -15
  23. {nemo_evaluator_launcher-0.1.19.dist-info → nemo_evaluator_launcher-0.1.41.dist-info}/METADATA +3 -3
  24. {nemo_evaluator_launcher-0.1.19.dist-info → nemo_evaluator_launcher-0.1.41.dist-info}/RECORD +28 -26
  25. {nemo_evaluator_launcher-0.1.19.dist-info → nemo_evaluator_launcher-0.1.41.dist-info}/entry_points.txt +1 -0
  26. {nemo_evaluator_launcher-0.1.19.dist-info → nemo_evaluator_launcher-0.1.41.dist-info}/WHEEL +0 -0
  27. {nemo_evaluator_launcher-0.1.19.dist-info → nemo_evaluator_launcher-0.1.41.dist-info}/licenses/LICENSE +0 -0
  28. {nemo_evaluator_launcher-0.1.19.dist-info → nemo_evaluator_launcher-0.1.41.dist-info}/top_level.txt +0 -0
@@ -18,6 +18,7 @@
18
18
  Handles deployment and evaluation using Lepton endpoints with NIM containers.
19
19
  """
20
20
 
21
+ import os
21
22
  import time
22
23
  from pathlib import Path
23
24
  from typing import List
@@ -36,6 +37,7 @@ from nemo_evaluator_launcher.common.mapping import (
36
37
  get_task_from_mapping,
37
38
  load_tasks_mapping,
38
39
  )
40
+ from nemo_evaluator_launcher.common.printing_utils import red
39
41
  from nemo_evaluator_launcher.executors.base import (
40
42
  BaseExecutor,
41
43
  ExecutionState,
@@ -88,6 +90,23 @@ class LeptonExecutor(BaseExecutor):
88
90
  # Generate invocation ID
89
91
  invocation_id = generate_invocation_id()
90
92
 
93
+ # TODO(agronskiy): the structure of this executor differs from others,
94
 + # so the best place to check for unsafe commands yields a bit of duplication.
95
+ # We can't use the get_eval_factory_command here because the port is not yet
96
+ # populated.
97
+ # Refactor the whole thing.
98
+ is_potentially_unsafe = False
99
+ for idx, task in enumerate(cfg.evaluation.tasks):
100
+ pre_cmd: str = task.get("pre_cmd") or cfg.evaluation.get("pre_cmd") or ""
101
+ if pre_cmd:
102
+ is_potentially_unsafe = True
103
+ break
104
+
105
+ # Check for deployment pre_cmd
106
+ deployment_pre_cmd: str = cfg.deployment.get("pre_cmd") or ""
107
+ if deployment_pre_cmd:
108
+ is_potentially_unsafe = True
109
+
91
110
  # DRY-RUN mode
92
111
  if dry_run:
93
112
  output_dir = Path(cfg.execution.output_dir).absolute() / invocation_id
@@ -102,8 +121,34 @@ class LeptonExecutor(BaseExecutor):
102
121
  else:
103
122
  print(f"with endpoint type '{cfg.deployment.type}'")
104
123
 
124
+ if is_potentially_unsafe:
125
+ print(
126
+ red(
127
+ "\nFound `pre_cmd` (evaluation or deployment) which carries security risk. When running without --dry-run "
128
+ "make sure you trust the command and set NEMO_EVALUATOR_TRUST_PRE_CMD=1"
129
+ )
130
+ )
131
+
105
132
  return invocation_id
106
133
 
134
+ if is_potentially_unsafe:
135
+ if os.environ.get("NEMO_EVALUATOR_TRUST_PRE_CMD", "") == "1":
136
+ logger.warning(
137
+ "Found non-empty commands (e.g. `pre_cmd` in evaluation or deployment) and NEMO_EVALUATOR_TRUST_PRE_CMD "
138
+ "is set, proceeding with caution."
139
+ )
140
+
141
+ else:
142
+ logger.error(
143
+ "Found non-empty commands (e.g. `pre_cmd` in evaluation or deployment) and NEMO_EVALUATOR_TRUST_PRE_CMD "
144
+ "is not set. This might carry security risk and unstable environments. "
145
+ "To continue, make sure you trust the command and set NEMO_EVALUATOR_TRUST_PRE_CMD=1.",
146
+ )
147
+ raise AttributeError(
148
+ "Untrusted command found in config, make sure you trust and "
149
+ "set NEMO_EVALUATOR_TRUST_PRE_CMD=1."
150
+ )
151
+
107
152
  # For deployment: none, we use the existing endpoint for all tasks
108
153
  if cfg.deployment.type == "none":
109
154
  print("📌 Using existing endpoint (deployment: none)")
@@ -491,6 +536,33 @@ class LeptonExecutor(BaseExecutor):
491
536
 
492
537
  job_mounts.append(mount_dict)
493
538
 
539
+ # Handle dataset directory mounting if NEMO_EVALUATOR_DATASET_DIR is required
540
+ if "NEMO_EVALUATOR_DATASET_DIR" in task_definition.get(
541
+ "required_env_vars", []
542
+ ):
543
+ # Get dataset directory from task config
544
+ if "dataset_dir" in task:
545
+ dataset_mount_host = task["dataset_dir"]
546
+ else:
547
+ raise ValueError(
548
+ f"{task.name} task requires a dataset_dir to be specified. "
549
+ f"Add 'dataset_dir: /path/to/your/dataset' under the task configuration."
550
+ )
551
+ # Get container mount path (default to /datasets if not specified)
552
+ dataset_mount_container = task.get(
553
+ "dataset_mount_path", "/datasets"
554
+ )
555
+ # Add dataset mount to job mounts
556
+ # Lepton mount format: {"path": "/path/in/container", "mount_from": {"path": "/host/path"}}
557
+ job_mounts.append(
558
+ {
559
+ "path": dataset_mount_container,
560
+ "mount_from": {"path": dataset_mount_host},
561
+ }
562
+ )
563
+ # Add NEMO_EVALUATOR_DATASET_DIR environment variable
564
+ job_env_vars["NEMO_EVALUATOR_DATASET_DIR"] = dataset_mount_container
565
+
494
566
  print(
495
567
  f" - Storage: {len(job_mounts)} mount(s) with evaluation ID isolation"
496
568
  )
@@ -610,7 +682,7 @@ class LeptonExecutor(BaseExecutor):
610
682
  job_state = lepton_status.get("state", "Unknown")
611
683
 
612
684
  # Map Lepton job states to our execution states
613
- if job_state == "Succeeded":
685
+ if job_state in ["Succeeded", "Completed"]:
614
686
  state = ExecutionState.SUCCESS
615
687
  elif job_state in ["Running", "Pending", "Starting"]:
616
688
  state = ExecutionState.RUNNING
@@ -859,6 +931,14 @@ def _dry_run_lepton(
859
931
  td = get_task_from_mapping(task.name, tasks_mapping)
860
932
  required = td.get("required_env_vars", []) or []
861
933
  for var in required:
934
+ # Skip NEMO_EVALUATOR_DATASET_DIR as it's handled by dataset mounting logic
935
+ if var == "NEMO_EVALUATOR_DATASET_DIR":
936
+ if "dataset_dir" not in task:
937
+ raise ValueError(
938
+ f"Task '{task.name}' requires dataset_dir to be specified. "
939
+ f"Add 'dataset_dir: /path/to/your/dataset' under the task configuration."
940
+ )
941
+ continue
862
942
  if var == "API_KEY":
863
943
  if not (("API_KEY" in lepton_env_vars) or bool(api_key_name)):
864
944
  raise ValueError(
@@ -26,7 +26,8 @@ import shlex
26
26
  import shutil
27
27
  import subprocess
28
28
  import time
29
- from typing import List, Optional
29
+ import warnings
30
+ from typing import Iterator, List, Optional, Tuple, Union
30
31
 
31
32
  import jinja2
32
33
  import yaml
@@ -39,15 +40,19 @@ from nemo_evaluator_launcher.common.execdb import (
39
40
  generate_job_id,
40
41
  )
41
42
  from nemo_evaluator_launcher.common.helpers import (
43
+ get_api_key_name,
44
+ get_endpoint_url,
42
45
  get_eval_factory_command,
43
46
  get_eval_factory_dataset_size_from_run_config,
47
+ get_health_url,
44
48
  get_timestamp_string,
45
49
  )
50
+ from nemo_evaluator_launcher.common.logging_utils import logger
46
51
  from nemo_evaluator_launcher.common.mapping import (
47
52
  get_task_from_mapping,
48
53
  load_tasks_mapping,
49
54
  )
50
- from nemo_evaluator_launcher.common.printing_utils import bold, cyan, grey
55
+ from nemo_evaluator_launcher.common.printing_utils import bold, cyan, grey, red
51
56
  from nemo_evaluator_launcher.executors.base import (
52
57
  BaseExecutor,
53
58
  ExecutionState,
@@ -70,14 +75,8 @@ class LocalExecutor(BaseExecutor):
70
75
  str: The invocation ID for the evaluation run.
71
76
 
72
77
  Raises:
73
- NotImplementedError: If deployment is not 'none'.
74
78
  RuntimeError: If the run script fails.
75
79
  """
76
- if cfg.deployment.type != "none":
77
- raise NotImplementedError(
78
- f"type {cfg.deployment.type} is not implemented -- add deployment support"
79
- )
80
-
81
80
  # Check if docker is available (skip in dry_run mode)
82
81
  if not dry_run and shutil.which("docker") is None:
83
82
  raise RuntimeError(
@@ -97,12 +96,16 @@ class LocalExecutor(BaseExecutor):
97
96
  evaluation_tasks = []
98
97
  job_ids = []
99
98
 
100
- eval_template = jinja2.Template(
99
+ run_template = jinja2.Template(
101
100
  open(pathlib.Path(__file__).parent / "run.template.sh", "r").read()
102
101
  )
103
102
 
104
103
  execution_mode = cfg.execution.get("mode", "parallel")
105
104
  if execution_mode == "parallel":
105
+ if cfg.deployment.type != "none":
106
+ raise ValueError(
107
+ f"Execution mode 'parallel' is not supported with deployment type: {cfg.deployment.type}. Use 'sequential' instead."
108
+ )
106
109
  is_execution_mode_sequential = False
107
110
  elif execution_mode == "sequential":
108
111
  is_execution_mode_sequential = True
@@ -113,20 +116,76 @@ class LocalExecutor(BaseExecutor):
113
116
  )
114
117
  )
115
118
 
119
+ # Will accumulate if any task contains unsafe commands.
120
+ is_potentially_unsafe = False
121
+
122
+ deployment = None
123
+
116
124
  for idx, task in enumerate(cfg.evaluation.tasks):
125
+ timestamp = get_timestamp_string()
117
126
  task_definition = get_task_from_mapping(task.name, tasks_mapping)
118
127
 
128
+ if cfg.deployment.type != "none":
129
+ # container name
130
+ server_container_name = f"server-{task.name}-{timestamp}"
131
+
132
+ # health_url
133
+ health_url = get_health_url(
134
+ cfg, get_endpoint_url(cfg, task, task_definition["endpoint_type"])
135
+ )
136
+
137
+ # mounts
138
+ deployment_mounts_list = []
139
+ if checkpoint_path := cfg.deployment.get("checkpoint_path"):
140
+ deployment_mounts_list.append(f"{checkpoint_path}:/checkpoint:ro")
141
+ if cache_path := cfg.deployment.get("cache_path"):
142
+ deployment_mounts_list.append(f"{cache_path}:/cache")
143
+ for source_mnt, target_mnt in (
144
+ cfg.execution.get("mounts", {}).get("deployment", {}).items()
145
+ ):
146
+ deployment_mounts_list.append(f"{source_mnt}:{target_mnt}")
147
+
148
+ # env vars
149
+ deployment_env_vars = cfg.execution.get("env_vars", {}).get(
150
+ "deployment", {}
151
+ )
152
+
153
+ if cfg.deployment.get("env_vars"):
154
+ warnings.warn(
155
+ "cfg.deployment.env_vars will be deprecated in future versions. "
156
+ "Use cfg.execution.env_vars.deployment instead.",
157
+ category=DeprecationWarning,
158
+ stacklevel=2,
159
+ )
160
+ deployment_env_vars.update(cfg.deployment["env_vars"])
161
+
162
+ command = cfg.deployment.command
163
+ deployment_extra_docker_args = cfg.execution.get(
164
+ "extra_docker_args", ""
165
+ )
166
+
167
+ deployment = {
168
+ "container_name": server_container_name,
169
+ "image": cfg.deployment.image,
170
+ "command": command,
171
+ "mounts": deployment_mounts_list,
172
+ "env_vars": [f"{k}={v}" for k, v in deployment_env_vars.items()],
173
+ "health_url": health_url,
174
+ "port": cfg.deployment.port,
175
+ "extra_docker_args": deployment_extra_docker_args,
176
+ }
177
+
119
178
  # Create job ID as <invocation_id>.<n>
120
179
  job_id = generate_job_id(invocation_id, idx)
121
180
  job_ids.append(job_id)
122
- container_name = f"{task.name}-{get_timestamp_string()}"
181
+ client_container_name = f"client-{task.name}-{timestamp}"
123
182
 
124
183
  # collect all env vars
125
184
  env_vars = copy.deepcopy(dict(cfg.evaluation.get("env_vars", {})))
126
185
  env_vars.update(task.get("env_vars", {}))
127
- if cfg.target.api_endpoint.api_key_name:
186
+ if api_key_name := get_api_key_name(cfg):
128
187
  assert "API_KEY" not in env_vars
129
- env_vars["API_KEY"] = cfg.target.api_endpoint.api_key_name
188
+ env_vars["API_KEY"] = api_key_name
130
189
 
131
190
  # check if the environment variables are set
132
191
  for env_var in env_vars.values():
@@ -135,8 +194,11 @@ class LocalExecutor(BaseExecutor):
135
194
  f"Trying to pass an unset environment variable {env_var}."
136
195
  )
137
196
 
138
- # check if required env vars are defined:
197
+ # check if required env vars are defined (excluding NEMO_EVALUATOR_DATASET_DIR which is handled separately):
139
198
  for required_env_var in task_definition.get("required_env_vars", []):
199
+ # Skip NEMO_EVALUATOR_DATASET_DIR as it's handled by dataset mounting logic below
200
+ if required_env_var == "NEMO_EVALUATOR_DATASET_DIR":
201
+ continue
140
202
  if required_env_var not in env_vars.keys():
141
203
  raise ValueError(
142
204
  f"{task.name} task requires environment variable {required_env_var}."
@@ -144,12 +206,38 @@ class LocalExecutor(BaseExecutor):
144
206
  f" pair {required_env_var}: YOUR_ENV_VAR_NAME"
145
207
  )
146
208
 
209
+ # Handle dataset directory mounting if NEMO_EVALUATOR_DATASET_DIR is required
210
+ dataset_mount_host = None
211
+ dataset_mount_container = None
212
+ dataset_env_var_value = None
213
+ if "NEMO_EVALUATOR_DATASET_DIR" in task_definition.get(
214
+ "required_env_vars", []
215
+ ):
216
+ # Get dataset directory from task config
217
+ if "dataset_dir" in task:
218
+ dataset_mount_host = task["dataset_dir"]
219
+ else:
220
+ raise ValueError(
221
+ f"{task.name} task requires a dataset_dir to be specified. "
222
+ f"Add 'dataset_dir: /path/to/your/dataset' under the task configuration."
223
+ )
224
+ # Get container mount path (default to /datasets if not specified)
225
+ dataset_mount_container = task.get("dataset_mount_path", "/datasets")
226
+ # Set NEMO_EVALUATOR_DATASET_DIR to the container mount path
227
+ dataset_env_var_value = dataset_mount_container
228
+
147
229
  # format env_vars for a template
148
- env_vars = [
230
+ env_vars_list = [
149
231
  f"{env_var_dst}=${env_var_src}"
150
232
  for env_var_dst, env_var_src in env_vars.items()
151
233
  ]
152
234
 
235
+ # Add dataset env var if needed (directly with value, not from host env)
236
+ if dataset_env_var_value:
237
+ env_vars_list.append(
238
+ f"NEMO_EVALUATOR_DATASET_DIR={dataset_env_var_value}"
239
+ )
240
+
153
241
  eval_image = task_definition["container"]
154
242
  if "container" in task:
155
243
  eval_image = task["container"]
@@ -166,15 +254,22 @@ class LocalExecutor(BaseExecutor):
166
254
  # TODO(agronskiy): cleaner way is to encode everything with base64, not
167
255
  # some parts (like ef_config.yaml) and just output as logs somewhere.
168
256
  eval_factory_command_debug_comment = eval_factory_command_struct.debug
257
+ is_potentially_unsafe = (
258
+ is_potentially_unsafe
259
+ or eval_factory_command_struct.is_potentially_unsafe
260
+ )
169
261
  evaluation_task = {
262
+ "deployment": deployment,
170
263
  "name": task.name,
171
264
  "job_id": job_id,
172
265
  "eval_image": eval_image,
173
- "container_name": container_name,
174
- "env_vars": env_vars,
266
+ "client_container_name": client_container_name,
267
+ "env_vars": env_vars_list,
175
268
  "output_dir": task_output_dir,
176
269
  "eval_factory_command": eval_factory_command,
177
270
  "eval_factory_command_debug_comment": eval_factory_command_debug_comment,
271
+ "dataset_mount_host": dataset_mount_host,
272
+ "dataset_mount_container": dataset_mount_container,
178
273
  }
179
274
  evaluation_tasks.append(evaluation_task)
180
275
 
@@ -185,7 +280,7 @@ class LocalExecutor(BaseExecutor):
185
280
  extra_docker_args = cfg.execution.get("extra_docker_args", "")
186
281
 
187
282
  run_sh_content = (
188
- eval_template.render(
283
+ run_template.render(
189
284
  evaluation_tasks=[evaluation_task],
190
285
  auto_export_destinations=auto_export_destinations,
191
286
  extra_docker_args=extra_docker_args,
@@ -196,7 +291,7 @@ class LocalExecutor(BaseExecutor):
196
291
  (task_output_dir / "run.sh").write_text(run_sh_content)
197
292
 
198
293
  run_all_sequentially_sh_content = (
199
- eval_template.render(
294
+ run_template.render(
200
295
  evaluation_tasks=evaluation_tasks,
201
296
  auto_export_destinations=auto_export_destinations,
202
297
  extra_docker_args=extra_docker_args,
@@ -230,8 +325,34 @@ class LocalExecutor(BaseExecutor):
230
325
  with open(task_output_dir / "run.sh", "r") as f:
231
326
  print(grey(f.read()))
232
327
  print(bold("\nTo execute, run without --dry-run"))
328
+
329
+ if is_potentially_unsafe:
330
+ print(
331
+ red(
332
+ "\nFound `pre_cmd` which carries security risk. When running without --dry-run "
333
+ "make sure you trust the command and set NEMO_EVALUATOR_TRUST_PRE_CMD=1"
334
+ )
335
+ )
233
336
  return invocation_id
234
337
 
338
+ if is_potentially_unsafe:
339
+ if os.environ.get("NEMO_EVALUATOR_TRUST_PRE_CMD", "") == "1":
340
+ logger.warning(
341
+ "Found non-empty task commands (e.g. `pre_cmd`) and NEMO_EVALUATOR_TRUST_PRE_CMD "
342
+ "is set, proceeding with caution."
343
+ )
344
+
345
+ else:
346
+ logger.error(
347
+ "Found non-empty task commands (e.g. `pre_cmd`) and NEMO_EVALUATOR_TRUST_PRE_CMD "
348
+ "is not set. This might carry security risk and unstable environments. "
349
+ "To continue, make sure you trust the command and set NEMO_EVALUATOR_TRUST_PRE_CMD=1.",
350
+ )
351
+ raise AttributeError(
352
+ "Untrusted command found in config, make sure you trust and "
353
+ "set NEMO_EVALUATOR_TRUST_PRE_CMD=1."
354
+ )
355
+
235
356
  # Save launched jobs metadata
236
357
  db = ExecutionDB()
237
358
  for job_id, task, evaluation_task in zip(
@@ -245,7 +366,7 @@ class LocalExecutor(BaseExecutor):
245
366
  executor="local",
246
367
  data={
247
368
  "output_dir": str(evaluation_task["output_dir"]),
248
- "container": evaluation_task["container_name"],
369
+ "container": evaluation_task["client_container_name"],
249
370
  "eval_image": evaluation_task["eval_image"],
250
371
  },
251
372
  config=OmegaConf.to_object(cfg),
@@ -301,11 +422,11 @@ class LocalExecutor(BaseExecutor):
301
422
 
302
423
  print(bold(cyan("\nCommands for real-time monitoring:")))
303
424
  for job_id, evaluation_task in zip(job_ids, evaluation_tasks):
304
- log_file = evaluation_task["output_dir"] / "logs" / "stdout.log"
305
- print(f" tail -f {log_file}")
425
+ print(f"\n Job {job_id} ({evaluation_task['name']}):")
426
+ print(f" nemo-evaluator-launcher logs {job_id}")
306
427
 
307
428
  print(bold(cyan("\nFollow all logs for this invocation:")))
308
- print(f" tail -f {output_dir}/*/logs/stdout.log\n")
429
+ print(f" nemo-evaluator-launcher logs {invocation_id}")
309
430
 
310
431
  return invocation_id
311
432
 
@@ -501,6 +622,240 @@ class LocalExecutor(BaseExecutor):
501
622
  )
502
623
  raise RuntimeError(error_msg)
503
624
 
625
+ @staticmethod
626
+ def stream_logs(
627
+ id: Union[str, List[str]], executor_name: Optional[str] = None
628
+ ) -> Iterator[Tuple[str, str, str]]:
629
+ """Stream logs from a job or invocation group.
630
+
631
+ Args:
632
+ id: Unique job identifier, invocation identifier, or list of job IDs to stream simultaneously.
633
+
634
+ Yields:
635
+ Tuple[str, str, str]: Tuples of (job_id, task_name, log_line) for each log line.
636
+ Empty lines are yielded as empty strings.
637
+ """
638
+ db = ExecutionDB()
639
+
640
+ # Handle list of job IDs for simultaneous streaming
641
+ if isinstance(id, list):
642
+ # Collect all jobs from the list of job IDs
643
+ jobs = {}
644
+ for job_id in id:
645
+ job_data = db.get_job(job_id)
646
+ if job_data is None or job_data.executor != "local":
647
+ continue
648
+ jobs[job_id] = job_data
649
+ if not jobs:
650
+ return
651
+ # If id looks like an invocation_id (no dot), get all jobs for it
652
+ elif "." not in id:
653
+ jobs = db.get_jobs(id)
654
+ if not jobs:
655
+ return
656
+ else:
657
+ # Otherwise, treat as job_id
658
+ job_data = db.get_job(id)
659
+ if job_data is None or job_data.executor != "local":
660
+ return
661
+ jobs = {id: job_data}
662
+
663
+ # Collect log file paths and metadata
664
+ log_files = []
665
+
666
+ for job_id, job_data in jobs.items():
667
+ output_dir = pathlib.Path(job_data.data.get("output_dir", ""))
668
+ if not output_dir:
669
+ continue
670
+
671
+ # Get task name from config
672
+ task_name = LocalExecutor._extract_task_name(job_data, job_id)
673
+
674
+ log_file_path = output_dir / "logs" / "client_stdout.log"
675
+
676
+ log_files.append(
677
+ {
678
+ "job_id": job_id,
679
+ "task_name": task_name,
680
+ "path": log_file_path,
681
+ "file_handle": None,
682
+ "position": 0,
683
+ }
684
+ )
685
+
686
+ if not log_files:
687
+ return
688
+
689
+ # Track which files we've seen before (for tail behavior)
690
+ file_seen_before = {}
691
+
692
+ # Open files that exist, keep track of which ones we're waiting for
693
+ # First, yield the last 15 lines from existing files
694
+ for log_info in log_files:
695
+ if log_info["path"].exists():
696
+ file_seen_before[log_info["path"]] = True
697
+ # Read and yield last 15 lines
698
+ last_lines = LocalExecutor._read_last_n_lines(log_info["path"], 15)
699
+ for line in last_lines:
700
+ yield (
701
+ log_info["job_id"],
702
+ log_info["task_name"],
703
+ line,
704
+ )
705
+ try:
706
+ log_info["file_handle"] = open(
707
+ log_info["path"], "r", encoding="utf-8", errors="replace"
708
+ )
709
+ # Seek to end if file already exists (tail behavior)
710
+ log_info["file_handle"].seek(0, 2)
711
+ log_info["position"] = log_info["file_handle"].tell()
712
+ except Exception as e:
713
+ logger.error(f"Could not open {log_info['path']}: {e}")
714
+ else:
715
+ file_seen_before[log_info["path"]] = False
716
+
717
+ try:
718
+ while True:
719
+ any_activity = False
720
+
721
+ for log_info in log_files:
722
+ # Try to open file if it doesn't exist yet
723
+ if log_info["file_handle"] is None:
724
+ if log_info["path"].exists():
725
+ try:
726
+ # If file was just created, read last 15 lines first
727
+ if not file_seen_before.get(log_info["path"], False):
728
+ last_lines = LocalExecutor._read_last_n_lines(
729
+ log_info["path"], 15
730
+ )
731
+ for line in last_lines:
732
+ yield (
733
+ log_info["job_id"],
734
+ log_info["task_name"],
735
+ line,
736
+ )
737
+ file_seen_before[log_info["path"]] = True
738
+
739
+ log_info["file_handle"] = open(
740
+ log_info["path"],
741
+ "r",
742
+ encoding="utf-8",
743
+ errors="replace",
744
+ )
745
+ # Seek to end for tail behavior
746
+ log_info["file_handle"].seek(0, 2)
747
+ log_info["position"] = log_info["file_handle"].tell()
748
+ except Exception as e:
749
+ logger.error(f"Could not open {log_info['path']}: {e}")
750
+ continue
751
+
752
+ # Read new lines from file
753
+ if log_info["file_handle"] is not None:
754
+ try:
755
+ # Check if file has grown
756
+ current_size = log_info["path"].stat().st_size
757
+ if current_size > log_info["position"]:
758
+ log_info["file_handle"].seek(log_info["position"])
759
+ new_lines = log_info["file_handle"].readlines()
760
+ log_info["position"] = log_info["file_handle"].tell()
761
+
762
+ # Yield new lines
763
+ for line in new_lines:
764
+ line_stripped = line.rstrip("\n\r")
765
+ yield (
766
+ log_info["job_id"],
767
+ log_info["task_name"],
768
+ line_stripped,
769
+ )
770
+ any_activity = True
771
+ except (OSError, IOError) as e:
772
+ # File might have been deleted or moved
773
+ # Don't log error for every check, only on first error
774
+ if log_info.get("error_printed", False) is False:
775
+ logger.error(f"Error reading {log_info['path']}: {e}")
776
+ log_info["error_printed"] = True
777
+ log_info["file_handle"] = None
778
+ except Exception:
779
+ # Reset error flag if we successfully read again
780
+ log_info["error_printed"] = False
781
+
782
+ # If no activity, sleep briefly to avoid busy waiting
783
+ if not any_activity:
784
+ time.sleep(0.1)
785
+
786
+ except KeyboardInterrupt:
787
+ # Clean exit on Ctrl+C
788
+ pass
789
+ finally:
790
+ # Close all file handles
791
+ for log_info in log_files:
792
+ if log_info["file_handle"] is not None:
793
+ try:
794
+ log_info["file_handle"].close()
795
+ except Exception:
796
+ pass
797
+
798
+ @staticmethod
799
+ def _read_last_n_lines(file_path: pathlib.Path, n: int) -> List[str]:
800
+ """Read the last N lines from a file efficiently.
801
+
802
+ Args:
803
+ file_path: Path to the file to read from.
804
+ n: Number of lines to read from the end.
805
+
806
+ Returns:
807
+ List of the last N lines (or fewer if file has fewer lines).
808
+ """
809
+ try:
810
+ with open(file_path, "r", encoding="utf-8", errors="replace") as f:
811
+ # Read all lines
812
+ all_lines = f.readlines()
813
+ # Return last n lines, stripping newlines
814
+ return [line.rstrip("\n\r") for line in all_lines[-n:]]
815
+ except Exception as e:
816
+ logger.warning(f"Could not read last {n} lines from {file_path}: {e}")
817
+ return []
818
+
819
+ @staticmethod
820
+ def _extract_task_name(job_data: JobData, job_id: str) -> str:
821
+ """Extract task name from job data config.
822
+
823
+ Args:
824
+ job_data: JobData object containing config.
825
+ job_id: Job ID for error reporting.
826
+
827
+ Returns:
828
+ Task name string.
829
+ """
830
+ config = job_data.config or {}
831
+ evaluation = config.get("evaluation", {})
832
+ tasks = evaluation.get("tasks", [])
833
+
834
+ # Find the task that matches this job
835
+ # For job_id like "15b9f667.0", index is 0
836
+ try:
837
+ if "." in job_id:
838
+ index = int(job_id.split(".")[1])
839
+ if len(tasks) > 0 and index >= len(tasks):
840
+ raise AttributeError(
841
+ f"Job task index {job_id} is larger than number of tasks {len(tasks)} in invocation"
842
+ )
843
+ # If index is valid and tasks exist, return the task name
844
+ if len(tasks) > 0 and index < len(tasks):
845
+ return tasks[index].get("name", "unknown")
846
+ except (ValueError, IndexError):
847
+ pass
848
+
849
+ # Fallback: try to get task name from output_dir
850
+ # output_dir typically ends with task name
851
+ output_dir = job_data.data.get("output_dir", "")
852
+ if output_dir:
853
+ parts = pathlib.Path(output_dir).parts
854
+ if parts:
855
+ return parts[-1]
856
+
857
+ return "unknown"
858
+
504
859
  @staticmethod
505
860
  def _add_to_killed_jobs(invocation_id: str, job_id: str) -> None:
506
861
  """Add a job ID to the killed jobs file for this invocation.