hpc-runner 0.3.0__py3-none-any.whl → 0.3.1__py3-none-any.whl
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
- hpc_runner/_version.py +2 -2
- hpc_runner/cli/config.py +2 -2
- hpc_runner/cli/main.py +8 -3
- hpc_runner/cli/run.py +24 -9
- hpc_runner/cli/status.py +0 -1
- hpc_runner/cli/submit.py +0 -2
- hpc_runner/core/config.py +8 -2
- hpc_runner/core/descriptors.py +9 -3
- hpc_runner/core/job.py +6 -5
- hpc_runner/core/job_array.py +2 -1
- hpc_runner/core/resources.py +2 -1
- hpc_runner/schedulers/__init__.py +2 -2
- hpc_runner/schedulers/base.py +31 -17
- hpc_runner/schedulers/local/scheduler.py +103 -190
- hpc_runner/schedulers/local/templates/job.sh.j2 +17 -4
- hpc_runner/schedulers/sge/args.py +14 -14
- hpc_runner/schedulers/sge/parser.py +4 -4
- hpc_runner/schedulers/sge/scheduler.py +76 -78
- hpc_runner/schedulers/sge/templates/batch.sh.j2 +0 -5
- hpc_runner/schedulers/sge/templates/interactive.sh.j2 +0 -5
- hpc_runner/tui/app.py +14 -25
- hpc_runner/tui/components/filter_bar.py +2 -4
- hpc_runner/tui/components/filter_popup.py +13 -8
- hpc_runner/tui/components/job_table.py +5 -9
- hpc_runner/tui/providers/jobs.py +3 -5
- hpc_runner/tui/screens/confirm.py +3 -1
- hpc_runner/tui/screens/log_viewer.py +1 -3
- hpc_runner/tui/snapshot.py +7 -5
- hpc_runner/workflow/pipeline.py +2 -1
- {hpc_runner-0.3.0.dist-info → hpc_runner-0.3.1.dist-info}/METADATA +7 -5
- hpc_runner-0.3.1.dist-info/RECORD +57 -0
- hpc_runner-0.3.0.dist-info/RECORD +0 -57
- {hpc_runner-0.3.0.dist-info → hpc_runner-0.3.1.dist-info}/WHEEL +0 -0
- {hpc_runner-0.3.0.dist-info → hpc_runner-0.3.1.dist-info}/entry_points.txt +0 -0
|
@@ -12,13 +12,14 @@ from typing import TYPE_CHECKING
|
|
|
12
12
|
from hpc_runner.core.config import get_config
|
|
13
13
|
from hpc_runner.core.exceptions import AccountingNotAvailable, JobNotFoundError
|
|
14
14
|
from hpc_runner.core.job_info import JobInfo
|
|
15
|
-
from hpc_runner.core.result import
|
|
15
|
+
from hpc_runner.core.result import JobResult, JobStatus
|
|
16
16
|
from hpc_runner.schedulers.base import BaseScheduler
|
|
17
17
|
from hpc_runner.templates import render_template
|
|
18
18
|
|
|
19
19
|
if TYPE_CHECKING:
|
|
20
20
|
from hpc_runner.core.job import Job
|
|
21
21
|
from hpc_runner.core.job_array import JobArray
|
|
22
|
+
from hpc_runner.core.result import ArrayJobResult
|
|
22
23
|
|
|
23
24
|
|
|
24
25
|
class LocalScheduler(BaseScheduler):
|
|
@@ -33,167 +34,83 @@ class LocalScheduler(BaseScheduler):
|
|
|
33
34
|
config = get_config()
|
|
34
35
|
local_config = config.get_scheduler_config("local")
|
|
35
36
|
|
|
37
|
+
self.purge_modules = local_config.get("purge_modules", True)
|
|
38
|
+
self.silent_modules = local_config.get("silent_modules", False)
|
|
36
39
|
self.module_init_script = local_config.get("module_init_script", "")
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
) -> JobResult:
|
|
40
|
+
|
|
41
|
+
self._processes: dict[str, subprocess.Popen[bytes]] = {}
|
|
42
|
+
self._exit_codes: dict[str, int] = {}
|
|
43
|
+
self._output_paths: dict[str, dict[str, Path]] = {}
|
|
44
|
+
self._script_paths: dict[str, Path] = {}
|
|
45
|
+
|
|
46
|
+
def submit(self, job: Job, interactive: bool = False, keep_script: bool = False) -> JobResult:
|
|
45
47
|
"""Run job as local subprocess."""
|
|
46
48
|
LocalScheduler._job_counter += 1
|
|
47
49
|
job_id = f"local_{LocalScheduler._job_counter}_{datetime.now().strftime('%Y%m%d%H%M%S')}"
|
|
48
50
|
|
|
49
51
|
# Set up environment
|
|
50
52
|
env = os.environ.copy() if job.inherit_env else {}
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
# Generate and write script
|
|
54
|
-
script = self.generate_script(job)
|
|
55
|
-
script_path = Path(tempfile.gettempdir()) / f".hpc_local_{job_id}.sh"
|
|
56
|
-
script_path.write_text(script)
|
|
57
|
-
script_path.chmod(0o755)
|
|
53
|
+
if job.env_vars:
|
|
54
|
+
env.update(job.env_vars)
|
|
58
55
|
|
|
59
|
-
workdir = Path(job.workdir) if job.workdir else Path.cwd()
|
|
56
|
+
workdir = Path(job.workdir).resolve() if job.workdir else Path.cwd()
|
|
60
57
|
|
|
61
|
-
#
|
|
62
|
-
|
|
58
|
+
# Resolve output paths for template-based redirection
|
|
59
|
+
stdout_path: Path | None = None
|
|
60
|
+
stderr_path: Path | None = None
|
|
63
61
|
|
|
64
|
-
if not
|
|
65
|
-
# Determine output paths
|
|
62
|
+
if job.stdout is not None or job.stderr is not None:
|
|
66
63
|
stdout_file = job.stdout or f"{job.name}.{job_id}.out"
|
|
67
64
|
stdout_path = workdir / stdout_file
|
|
68
65
|
if job.merge_output:
|
|
69
|
-
stderr_path =
|
|
66
|
+
stderr_path = None
|
|
70
67
|
else:
|
|
71
68
|
stderr_file = job.stderr or f"{job.name}.{job_id}.err"
|
|
72
69
|
stderr_path = workdir / stderr_file
|
|
73
70
|
|
|
74
|
-
|
|
75
|
-
LocalScheduler._output_paths[job_id] = {
|
|
71
|
+
self._output_paths[job_id] = {
|
|
76
72
|
"stdout": stdout_path,
|
|
77
|
-
"stderr": stderr_path,
|
|
73
|
+
"stderr": stderr_path if stderr_path else stdout_path,
|
|
78
74
|
}
|
|
79
75
|
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
env=env,
|
|
87
|
-
)
|
|
88
|
-
else:
|
|
89
|
-
with open(stdout_path, "w") as stdout_f:
|
|
90
|
-
if job.merge_output:
|
|
91
|
-
result = subprocess.run(
|
|
92
|
-
[str(script_path)],
|
|
93
|
-
cwd=workdir,
|
|
94
|
-
env=env,
|
|
95
|
-
stdout=stdout_f,
|
|
96
|
-
stderr=subprocess.STDOUT,
|
|
97
|
-
)
|
|
98
|
-
else:
|
|
99
|
-
with open(stderr_path, "w") as stderr_f:
|
|
100
|
-
result = subprocess.run(
|
|
101
|
-
[str(script_path)],
|
|
102
|
-
cwd=workdir,
|
|
103
|
-
env=env,
|
|
104
|
-
stdout=stdout_f,
|
|
105
|
-
stderr=stderr_f,
|
|
106
|
-
)
|
|
107
|
-
LocalScheduler._exit_codes[job_id] = result.returncode
|
|
108
|
-
script_path.unlink(missing_ok=True)
|
|
109
|
-
else:
|
|
110
|
-
# Background execution
|
|
111
|
-
if passthrough:
|
|
112
|
-
proc = subprocess.Popen(
|
|
113
|
-
[str(script_path)],
|
|
114
|
-
cwd=workdir,
|
|
115
|
-
env=env,
|
|
116
|
-
)
|
|
117
|
-
else:
|
|
118
|
-
stdout_f = open(stdout_path, "w")
|
|
119
|
-
if job.merge_output:
|
|
120
|
-
proc = subprocess.Popen(
|
|
121
|
-
[str(script_path)],
|
|
122
|
-
cwd=workdir,
|
|
123
|
-
env=env,
|
|
124
|
-
stdout=stdout_f,
|
|
125
|
-
stderr=subprocess.STDOUT,
|
|
126
|
-
)
|
|
127
|
-
else:
|
|
128
|
-
stderr_f = open(stderr_path, "w")
|
|
129
|
-
proc = subprocess.Popen(
|
|
130
|
-
[str(script_path)],
|
|
131
|
-
cwd=workdir,
|
|
132
|
-
env=env,
|
|
133
|
-
stdout=stdout_f,
|
|
134
|
-
stderr=stderr_f,
|
|
135
|
-
)
|
|
136
|
-
proc._stdout_file = stdout_f # type: ignore[attr-defined]
|
|
137
|
-
if not job.merge_output:
|
|
138
|
-
proc._stderr_file = stderr_f # type: ignore[attr-defined]
|
|
139
|
-
LocalScheduler._processes[job_id] = proc
|
|
140
|
-
# Store script path for cleanup
|
|
141
|
-
proc._script_path = script_path # type: ignore[attr-defined]
|
|
142
|
-
|
|
143
|
-
return JobResult(job_id=job_id, scheduler=self, job=job)
|
|
144
|
-
|
|
145
|
-
def submit_array(self, array: "JobArray") -> ArrayJobResult:
|
|
146
|
-
"""Simulate array job by submitting multiple jobs."""
|
|
147
|
-
# For local scheduler, we just run one job
|
|
148
|
-
# and return an ArrayJobResult pointing to it
|
|
149
|
-
LocalScheduler._job_counter += 1
|
|
150
|
-
base_job_id = f"local_array_{LocalScheduler._job_counter}"
|
|
151
|
-
|
|
152
|
-
# Run jobs sequentially (or could be parallel)
|
|
153
|
-
for idx in array.indices:
|
|
154
|
-
# Set array index environment variable
|
|
155
|
-
os.environ["HPC_ARRAY_TASK_ID"] = str(idx)
|
|
156
|
-
os.environ["SGE_TASK_ID"] = str(idx) # SGE compat
|
|
157
|
-
os.environ["SLURM_ARRAY_TASK_ID"] = str(idx) # Slurm compat
|
|
158
|
-
|
|
159
|
-
# Create a job ID for this task
|
|
160
|
-
task_job_id = f"{base_job_id}.{idx}"
|
|
161
|
-
self._submit_array_task(array.job, task_job_id, idx)
|
|
162
|
-
|
|
163
|
-
return ArrayJobResult(base_job_id=base_job_id, scheduler=self, array=array)
|
|
164
|
-
|
|
165
|
-
def _submit_array_task(self, job: "Job", job_id: str, index: int) -> None:
|
|
166
|
-
"""Submit a single array task."""
|
|
167
|
-
env = os.environ.copy() if job.inherit_env else {}
|
|
168
|
-
env.update(job.env_vars)
|
|
169
|
-
env["HPC_ARRAY_TASK_ID"] = str(index)
|
|
170
|
-
|
|
171
|
-
script = self.generate_script(job)
|
|
76
|
+
# Generate and write script (template handles output redirection)
|
|
77
|
+
script = self.generate_script(
|
|
78
|
+
job,
|
|
79
|
+
stdout_path=stdout_path,
|
|
80
|
+
stderr_path=stderr_path,
|
|
81
|
+
)
|
|
172
82
|
script_path = Path(tempfile.gettempdir()) / f".hpc_local_{job_id}.sh"
|
|
173
83
|
script_path.write_text(script)
|
|
174
84
|
script_path.chmod(0o755)
|
|
175
85
|
|
|
176
|
-
|
|
177
|
-
|
|
86
|
+
if interactive:
|
|
87
|
+
result = subprocess.run(
|
|
88
|
+
[str(script_path)],
|
|
89
|
+
cwd=workdir,
|
|
90
|
+
env=env,
|
|
91
|
+
)
|
|
92
|
+
self._exit_codes[job_id] = result.returncode
|
|
93
|
+
if not keep_script:
|
|
94
|
+
script_path.unlink(missing_ok=True)
|
|
95
|
+
else:
|
|
96
|
+
proc = subprocess.Popen(
|
|
97
|
+
[str(script_path)],
|
|
98
|
+
cwd=workdir,
|
|
99
|
+
env=env,
|
|
100
|
+
)
|
|
101
|
+
self._processes[job_id] = proc
|
|
102
|
+
self._script_paths[job_id] = script_path
|
|
178
103
|
|
|
179
|
-
|
|
104
|
+
return JobResult(job_id=job_id, scheduler=self, job=job)
|
|
180
105
|
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
cwd=workdir,
|
|
185
|
-
env=env,
|
|
186
|
-
stdout=stdout_f,
|
|
187
|
-
stderr=subprocess.STDOUT,
|
|
188
|
-
)
|
|
189
|
-
LocalScheduler._processes[job_id] = proc
|
|
190
|
-
proc._script_path = script_path # type: ignore[attr-defined]
|
|
191
|
-
proc._stdout_file = stdout_f # type: ignore[attr-defined]
|
|
106
|
+
def submit_array(self, array: JobArray) -> ArrayJobResult:
|
|
107
|
+
"""Array jobs are not supported by the local scheduler."""
|
|
108
|
+
raise NotImplementedError("Array jobs are not supported by the local scheduler")
|
|
192
109
|
|
|
193
110
|
def cancel(self, job_id: str) -> bool:
|
|
194
111
|
"""Cancel a local job."""
|
|
195
|
-
if job_id in LocalScheduler._processes:
|
|
196
|
-
proc = LocalScheduler._processes[job_id]
|
|
112
|
+
if job_id in self._processes:
|
|
113
|
+
proc = self._processes[job_id]
|
|
197
114
|
proc.terminate()
|
|
198
115
|
proc.wait()
|
|
199
116
|
self._cleanup_process(job_id)
|
|
@@ -202,76 +119,80 @@ class LocalScheduler(BaseScheduler):
|
|
|
202
119
|
|
|
203
120
|
def get_status(self, job_id: str) -> JobStatus:
|
|
204
121
|
"""Get job status."""
|
|
205
|
-
if job_id in
|
|
206
|
-
|
|
207
|
-
|
|
122
|
+
if job_id in self._exit_codes:
|
|
123
|
+
if self._exit_codes[job_id] == 0:
|
|
124
|
+
return JobStatus.COMPLETED
|
|
125
|
+
return JobStatus.FAILED
|
|
208
126
|
|
|
209
|
-
if job_id not in LocalScheduler._processes:
|
|
127
|
+
if job_id not in self._processes:
|
|
210
128
|
return JobStatus.UNKNOWN
|
|
211
129
|
|
|
212
|
-
proc = LocalScheduler._processes[job_id]
|
|
130
|
+
proc = self._processes[job_id]
|
|
213
131
|
poll = proc.poll()
|
|
214
132
|
|
|
215
133
|
if poll is None:
|
|
216
134
|
return JobStatus.RUNNING
|
|
217
135
|
|
|
218
136
|
# Process completed
|
|
219
|
-
|
|
137
|
+
self._exit_codes[job_id] = poll
|
|
220
138
|
self._cleanup_process(job_id)
|
|
221
139
|
|
|
222
140
|
return JobStatus.COMPLETED if poll == 0 else JobStatus.FAILED
|
|
223
141
|
|
|
224
142
|
def _cleanup_process(self, job_id: str) -> None:
|
|
225
143
|
"""Clean up process resources."""
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
proc._stdout_file.close() # type: ignore[attr-defined]
|
|
231
|
-
if hasattr(proc, "_stderr_file"):
|
|
232
|
-
proc._stderr_file.close() # type: ignore[attr-defined]
|
|
233
|
-
# Remove script
|
|
234
|
-
if hasattr(proc, "_script_path"):
|
|
235
|
-
proc._script_path.unlink(missing_ok=True) # type: ignore[attr-defined]
|
|
236
|
-
del LocalScheduler._processes[job_id]
|
|
144
|
+
self._processes.pop(job_id, None)
|
|
145
|
+
script_path = self._script_paths.pop(job_id, None)
|
|
146
|
+
if script_path:
|
|
147
|
+
script_path.unlink(missing_ok=True)
|
|
237
148
|
|
|
238
149
|
def get_exit_code(self, job_id: str) -> int | None:
|
|
239
150
|
"""Get exit code."""
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
return LocalScheduler._exit_codes[job_id]
|
|
151
|
+
if job_id in self._exit_codes:
|
|
152
|
+
return self._exit_codes[job_id]
|
|
243
153
|
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
proc = LocalScheduler._processes[job_id]
|
|
154
|
+
if job_id in self._processes:
|
|
155
|
+
proc = self._processes[job_id]
|
|
247
156
|
poll = proc.poll()
|
|
248
157
|
if poll is not None:
|
|
249
|
-
|
|
158
|
+
self._exit_codes[job_id] = poll
|
|
250
159
|
return poll
|
|
251
160
|
|
|
252
161
|
return None
|
|
253
162
|
|
|
254
163
|
def get_output_path(self, job_id: str, stream: str) -> Path | None:
|
|
255
164
|
"""Get output file path."""
|
|
256
|
-
if job_id in
|
|
257
|
-
return
|
|
165
|
+
if job_id in self._output_paths:
|
|
166
|
+
return self._output_paths[job_id].get(stream)
|
|
258
167
|
return None
|
|
259
168
|
|
|
260
|
-
def generate_script(
|
|
169
|
+
def generate_script(
|
|
170
|
+
self,
|
|
171
|
+
job: Job,
|
|
172
|
+
array_range: str | None = None,
|
|
173
|
+
*,
|
|
174
|
+
stdout_path: Path | None = None,
|
|
175
|
+
stderr_path: Path | None = None,
|
|
176
|
+
) -> str:
|
|
261
177
|
"""Generate local execution script."""
|
|
262
178
|
return render_template(
|
|
263
179
|
"local/templates/job.sh.j2",
|
|
264
180
|
job=job,
|
|
265
181
|
scheduler=self,
|
|
182
|
+
stdout_path=stdout_path,
|
|
183
|
+
stderr_path=stderr_path,
|
|
184
|
+
merge_output=job.merge_output,
|
|
266
185
|
)
|
|
267
186
|
|
|
268
|
-
def build_submit_command(self, job:
|
|
187
|
+
def build_submit_command(self, job: Job) -> list[str]:
|
|
269
188
|
"""Build command - for local, just bash."""
|
|
270
|
-
|
|
189
|
+
cmd = job.command if isinstance(job.command, str) else " ".join(job.command)
|
|
190
|
+
return ["bash", "-c", cmd]
|
|
271
191
|
|
|
272
|
-
def build_interactive_command(self, job:
|
|
192
|
+
def build_interactive_command(self, job: Job) -> list[str]:
|
|
273
193
|
"""Build interactive command - for local, just bash."""
|
|
274
|
-
|
|
194
|
+
cmd = job.command if isinstance(job.command, str) else " ".join(job.command)
|
|
195
|
+
return ["bash", "-c", cmd]
|
|
275
196
|
|
|
276
197
|
# -------------------------------------------------------------------------
|
|
277
198
|
# TUI Monitor API (stubs for local scheduler)
|
|
@@ -283,31 +204,26 @@ class LocalScheduler(BaseScheduler):
|
|
|
283
204
|
status: set[JobStatus] | None = None,
|
|
284
205
|
queue: str | None = None,
|
|
285
206
|
) -> list[JobInfo]:
|
|
286
|
-
"""List active local jobs.
|
|
287
|
-
|
|
288
|
-
The local scheduler tracks running processes in memory.
|
|
289
|
-
"""
|
|
207
|
+
"""List active local jobs."""
|
|
290
208
|
jobs: list[JobInfo] = []
|
|
291
209
|
current_user = os.environ.get("USER", "unknown")
|
|
292
210
|
|
|
293
|
-
for job_id, proc in LocalScheduler._processes.items():
|
|
211
|
+
for job_id, proc in self._processes.items():
|
|
294
212
|
poll = proc.poll()
|
|
295
|
-
if poll is None:
|
|
213
|
+
if poll is None:
|
|
296
214
|
job_status = JobStatus.RUNNING
|
|
297
215
|
else:
|
|
298
|
-
continue
|
|
216
|
+
continue
|
|
299
217
|
|
|
300
|
-
# Apply filters
|
|
301
218
|
if user is not None and user != current_user:
|
|
302
219
|
continue
|
|
303
220
|
if status is not None and job_status not in status:
|
|
304
221
|
continue
|
|
305
|
-
# queue filter doesn't apply to local scheduler
|
|
306
222
|
|
|
307
223
|
jobs.append(
|
|
308
224
|
JobInfo(
|
|
309
225
|
job_id=job_id,
|
|
310
|
-
name=job_id,
|
|
226
|
+
name=job_id,
|
|
311
227
|
user=current_user,
|
|
312
228
|
status=job_status,
|
|
313
229
|
queue="local",
|
|
@@ -336,22 +252,20 @@ class LocalScheduler(BaseScheduler):
|
|
|
336
252
|
)
|
|
337
253
|
|
|
338
254
|
def has_accounting(self) -> bool:
|
|
339
|
-
"""Check if job accounting is available.
|
|
340
|
-
|
|
341
|
-
Local scheduler does not have persistent accounting.
|
|
342
|
-
"""
|
|
255
|
+
"""Check if job accounting is available."""
|
|
343
256
|
return False
|
|
344
257
|
|
|
345
258
|
def get_job_details(self, job_id: str) -> tuple[JobInfo, dict[str, object]]:
|
|
346
259
|
"""Get details for a local job."""
|
|
347
260
|
current_user = os.environ.get("USER", "unknown")
|
|
348
261
|
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
proc = LocalScheduler._processes[job_id]
|
|
262
|
+
if job_id in self._processes:
|
|
263
|
+
proc = self._processes[job_id]
|
|
352
264
|
poll = proc.poll()
|
|
353
|
-
status =
|
|
354
|
-
JobStatus.
|
|
265
|
+
status = (
|
|
266
|
+
JobStatus.RUNNING
|
|
267
|
+
if poll is None
|
|
268
|
+
else (JobStatus.COMPLETED if poll == 0 else JobStatus.FAILED)
|
|
355
269
|
)
|
|
356
270
|
job_info = JobInfo(
|
|
357
271
|
job_id=job_id,
|
|
@@ -360,14 +274,13 @@ class LocalScheduler(BaseScheduler):
|
|
|
360
274
|
status=status,
|
|
361
275
|
queue="local",
|
|
362
276
|
exit_code=poll if poll is not None else None,
|
|
363
|
-
stdout_path=
|
|
364
|
-
stderr_path=
|
|
277
|
+
stdout_path=self._output_paths.get(job_id, {}).get("stdout"),
|
|
278
|
+
stderr_path=self._output_paths.get(job_id, {}).get("stderr"),
|
|
365
279
|
)
|
|
366
280
|
return job_info, {}
|
|
367
281
|
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
exit_code = LocalScheduler._exit_codes[job_id]
|
|
282
|
+
if job_id in self._exit_codes:
|
|
283
|
+
exit_code = self._exit_codes[job_id]
|
|
371
284
|
job_info = JobInfo(
|
|
372
285
|
job_id=job_id,
|
|
373
286
|
name=job_id,
|
|
@@ -375,8 +288,8 @@ class LocalScheduler(BaseScheduler):
|
|
|
375
288
|
status=JobStatus.COMPLETED if exit_code == 0 else JobStatus.FAILED,
|
|
376
289
|
queue="local",
|
|
377
290
|
exit_code=exit_code,
|
|
378
|
-
stdout_path=
|
|
379
|
-
stderr_path=
|
|
291
|
+
stdout_path=self._output_paths.get(job_id, {}).get("stdout"),
|
|
292
|
+
stderr_path=self._output_paths.get(job_id, {}).get("stderr"),
|
|
380
293
|
)
|
|
381
294
|
return job_info, {}
|
|
382
295
|
|
|
@@ -18,6 +18,11 @@ elif [ -f /etc/modules/init/bash ]; then
|
|
|
18
18
|
fi
|
|
19
19
|
{% endif %}
|
|
20
20
|
|
|
21
|
+
{% if scheduler.purge_modules %}
|
|
22
|
+
# Purge modules for clean environment
|
|
23
|
+
module purge{% if scheduler.silent_modules %} -s{% endif %}
|
|
24
|
+
|
|
25
|
+
{% endif %}
|
|
21
26
|
{% if job.modules_path %}
|
|
22
27
|
# Additional module paths
|
|
23
28
|
{% for path in job.modules_path %}
|
|
@@ -28,7 +33,8 @@ module use {{ path }}
|
|
|
28
33
|
{% if job.modules %}
|
|
29
34
|
# Load modules
|
|
30
35
|
{% for mod in job.modules %}
|
|
31
|
-
module load {{ mod }}
|
|
36
|
+
module load {{ mod }}{% if scheduler.silent_modules %} -s{% endif %}
|
|
37
|
+
|
|
32
38
|
{% endfor %}
|
|
33
39
|
{% endif %}
|
|
34
40
|
|
|
@@ -58,9 +64,16 @@ export {{ key }}="{{ value }}"
|
|
|
58
64
|
{% endfor %}
|
|
59
65
|
{% endif %}
|
|
60
66
|
|
|
61
|
-
{% if
|
|
62
|
-
#
|
|
63
|
-
|
|
67
|
+
{% if stdout_path and merge_output %}
|
|
68
|
+
# Redirect stdout and stderr to file
|
|
69
|
+
exec > {{ stdout_path }} 2>&1
|
|
70
|
+
{% elif stdout_path %}
|
|
71
|
+
# Redirect stdout to file
|
|
72
|
+
exec > {{ stdout_path }}
|
|
73
|
+
{% endif %}
|
|
74
|
+
{% if stderr_path and not merge_output %}
|
|
75
|
+
# Redirect stderr to file
|
|
76
|
+
exec 2> {{ stderr_path }}
|
|
64
77
|
{% endif %}
|
|
65
78
|
|
|
66
79
|
# Execute command
|
|
@@ -7,7 +7,7 @@ both as a script directive (#$ ...) and as command-line arguments.
|
|
|
7
7
|
from hpc_runner.core.descriptors import SchedulerArg
|
|
8
8
|
|
|
9
9
|
|
|
10
|
-
class SGEArg(SchedulerArg):
|
|
10
|
+
class SGEArg(SchedulerArg[str]):
|
|
11
11
|
"""Base class for SGE arguments.
|
|
12
12
|
|
|
13
13
|
SGE uses:
|
|
@@ -15,12 +15,12 @@ class SGEArg(SchedulerArg):
|
|
|
15
15
|
- CLI args: -flag value
|
|
16
16
|
"""
|
|
17
17
|
|
|
18
|
-
def to_args(self, value) -> list[str]:
|
|
18
|
+
def to_args(self, value: str | None) -> list[str]:
|
|
19
19
|
if value is None:
|
|
20
20
|
return []
|
|
21
21
|
return [f"-{self.flag}", str(value)]
|
|
22
22
|
|
|
23
|
-
def to_directive(self, value) -> str | None:
|
|
23
|
+
def to_directive(self, value: str | None) -> str | None:
|
|
24
24
|
if value is None:
|
|
25
25
|
return None
|
|
26
26
|
return f"#$ -{self.flag} {value}"
|
|
@@ -34,42 +34,42 @@ class SGEArg(SchedulerArg):
|
|
|
34
34
|
class SGEJobNameArg(SGEArg):
|
|
35
35
|
"""Job name: -N name"""
|
|
36
36
|
|
|
37
|
-
def __init__(self):
|
|
37
|
+
def __init__(self) -> None:
|
|
38
38
|
super().__init__("N", doc="Job name")
|
|
39
39
|
|
|
40
40
|
|
|
41
41
|
class SGEQueueArg(SGEArg):
|
|
42
42
|
"""Queue selection: -q queue_name"""
|
|
43
43
|
|
|
44
|
-
def __init__(self):
|
|
44
|
+
def __init__(self) -> None:
|
|
45
45
|
super().__init__("q", doc="Queue/partition name")
|
|
46
46
|
|
|
47
47
|
|
|
48
48
|
class SGEOutputArg(SGEArg):
|
|
49
49
|
"""Stdout path: -o path"""
|
|
50
50
|
|
|
51
|
-
def __init__(self):
|
|
51
|
+
def __init__(self) -> None:
|
|
52
52
|
super().__init__("o", doc="Stdout file path")
|
|
53
53
|
|
|
54
54
|
|
|
55
55
|
class SGEErrorArg(SGEArg):
|
|
56
56
|
"""Stderr path: -e path"""
|
|
57
57
|
|
|
58
|
-
def __init__(self):
|
|
58
|
+
def __init__(self) -> None:
|
|
59
59
|
super().__init__("e", doc="Stderr file path")
|
|
60
60
|
|
|
61
61
|
|
|
62
62
|
class SGEPriorityArg(SGEArg):
|
|
63
63
|
"""Job priority: -p priority"""
|
|
64
64
|
|
|
65
|
-
def __init__(self):
|
|
65
|
+
def __init__(self) -> None:
|
|
66
66
|
super().__init__("p", doc="Job priority (-1023 to 1024)")
|
|
67
67
|
|
|
68
68
|
|
|
69
69
|
class SGEShellArg(SGEArg):
|
|
70
70
|
"""Shell selection: -S /path/to/shell"""
|
|
71
71
|
|
|
72
|
-
def __init__(self):
|
|
72
|
+
def __init__(self) -> None:
|
|
73
73
|
super().__init__("S", doc="Shell path")
|
|
74
74
|
|
|
75
75
|
|
|
@@ -81,7 +81,7 @@ class SGEShellArg(SGEArg):
|
|
|
81
81
|
class SGECwdArg(SchedulerArg[bool]):
|
|
82
82
|
"""Use current working directory: -cwd"""
|
|
83
83
|
|
|
84
|
-
def __init__(self):
|
|
84
|
+
def __init__(self) -> None:
|
|
85
85
|
super().__init__("cwd", doc="Execute in current working directory")
|
|
86
86
|
|
|
87
87
|
def to_args(self, value: bool | None) -> list[str]:
|
|
@@ -94,7 +94,7 @@ class SGECwdArg(SchedulerArg[bool]):
|
|
|
94
94
|
class SGEInheritEnvArg(SchedulerArg[bool]):
|
|
95
95
|
"""Inherit environment: -V"""
|
|
96
96
|
|
|
97
|
-
def __init__(self):
|
|
97
|
+
def __init__(self) -> None:
|
|
98
98
|
super().__init__("V", doc="Inherit environment variables")
|
|
99
99
|
|
|
100
100
|
def to_args(self, value: bool | None) -> list[str]:
|
|
@@ -107,7 +107,7 @@ class SGEInheritEnvArg(SchedulerArg[bool]):
|
|
|
107
107
|
class SGEMergeOutputArg(SchedulerArg[bool]):
|
|
108
108
|
"""Merge stdout and stderr: -j y"""
|
|
109
109
|
|
|
110
|
-
def __init__(self):
|
|
110
|
+
def __init__(self) -> None:
|
|
111
111
|
super().__init__("j", doc="Join stdout and stderr")
|
|
112
112
|
|
|
113
113
|
def to_args(self, value: bool | None) -> list[str]:
|
|
@@ -196,7 +196,7 @@ class SGEArrayArg(SchedulerArg[str]):
|
|
|
196
196
|
Range formats: 1-100, 1-100:10, 1,2,3,4
|
|
197
197
|
"""
|
|
198
198
|
|
|
199
|
-
def __init__(self):
|
|
199
|
+
def __init__(self) -> None:
|
|
200
200
|
super().__init__("t", doc="Array job range")
|
|
201
201
|
|
|
202
202
|
def to_args(self, value: str | None) -> list[str]:
|
|
@@ -218,7 +218,7 @@ class SGEArrayArg(SchedulerArg[str]):
|
|
|
218
218
|
class SGEHoldArg(SchedulerArg[str]):
|
|
219
219
|
"""Job dependency: -hold_jid job_id[,job_id,...]"""
|
|
220
220
|
|
|
221
|
-
def __init__(self):
|
|
221
|
+
def __init__(self) -> None:
|
|
222
222
|
super().__init__("hold_jid", doc="Hold until jobs complete")
|
|
223
223
|
|
|
224
224
|
def to_args(self, value: str | None) -> list[str]:
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
"""SGE output parsing utilities."""
|
|
2
2
|
|
|
3
3
|
import re
|
|
4
|
-
from datetime import datetime
|
|
5
4
|
import xml.etree.ElementTree as ET
|
|
5
|
+
from datetime import datetime
|
|
6
6
|
from typing import Any
|
|
7
7
|
|
|
8
8
|
from hpc_runner.core.result import JobStatus
|
|
@@ -131,9 +131,9 @@ def parse_qstat_plain(output: str) -> dict[str, Any]:
|
|
|
131
131
|
"""Parse plain qstat output.
|
|
132
132
|
|
|
133
133
|
Format:
|
|
134
|
-
job-ID prior name user state submit/start at queue
|
|
135
|
-
|
|
136
|
-
12345 0.55500 myjob user r 01/01/2024 10:00:00 all.q@node1
|
|
134
|
+
job-ID prior name user state submit/start at queue slots ja-task-ID
|
|
135
|
+
--------------------------------------------------------------------------
|
|
136
|
+
12345 0.55500 myjob user r 01/01/2024 10:00:00 all.q@node1 1
|
|
137
137
|
"""
|
|
138
138
|
jobs: dict[str, Any] = {}
|
|
139
139
|
|