hpc-runner 0.2.2__py3-none-any.whl → 0.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hpc_runner/_version.py +2 -2
- hpc_runner/cli/config.py +2 -2
- hpc_runner/cli/main.py +8 -3
- hpc_runner/cli/run.py +24 -9
- hpc_runner/cli/status.py +0 -1
- hpc_runner/cli/submit.py +0 -2
- hpc_runner/core/config.py +8 -2
- hpc_runner/core/descriptors.py +9 -3
- hpc_runner/core/job.py +6 -5
- hpc_runner/core/job_array.py +2 -1
- hpc_runner/core/resources.py +2 -1
- hpc_runner/schedulers/__init__.py +2 -2
- hpc_runner/schedulers/base.py +31 -17
- hpc_runner/schedulers/local/scheduler.py +119 -177
- hpc_runner/schedulers/local/templates/job.sh.j2 +38 -7
- hpc_runner/schedulers/sge/args.py +14 -14
- hpc_runner/schedulers/sge/parser.py +4 -4
- hpc_runner/schedulers/sge/scheduler.py +76 -78
- hpc_runner/schedulers/sge/templates/batch.sh.j2 +0 -5
- hpc_runner/schedulers/sge/templates/interactive.sh.j2 +0 -5
- hpc_runner/tui/app.py +14 -25
- hpc_runner/tui/components/filter_bar.py +2 -4
- hpc_runner/tui/components/filter_popup.py +13 -8
- hpc_runner/tui/components/job_table.py +5 -9
- hpc_runner/tui/providers/jobs.py +3 -5
- hpc_runner/tui/screens/confirm.py +3 -1
- hpc_runner/tui/screens/log_viewer.py +1 -3
- hpc_runner/tui/snapshot.py +7 -5
- hpc_runner/workflow/pipeline.py +2 -1
- {hpc_runner-0.2.2.dist-info → hpc_runner-0.3.1.dist-info}/METADATA +7 -5
- hpc_runner-0.3.1.dist-info/RECORD +57 -0
- hpc_runner-0.2.2.dist-info/RECORD +0 -57
- {hpc_runner-0.2.2.dist-info → hpc_runner-0.3.1.dist-info}/WHEEL +0 -0
- {hpc_runner-0.2.2.dist-info → hpc_runner-0.3.1.dist-info}/entry_points.txt +0 -0
|
@@ -9,15 +9,17 @@ from datetime import datetime
|
|
|
9
9
|
from pathlib import Path
|
|
10
10
|
from typing import TYPE_CHECKING
|
|
11
11
|
|
|
12
|
+
from hpc_runner.core.config import get_config
|
|
12
13
|
from hpc_runner.core.exceptions import AccountingNotAvailable, JobNotFoundError
|
|
13
14
|
from hpc_runner.core.job_info import JobInfo
|
|
14
|
-
from hpc_runner.core.result import
|
|
15
|
+
from hpc_runner.core.result import JobResult, JobStatus
|
|
15
16
|
from hpc_runner.schedulers.base import BaseScheduler
|
|
16
17
|
from hpc_runner.templates import render_template
|
|
17
18
|
|
|
18
19
|
if TYPE_CHECKING:
|
|
19
20
|
from hpc_runner.core.job import Job
|
|
20
21
|
from hpc_runner.core.job_array import JobArray
|
|
22
|
+
from hpc_runner.core.result import ArrayJobResult
|
|
21
23
|
|
|
22
24
|
|
|
23
25
|
class LocalScheduler(BaseScheduler):
|
|
@@ -26,145 +28,89 @@ class LocalScheduler(BaseScheduler):
|
|
|
26
28
|
name = "local"
|
|
27
29
|
|
|
28
30
|
_job_counter: int = 0
|
|
29
|
-
_processes: dict[str, subprocess.Popen] = {} # type: ignore[type-arg]
|
|
30
|
-
_exit_codes: dict[str, int] = {}
|
|
31
|
-
_output_paths: dict[str, dict[str, Path]] = {}
|
|
32
31
|
|
|
33
|
-
def
|
|
34
|
-
|
|
35
|
-
|
|
32
|
+
def __init__(self) -> None:
|
|
33
|
+
"""Initialize local scheduler with config-driven settings."""
|
|
34
|
+
config = get_config()
|
|
35
|
+
local_config = config.get_scheduler_config("local")
|
|
36
|
+
|
|
37
|
+
self.purge_modules = local_config.get("purge_modules", True)
|
|
38
|
+
self.silent_modules = local_config.get("silent_modules", False)
|
|
39
|
+
self.module_init_script = local_config.get("module_init_script", "")
|
|
40
|
+
|
|
41
|
+
self._processes: dict[str, subprocess.Popen[bytes]] = {}
|
|
42
|
+
self._exit_codes: dict[str, int] = {}
|
|
43
|
+
self._output_paths: dict[str, dict[str, Path]] = {}
|
|
44
|
+
self._script_paths: dict[str, Path] = {}
|
|
45
|
+
|
|
46
|
+
def submit(self, job: Job, interactive: bool = False, keep_script: bool = False) -> JobResult:
|
|
36
47
|
"""Run job as local subprocess."""
|
|
37
48
|
LocalScheduler._job_counter += 1
|
|
38
49
|
job_id = f"local_{LocalScheduler._job_counter}_{datetime.now().strftime('%Y%m%d%H%M%S')}"
|
|
39
50
|
|
|
40
|
-
# Set up environment
|
|
51
|
+
# Set up environment
|
|
41
52
|
env = os.environ.copy() if job.inherit_env else {}
|
|
53
|
+
if job.env_vars:
|
|
54
|
+
env.update(job.env_vars)
|
|
42
55
|
|
|
43
|
-
|
|
44
|
-
script = self.generate_script(job)
|
|
45
|
-
script_path = Path(tempfile.gettempdir()) / f".hpc_local_{job_id}.sh"
|
|
46
|
-
script_path.write_text(script)
|
|
47
|
-
script_path.chmod(0o755)
|
|
48
|
-
|
|
49
|
-
workdir = Path(job.workdir) if job.workdir else Path.cwd()
|
|
56
|
+
workdir = Path(job.workdir).resolve() if job.workdir else Path.cwd()
|
|
50
57
|
|
|
51
|
-
#
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
if job.merge_output:
|
|
55
|
-
stderr_path = stdout_path # Merge stderr into stdout
|
|
56
|
-
else:
|
|
57
|
-
stderr_file = job.stderr or f"{job.name}.{job_id}.err"
|
|
58
|
-
stderr_path = workdir / stderr_file
|
|
58
|
+
# Resolve output paths for template-based redirection
|
|
59
|
+
stdout_path: Path | None = None
|
|
60
|
+
stderr_path: Path | None = None
|
|
59
61
|
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
"stderr": stderr_path,
|
|
64
|
-
}
|
|
65
|
-
|
|
66
|
-
if interactive:
|
|
67
|
-
# Blocking execution
|
|
68
|
-
with open(stdout_path, "w") as stdout_f:
|
|
69
|
-
if job.merge_output:
|
|
70
|
-
result = subprocess.run(
|
|
71
|
-
[str(script_path)],
|
|
72
|
-
cwd=workdir,
|
|
73
|
-
env=env,
|
|
74
|
-
stdout=stdout_f,
|
|
75
|
-
stderr=subprocess.STDOUT,
|
|
76
|
-
)
|
|
77
|
-
else:
|
|
78
|
-
with open(stderr_path, "w") as stderr_f:
|
|
79
|
-
result = subprocess.run(
|
|
80
|
-
[str(script_path)],
|
|
81
|
-
cwd=workdir,
|
|
82
|
-
env=env,
|
|
83
|
-
stdout=stdout_f,
|
|
84
|
-
stderr=stderr_f,
|
|
85
|
-
)
|
|
86
|
-
LocalScheduler._exit_codes[job_id] = result.returncode
|
|
87
|
-
script_path.unlink(missing_ok=True)
|
|
88
|
-
else:
|
|
89
|
-
# Background execution
|
|
90
|
-
stdout_f = open(stdout_path, "w")
|
|
62
|
+
if job.stdout is not None or job.stderr is not None:
|
|
63
|
+
stdout_file = job.stdout or f"{job.name}.{job_id}.out"
|
|
64
|
+
stdout_path = workdir / stdout_file
|
|
91
65
|
if job.merge_output:
|
|
92
|
-
|
|
93
|
-
[str(script_path)],
|
|
94
|
-
cwd=workdir,
|
|
95
|
-
env=env,
|
|
96
|
-
stdout=stdout_f,
|
|
97
|
-
stderr=subprocess.STDOUT,
|
|
98
|
-
)
|
|
66
|
+
stderr_path = None
|
|
99
67
|
else:
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
return JobResult(job_id=job_id, scheduler=self, job=job)
|
|
116
|
-
|
|
117
|
-
def submit_array(self, array: "JobArray") -> ArrayJobResult:
|
|
118
|
-
"""Simulate array job by submitting multiple jobs."""
|
|
119
|
-
# For local scheduler, we just run one job
|
|
120
|
-
# and return an ArrayJobResult pointing to it
|
|
121
|
-
LocalScheduler._job_counter += 1
|
|
122
|
-
base_job_id = f"local_array_{LocalScheduler._job_counter}"
|
|
123
|
-
|
|
124
|
-
# Run jobs sequentially (or could be parallel)
|
|
125
|
-
for idx in array.indices:
|
|
126
|
-
# Set array index environment variable
|
|
127
|
-
os.environ["HPC_ARRAY_TASK_ID"] = str(idx)
|
|
128
|
-
os.environ["SGE_TASK_ID"] = str(idx) # SGE compat
|
|
129
|
-
os.environ["SLURM_ARRAY_TASK_ID"] = str(idx) # Slurm compat
|
|
130
|
-
|
|
131
|
-
# Create a job ID for this task
|
|
132
|
-
task_job_id = f"{base_job_id}.{idx}"
|
|
133
|
-
self._submit_array_task(array.job, task_job_id, idx)
|
|
134
|
-
|
|
135
|
-
return ArrayJobResult(base_job_id=base_job_id, scheduler=self, array=array)
|
|
136
|
-
|
|
137
|
-
def _submit_array_task(self, job: "Job", job_id: str, index: int) -> None:
|
|
138
|
-
"""Submit a single array task."""
|
|
139
|
-
env = os.environ.copy() if job.inherit_env else {}
|
|
140
|
-
env["HPC_ARRAY_TASK_ID"] = str(index)
|
|
141
|
-
|
|
142
|
-
script = self.generate_script(job)
|
|
68
|
+
stderr_file = job.stderr or f"{job.name}.{job_id}.err"
|
|
69
|
+
stderr_path = workdir / stderr_file
|
|
70
|
+
|
|
71
|
+
self._output_paths[job_id] = {
|
|
72
|
+
"stdout": stdout_path,
|
|
73
|
+
"stderr": stderr_path if stderr_path else stdout_path,
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
# Generate and write script (template handles output redirection)
|
|
77
|
+
script = self.generate_script(
|
|
78
|
+
job,
|
|
79
|
+
stdout_path=stdout_path,
|
|
80
|
+
stderr_path=stderr_path,
|
|
81
|
+
)
|
|
143
82
|
script_path = Path(tempfile.gettempdir()) / f".hpc_local_{job_id}.sh"
|
|
144
83
|
script_path.write_text(script)
|
|
145
84
|
script_path.chmod(0o755)
|
|
146
85
|
|
|
147
|
-
|
|
148
|
-
|
|
86
|
+
if interactive:
|
|
87
|
+
result = subprocess.run(
|
|
88
|
+
[str(script_path)],
|
|
89
|
+
cwd=workdir,
|
|
90
|
+
env=env,
|
|
91
|
+
)
|
|
92
|
+
self._exit_codes[job_id] = result.returncode
|
|
93
|
+
if not keep_script:
|
|
94
|
+
script_path.unlink(missing_ok=True)
|
|
95
|
+
else:
|
|
96
|
+
proc = subprocess.Popen(
|
|
97
|
+
[str(script_path)],
|
|
98
|
+
cwd=workdir,
|
|
99
|
+
env=env,
|
|
100
|
+
)
|
|
101
|
+
self._processes[job_id] = proc
|
|
102
|
+
self._script_paths[job_id] = script_path
|
|
149
103
|
|
|
150
|
-
|
|
104
|
+
return JobResult(job_id=job_id, scheduler=self, job=job)
|
|
151
105
|
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
cwd=workdir,
|
|
156
|
-
env=env,
|
|
157
|
-
stdout=stdout_f,
|
|
158
|
-
stderr=subprocess.STDOUT,
|
|
159
|
-
)
|
|
160
|
-
LocalScheduler._processes[job_id] = proc
|
|
161
|
-
proc._script_path = script_path # type: ignore[attr-defined]
|
|
162
|
-
proc._stdout_file = stdout_f # type: ignore[attr-defined]
|
|
106
|
+
def submit_array(self, array: JobArray) -> ArrayJobResult:
|
|
107
|
+
"""Array jobs are not supported by the local scheduler."""
|
|
108
|
+
raise NotImplementedError("Array jobs are not supported by the local scheduler")
|
|
163
109
|
|
|
164
110
|
def cancel(self, job_id: str) -> bool:
|
|
165
111
|
"""Cancel a local job."""
|
|
166
|
-
if job_id in
|
|
167
|
-
proc =
|
|
112
|
+
if job_id in self._processes:
|
|
113
|
+
proc = self._processes[job_id]
|
|
168
114
|
proc.terminate()
|
|
169
115
|
proc.wait()
|
|
170
116
|
self._cleanup_process(job_id)
|
|
@@ -173,76 +119,80 @@ class LocalScheduler(BaseScheduler):
|
|
|
173
119
|
|
|
174
120
|
def get_status(self, job_id: str) -> JobStatus:
|
|
175
121
|
"""Get job status."""
|
|
176
|
-
if job_id in
|
|
177
|
-
|
|
178
|
-
|
|
122
|
+
if job_id in self._exit_codes:
|
|
123
|
+
if self._exit_codes[job_id] == 0:
|
|
124
|
+
return JobStatus.COMPLETED
|
|
125
|
+
return JobStatus.FAILED
|
|
179
126
|
|
|
180
|
-
if job_id not in
|
|
127
|
+
if job_id not in self._processes:
|
|
181
128
|
return JobStatus.UNKNOWN
|
|
182
129
|
|
|
183
|
-
proc =
|
|
130
|
+
proc = self._processes[job_id]
|
|
184
131
|
poll = proc.poll()
|
|
185
132
|
|
|
186
133
|
if poll is None:
|
|
187
134
|
return JobStatus.RUNNING
|
|
188
135
|
|
|
189
136
|
# Process completed
|
|
190
|
-
|
|
137
|
+
self._exit_codes[job_id] = poll
|
|
191
138
|
self._cleanup_process(job_id)
|
|
192
139
|
|
|
193
140
|
return JobStatus.COMPLETED if poll == 0 else JobStatus.FAILED
|
|
194
141
|
|
|
195
142
|
def _cleanup_process(self, job_id: str) -> None:
|
|
196
143
|
"""Clean up process resources."""
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
proc._stdout_file.close() # type: ignore[attr-defined]
|
|
202
|
-
if hasattr(proc, "_stderr_file"):
|
|
203
|
-
proc._stderr_file.close() # type: ignore[attr-defined]
|
|
204
|
-
# Remove script
|
|
205
|
-
if hasattr(proc, "_script_path"):
|
|
206
|
-
proc._script_path.unlink(missing_ok=True) # type: ignore[attr-defined]
|
|
207
|
-
del LocalScheduler._processes[job_id]
|
|
144
|
+
self._processes.pop(job_id, None)
|
|
145
|
+
script_path = self._script_paths.pop(job_id, None)
|
|
146
|
+
if script_path:
|
|
147
|
+
script_path.unlink(missing_ok=True)
|
|
208
148
|
|
|
209
149
|
def get_exit_code(self, job_id: str) -> int | None:
|
|
210
150
|
"""Get exit code."""
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
return LocalScheduler._exit_codes[job_id]
|
|
151
|
+
if job_id in self._exit_codes:
|
|
152
|
+
return self._exit_codes[job_id]
|
|
214
153
|
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
proc = LocalScheduler._processes[job_id]
|
|
154
|
+
if job_id in self._processes:
|
|
155
|
+
proc = self._processes[job_id]
|
|
218
156
|
poll = proc.poll()
|
|
219
157
|
if poll is not None:
|
|
220
|
-
|
|
158
|
+
self._exit_codes[job_id] = poll
|
|
221
159
|
return poll
|
|
222
160
|
|
|
223
161
|
return None
|
|
224
162
|
|
|
225
163
|
def get_output_path(self, job_id: str, stream: str) -> Path | None:
|
|
226
164
|
"""Get output file path."""
|
|
227
|
-
if job_id in
|
|
228
|
-
return
|
|
165
|
+
if job_id in self._output_paths:
|
|
166
|
+
return self._output_paths[job_id].get(stream)
|
|
229
167
|
return None
|
|
230
168
|
|
|
231
|
-
def generate_script(
|
|
169
|
+
def generate_script(
|
|
170
|
+
self,
|
|
171
|
+
job: Job,
|
|
172
|
+
array_range: str | None = None,
|
|
173
|
+
*,
|
|
174
|
+
stdout_path: Path | None = None,
|
|
175
|
+
stderr_path: Path | None = None,
|
|
176
|
+
) -> str:
|
|
232
177
|
"""Generate local execution script."""
|
|
233
178
|
return render_template(
|
|
234
179
|
"local/templates/job.sh.j2",
|
|
235
180
|
job=job,
|
|
236
181
|
scheduler=self,
|
|
182
|
+
stdout_path=stdout_path,
|
|
183
|
+
stderr_path=stderr_path,
|
|
184
|
+
merge_output=job.merge_output,
|
|
237
185
|
)
|
|
238
186
|
|
|
239
|
-
def build_submit_command(self, job:
|
|
187
|
+
def build_submit_command(self, job: Job) -> list[str]:
|
|
240
188
|
"""Build command - for local, just bash."""
|
|
241
|
-
|
|
189
|
+
cmd = job.command if isinstance(job.command, str) else " ".join(job.command)
|
|
190
|
+
return ["bash", "-c", cmd]
|
|
242
191
|
|
|
243
|
-
def build_interactive_command(self, job:
|
|
192
|
+
def build_interactive_command(self, job: Job) -> list[str]:
|
|
244
193
|
"""Build interactive command - for local, just bash."""
|
|
245
|
-
|
|
194
|
+
cmd = job.command if isinstance(job.command, str) else " ".join(job.command)
|
|
195
|
+
return ["bash", "-c", cmd]
|
|
246
196
|
|
|
247
197
|
# -------------------------------------------------------------------------
|
|
248
198
|
# TUI Monitor API (stubs for local scheduler)
|
|
@@ -254,31 +204,26 @@ class LocalScheduler(BaseScheduler):
|
|
|
254
204
|
status: set[JobStatus] | None = None,
|
|
255
205
|
queue: str | None = None,
|
|
256
206
|
) -> list[JobInfo]:
|
|
257
|
-
"""List active local jobs.
|
|
258
|
-
|
|
259
|
-
The local scheduler tracks running processes in memory.
|
|
260
|
-
"""
|
|
207
|
+
"""List active local jobs."""
|
|
261
208
|
jobs: list[JobInfo] = []
|
|
262
209
|
current_user = os.environ.get("USER", "unknown")
|
|
263
210
|
|
|
264
|
-
for job_id, proc in
|
|
211
|
+
for job_id, proc in self._processes.items():
|
|
265
212
|
poll = proc.poll()
|
|
266
|
-
if poll is None:
|
|
213
|
+
if poll is None:
|
|
267
214
|
job_status = JobStatus.RUNNING
|
|
268
215
|
else:
|
|
269
|
-
continue
|
|
216
|
+
continue
|
|
270
217
|
|
|
271
|
-
# Apply filters
|
|
272
218
|
if user is not None and user != current_user:
|
|
273
219
|
continue
|
|
274
220
|
if status is not None and job_status not in status:
|
|
275
221
|
continue
|
|
276
|
-
# queue filter doesn't apply to local scheduler
|
|
277
222
|
|
|
278
223
|
jobs.append(
|
|
279
224
|
JobInfo(
|
|
280
225
|
job_id=job_id,
|
|
281
|
-
name=job_id,
|
|
226
|
+
name=job_id,
|
|
282
227
|
user=current_user,
|
|
283
228
|
status=job_status,
|
|
284
229
|
queue="local",
|
|
@@ -307,22 +252,20 @@ class LocalScheduler(BaseScheduler):
|
|
|
307
252
|
)
|
|
308
253
|
|
|
309
254
|
def has_accounting(self) -> bool:
|
|
310
|
-
"""Check if job accounting is available.
|
|
311
|
-
|
|
312
|
-
Local scheduler does not have persistent accounting.
|
|
313
|
-
"""
|
|
255
|
+
"""Check if job accounting is available."""
|
|
314
256
|
return False
|
|
315
257
|
|
|
316
258
|
def get_job_details(self, job_id: str) -> tuple[JobInfo, dict[str, object]]:
|
|
317
259
|
"""Get details for a local job."""
|
|
318
260
|
current_user = os.environ.get("USER", "unknown")
|
|
319
261
|
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
proc = LocalScheduler._processes[job_id]
|
|
262
|
+
if job_id in self._processes:
|
|
263
|
+
proc = self._processes[job_id]
|
|
323
264
|
poll = proc.poll()
|
|
324
|
-
status =
|
|
325
|
-
JobStatus.
|
|
265
|
+
status = (
|
|
266
|
+
JobStatus.RUNNING
|
|
267
|
+
if poll is None
|
|
268
|
+
else (JobStatus.COMPLETED if poll == 0 else JobStatus.FAILED)
|
|
326
269
|
)
|
|
327
270
|
job_info = JobInfo(
|
|
328
271
|
job_id=job_id,
|
|
@@ -331,14 +274,13 @@ class LocalScheduler(BaseScheduler):
|
|
|
331
274
|
status=status,
|
|
332
275
|
queue="local",
|
|
333
276
|
exit_code=poll if poll is not None else None,
|
|
334
|
-
stdout_path=
|
|
335
|
-
stderr_path=
|
|
277
|
+
stdout_path=self._output_paths.get(job_id, {}).get("stdout"),
|
|
278
|
+
stderr_path=self._output_paths.get(job_id, {}).get("stderr"),
|
|
336
279
|
)
|
|
337
280
|
return job_info, {}
|
|
338
281
|
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
exit_code = LocalScheduler._exit_codes[job_id]
|
|
282
|
+
if job_id in self._exit_codes:
|
|
283
|
+
exit_code = self._exit_codes[job_id]
|
|
342
284
|
job_info = JobInfo(
|
|
343
285
|
job_id=job_id,
|
|
344
286
|
name=job_id,
|
|
@@ -346,8 +288,8 @@ class LocalScheduler(BaseScheduler):
|
|
|
346
288
|
status=JobStatus.COMPLETED if exit_code == 0 else JobStatus.FAILED,
|
|
347
289
|
queue="local",
|
|
348
290
|
exit_code=exit_code,
|
|
349
|
-
stdout_path=
|
|
350
|
-
stderr_path=
|
|
291
|
+
stdout_path=self._output_paths.get(job_id, {}).get("stdout"),
|
|
292
|
+
stderr_path=self._output_paths.get(job_id, {}).get("stderr"),
|
|
351
293
|
)
|
|
352
294
|
return job_info, {}
|
|
353
295
|
|
|
@@ -5,20 +5,44 @@
|
|
|
5
5
|
# Exit on error
|
|
6
6
|
set -e
|
|
7
7
|
|
|
8
|
+
# Module system initialization
|
|
9
|
+
{% if scheduler.module_init_script %}
|
|
10
|
+
. {{ scheduler.module_init_script }}
|
|
11
|
+
{% else %}
|
|
12
|
+
if [ -f /etc/profile.d/modules.sh ]; then
|
|
13
|
+
. /etc/profile.d/modules.sh
|
|
14
|
+
elif [ -f /usr/share/Modules/init/bash ]; then
|
|
15
|
+
. /usr/share/Modules/init/bash
|
|
16
|
+
elif [ -f /etc/modules/init/bash ]; then
|
|
17
|
+
. /etc/modules/init/bash
|
|
18
|
+
fi
|
|
19
|
+
{% endif %}
|
|
20
|
+
|
|
21
|
+
{% if scheduler.purge_modules %}
|
|
22
|
+
# Purge modules for clean environment
|
|
23
|
+
module purge{% if scheduler.silent_modules %} -s{% endif %}
|
|
24
|
+
|
|
25
|
+
{% endif %}
|
|
8
26
|
{% if job.modules_path %}
|
|
9
|
-
# Additional module paths
|
|
27
|
+
# Additional module paths
|
|
10
28
|
{% for path in job.modules_path %}
|
|
11
|
-
|
|
29
|
+
module use {{ path }}
|
|
12
30
|
{% endfor %}
|
|
13
31
|
{% endif %}
|
|
14
32
|
|
|
15
33
|
{% if job.modules %}
|
|
16
|
-
#
|
|
34
|
+
# Load modules
|
|
17
35
|
{% for mod in job.modules %}
|
|
18
|
-
|
|
36
|
+
module load {{ mod }}{% if scheduler.silent_modules %} -s{% endif %}
|
|
37
|
+
|
|
19
38
|
{% endfor %}
|
|
20
39
|
{% endif %}
|
|
21
40
|
|
|
41
|
+
{% if job.venv %}
|
|
42
|
+
# Activate virtual environment
|
|
43
|
+
source {{ job.venv }}/bin/activate
|
|
44
|
+
{% endif %}
|
|
45
|
+
|
|
22
46
|
{% if job.env_prepend %}
|
|
23
47
|
# Prepend to environment variables
|
|
24
48
|
{% for key, value in job.env_prepend.items() %}
|
|
@@ -40,9 +64,16 @@ export {{ key }}="{{ value }}"
|
|
|
40
64
|
{% endfor %}
|
|
41
65
|
{% endif %}
|
|
42
66
|
|
|
43
|
-
{% if
|
|
44
|
-
#
|
|
45
|
-
|
|
67
|
+
{% if stdout_path and merge_output %}
|
|
68
|
+
# Redirect stdout and stderr to file
|
|
69
|
+
exec > {{ stdout_path }} 2>&1
|
|
70
|
+
{% elif stdout_path %}
|
|
71
|
+
# Redirect stdout to file
|
|
72
|
+
exec > {{ stdout_path }}
|
|
73
|
+
{% endif %}
|
|
74
|
+
{% if stderr_path and not merge_output %}
|
|
75
|
+
# Redirect stderr to file
|
|
76
|
+
exec 2> {{ stderr_path }}
|
|
46
77
|
{% endif %}
|
|
47
78
|
|
|
48
79
|
# Execute command
|
|
@@ -7,7 +7,7 @@ both as a script directive (#$ ...) and as command-line arguments.
|
|
|
7
7
|
from hpc_runner.core.descriptors import SchedulerArg
|
|
8
8
|
|
|
9
9
|
|
|
10
|
-
class SGEArg(SchedulerArg):
|
|
10
|
+
class SGEArg(SchedulerArg[str]):
|
|
11
11
|
"""Base class for SGE arguments.
|
|
12
12
|
|
|
13
13
|
SGE uses:
|
|
@@ -15,12 +15,12 @@ class SGEArg(SchedulerArg):
|
|
|
15
15
|
- CLI args: -flag value
|
|
16
16
|
"""
|
|
17
17
|
|
|
18
|
-
def to_args(self, value) -> list[str]:
|
|
18
|
+
def to_args(self, value: str | None) -> list[str]:
|
|
19
19
|
if value is None:
|
|
20
20
|
return []
|
|
21
21
|
return [f"-{self.flag}", str(value)]
|
|
22
22
|
|
|
23
|
-
def to_directive(self, value) -> str | None:
|
|
23
|
+
def to_directive(self, value: str | None) -> str | None:
|
|
24
24
|
if value is None:
|
|
25
25
|
return None
|
|
26
26
|
return f"#$ -{self.flag} {value}"
|
|
@@ -34,42 +34,42 @@ class SGEArg(SchedulerArg):
|
|
|
34
34
|
class SGEJobNameArg(SGEArg):
|
|
35
35
|
"""Job name: -N name"""
|
|
36
36
|
|
|
37
|
-
def __init__(self):
|
|
37
|
+
def __init__(self) -> None:
|
|
38
38
|
super().__init__("N", doc="Job name")
|
|
39
39
|
|
|
40
40
|
|
|
41
41
|
class SGEQueueArg(SGEArg):
|
|
42
42
|
"""Queue selection: -q queue_name"""
|
|
43
43
|
|
|
44
|
-
def __init__(self):
|
|
44
|
+
def __init__(self) -> None:
|
|
45
45
|
super().__init__("q", doc="Queue/partition name")
|
|
46
46
|
|
|
47
47
|
|
|
48
48
|
class SGEOutputArg(SGEArg):
|
|
49
49
|
"""Stdout path: -o path"""
|
|
50
50
|
|
|
51
|
-
def __init__(self):
|
|
51
|
+
def __init__(self) -> None:
|
|
52
52
|
super().__init__("o", doc="Stdout file path")
|
|
53
53
|
|
|
54
54
|
|
|
55
55
|
class SGEErrorArg(SGEArg):
|
|
56
56
|
"""Stderr path: -e path"""
|
|
57
57
|
|
|
58
|
-
def __init__(self):
|
|
58
|
+
def __init__(self) -> None:
|
|
59
59
|
super().__init__("e", doc="Stderr file path")
|
|
60
60
|
|
|
61
61
|
|
|
62
62
|
class SGEPriorityArg(SGEArg):
|
|
63
63
|
"""Job priority: -p priority"""
|
|
64
64
|
|
|
65
|
-
def __init__(self):
|
|
65
|
+
def __init__(self) -> None:
|
|
66
66
|
super().__init__("p", doc="Job priority (-1023 to 1024)")
|
|
67
67
|
|
|
68
68
|
|
|
69
69
|
class SGEShellArg(SGEArg):
|
|
70
70
|
"""Shell selection: -S /path/to/shell"""
|
|
71
71
|
|
|
72
|
-
def __init__(self):
|
|
72
|
+
def __init__(self) -> None:
|
|
73
73
|
super().__init__("S", doc="Shell path")
|
|
74
74
|
|
|
75
75
|
|
|
@@ -81,7 +81,7 @@ class SGEShellArg(SGEArg):
|
|
|
81
81
|
class SGECwdArg(SchedulerArg[bool]):
|
|
82
82
|
"""Use current working directory: -cwd"""
|
|
83
83
|
|
|
84
|
-
def __init__(self):
|
|
84
|
+
def __init__(self) -> None:
|
|
85
85
|
super().__init__("cwd", doc="Execute in current working directory")
|
|
86
86
|
|
|
87
87
|
def to_args(self, value: bool | None) -> list[str]:
|
|
@@ -94,7 +94,7 @@ class SGECwdArg(SchedulerArg[bool]):
|
|
|
94
94
|
class SGEInheritEnvArg(SchedulerArg[bool]):
|
|
95
95
|
"""Inherit environment: -V"""
|
|
96
96
|
|
|
97
|
-
def __init__(self):
|
|
97
|
+
def __init__(self) -> None:
|
|
98
98
|
super().__init__("V", doc="Inherit environment variables")
|
|
99
99
|
|
|
100
100
|
def to_args(self, value: bool | None) -> list[str]:
|
|
@@ -107,7 +107,7 @@ class SGEInheritEnvArg(SchedulerArg[bool]):
|
|
|
107
107
|
class SGEMergeOutputArg(SchedulerArg[bool]):
|
|
108
108
|
"""Merge stdout and stderr: -j y"""
|
|
109
109
|
|
|
110
|
-
def __init__(self):
|
|
110
|
+
def __init__(self) -> None:
|
|
111
111
|
super().__init__("j", doc="Join stdout and stderr")
|
|
112
112
|
|
|
113
113
|
def to_args(self, value: bool | None) -> list[str]:
|
|
@@ -196,7 +196,7 @@ class SGEArrayArg(SchedulerArg[str]):
|
|
|
196
196
|
Range formats: 1-100, 1-100:10, 1,2,3,4
|
|
197
197
|
"""
|
|
198
198
|
|
|
199
|
-
def __init__(self):
|
|
199
|
+
def __init__(self) -> None:
|
|
200
200
|
super().__init__("t", doc="Array job range")
|
|
201
201
|
|
|
202
202
|
def to_args(self, value: str | None) -> list[str]:
|
|
@@ -218,7 +218,7 @@ class SGEArrayArg(SchedulerArg[str]):
|
|
|
218
218
|
class SGEHoldArg(SchedulerArg[str]):
|
|
219
219
|
"""Job dependency: -hold_jid job_id[,job_id,...]"""
|
|
220
220
|
|
|
221
|
-
def __init__(self):
|
|
221
|
+
def __init__(self) -> None:
|
|
222
222
|
super().__init__("hold_jid", doc="Hold until jobs complete")
|
|
223
223
|
|
|
224
224
|
def to_args(self, value: str | None) -> list[str]:
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
"""SGE output parsing utilities."""
|
|
2
2
|
|
|
3
3
|
import re
|
|
4
|
-
from datetime import datetime
|
|
5
4
|
import xml.etree.ElementTree as ET
|
|
5
|
+
from datetime import datetime
|
|
6
6
|
from typing import Any
|
|
7
7
|
|
|
8
8
|
from hpc_runner.core.result import JobStatus
|
|
@@ -131,9 +131,9 @@ def parse_qstat_plain(output: str) -> dict[str, Any]:
|
|
|
131
131
|
"""Parse plain qstat output.
|
|
132
132
|
|
|
133
133
|
Format:
|
|
134
|
-
job-ID prior name user state submit/start at queue
|
|
135
|
-
|
|
136
|
-
12345 0.55500 myjob user r 01/01/2024 10:00:00 all.q@node1
|
|
134
|
+
job-ID prior name user state submit/start at queue slots ja-task-ID
|
|
135
|
+
--------------------------------------------------------------------------
|
|
136
|
+
12345 0.55500 myjob user r 01/01/2024 10:00:00 all.q@node1 1
|
|
137
137
|
"""
|
|
138
138
|
jobs: dict[str, Any] = {}
|
|
139
139
|
|